feat(dns): add dnscrypt and dns over tcp

2026-02-04 22:08:05 +00:00
parent 5d9b630d13
commit 92351a80a9
12 changed files with 2576 additions and 568 deletions
--- a/scripts/analysis/analyze_dns_metrics.py
+++ b/scripts/analysis/analyze_dns_metrics.py
@@ -1,289 +1,498 @@
-import csv
-import os
-import statistics
-from collections import defaultdict
+import pandas as pd
+import matplotlib.pyplot as plt
+import seaborn as sns
+import numpy as np
 from pathlib import Path
+from scipy import stats
+import warnings

-def map_server_to_resolver(server):
-    """Map server address/domain to resolver name"""
-    server_lower = server.lower()
-    
-    if '1.1.1.1' in server_lower or 'cloudflare' in server_lower:
-        return 'Cloudflare'
-    elif '8.8.8.8' in server_lower or 'google' in server_lower:
-        return 'Google'
-    elif '9.9.9.9' in server_lower or 'quad9' in server_lower:
-        return 'Quad9'
-    elif 'adguard' in server_lower:
-        return 'AdGuard'
-    else:
-        return server  # Fallback to original server name
+warnings.filterwarnings('ignore')  # NOTE(review): silences every warning process-wide (pandas/seaborn deprecations included), not only plot noise

-def extract_from_new_format(filename):
-    """Parse new filename format: protocol[-flags]-timestamp.csv"""
-    base = filename.replace('.csv', '')
-    parts = base.split('-')
-    
-    if len(parts) < 2:
-        return None, None, None, None
-    
-    protocol = parts[0]
-    timestamp = parts[-1]
-    
-    # Flags are everything between protocol and timestamp
-    flags_str = '-'.join(parts[1:-1])
-    
-    # Determine DNSSEC status
-    if 'auth' in flags_str:
-        dnssec_status = 'auth'  # Authoritative DNSSEC
-    elif 'trust' in flags_str:
-        dnssec_status = 'trust'  # Trust-based DNSSEC
-    else:
-        dnssec_status = 'off'
-    
-    keepalive_status = 'on' if 'persist' in flags_str else 'off'
-    
-    return protocol, dnssec_status, keepalive_status, flags_str
+# Global plot styling for publication-quality figures, applied once at import time (seaborn theme + matplotlib rcParams)
+sns.set_style("whitegrid")
+plt.rcParams['figure.dpi'] = 300  # render resolution of figures
+plt.rcParams['savefig.dpi'] = 300  # resolution of files written by savefig()
+plt.rcParams['font.size'] = 10  # base font size in points
+plt.rcParams['figure.figsize'] = (12, 6)  # default (width, height) in inches; an explicit figsize= overrides it

-def extract_server_info_from_csv(row):
-    """Extract DNSSEC info from CSV row data"""
-    dnssec = row.get('dnssec', 'false').lower() == 'true'
-    auth_dnssec = row.get('auth_dnssec', 'false').lower() == 'true'
-    keepalive = row.get('keep_alive', 'false').lower() == 'true'
-    
-    if dnssec:
-        if auth_dnssec:
-            dnssec_status = 'auth'
-        else:
-            dnssec_status = 'trust'
-    else:
-        dnssec_status = 'off'
-    
-    keepalive_status = 'on' if keepalive else 'off'
-    
-    return dnssec_status, keepalive_status
-
-def extract_server_info(file_path, row):
-    """Extract info using directory structure, filename, and CSV data"""
-    path = Path(file_path)
-    
-    # First try to get DNSSEC info from CSV row (most accurate)
-    try:
-        csv_dnssec_status, csv_keepalive_status = extract_server_info_from_csv(row)
-        protocol = row.get('protocol', '').lower()
+class DNSAnalyzer:
+    def __init__(self, results_dir='results'):  # results_dir: root folder that load_all_data() scans per provider
+        self.results_dir = Path(results_dir)  # stored as a Path so it can be joined with "/" later
+        self.df = None  # combined DataFrame; stays None until load_all_data() assigns it
        
-        # Get server from directory structure
-        parts = path.parts
-        if len(parts) >= 4:
-            potential_date = parts[-2]
-            # Check if it's a date like YYYY-MM-DD
-            if len(potential_date) == 10 and potential_date[4] == '-' and potential_date[7] == '-' and potential_date.replace('-', '').isdigit():
-                server = parts[-3]  # resolver folder (e.g., cloudflare)
-                return protocol, server, csv_dnssec_status, csv_keepalive_status
+    def load_all_data(self):
+        """Load all CSV files from the results directory"""
+        data_frames = []
        
-        # Fallback to DNS server field
-        server = row.get('dns_server', '')
-        return protocol, server, csv_dnssec_status, csv_keepalive_status
+        providers = ['adguard', 'cloudflare', 'google', 'quad9']
        
-    except (KeyError, ValueError):
-        pass
-    
-    # Fallback to filename parsing
-    filename = path.name
-    protocol, dnssec_status, keepalive_status, flags = extract_from_new_format(filename)
-    
-    if protocol:
-        # Get server from directory structure
-        parts = path.parts
-        if len(parts) >= 4:
-            potential_date = parts[-2]
-            if len(potential_date) == 10 and potential_date[4] == '-' and potential_date[7] == '-' and potential_date.replace('-', '').isdigit():
-                server = parts[-3]
-                return protocol, server, dnssec_status, keepalive_status
-        
-        # Fallback to DNS server field
-        server = row.get('dns_server', '')
-        return protocol, server, dnssec_status, keepalive_status
-    
-    return None, None, None, None
-
-def get_dnssec_display_name(dnssec_status):
-    """Convert DNSSEC status to display name"""
-    if dnssec_status == 'auth':
-        return 'DNSSEC (Authoritative)'
-    elif dnssec_status == 'trust':
-        return 'DNSSEC (Trust-based)'
-    else:
-        return 'No DNSSEC'
-
-def analyze_dns_data(root_directory, output_file):
-    """Analyze DNS data and generate metrics"""
-    
-    # Dictionary to store measurements: {(resolver, protocol, dnssec, keepalive): [durations]}
-    measurements = defaultdict(list)
-    
-    # Walk through all directories
-    for root, dirs, files in os.walk(root_directory):
-        for file in files:
-            if file.endswith('.csv'):
-                file_path = os.path.join(root, file)
-                print(f"Processing: {file_path}")
+        for provider in providers:
+            provider_path = self.results_dir / provider
+            if not provider_path.exists():
+                continue
                
+            for csv_file in provider_path.glob('*.csv'):
                try:
-                    with open(file_path, 'r', newline='') as csvfile:
-                        reader = csv.DictReader(csvfile)
-                        
-                        for row_num, row in enumerate(reader, 2):  # Start at 2 since header is row 1
-                            try:
-                                protocol, server, dnssec_status, keepalive_status = extract_server_info(file_path, row)
-                                
-                                if protocol and server:
-                                    resolver = map_server_to_resolver(server)
-                                    duration_ms = float(row.get('duration_ms', 0))
-                                    
-                                    # Only include successful queries
-                                    if row.get('response_code', '') in ['NOERROR', '']:
-                                        key = (resolver, protocol, dnssec_status, keepalive_status)
-                                        measurements[key].append(duration_ms)
-                                    
-                            except (ValueError, TypeError) as e:
-                                print(f"Data parse error in {file_path} row {row_num}: {e}")
-                                continue
-                                
+                    df = pd.read_csv(csv_file)
+                    df['provider'] = provider
+                    df['test_config'] = csv_file.stem
+                    data_frames.append(df)
                except Exception as e:
-                    print(f"Error processing file {file_path}: {e}")
-                    continue
-    
-    # Calculate statistics grouped by resolver first, then by configuration
-    resolver_results = defaultdict(lambda: defaultdict(lambda: defaultdict(list)))
-    
-    for (resolver, protocol, dnssec, keepalive), durations in measurements.items():
-        if durations:
-            stats = {
-                'protocol': protocol.upper(),
-                'dnssec': dnssec,
-                'keepalive': keepalive,
-                'total_queries': len(durations),
-                'avg_latency_ms': round(statistics.mean(durations), 3),
-                'median_latency_ms': round(statistics.median(durations), 3),
-                'min_latency_ms': round(min(durations), 3),
-                'max_latency_ms': round(max(durations), 3),
-                'std_dev_ms': round(statistics.stdev(durations) if len(durations) > 1 else 0, 3),
-                'p95_latency_ms': round(statistics.quantiles(durations, n=20)[18], 3) if len(durations) >= 20 else round(max(durations), 3),
-                'p99_latency_ms': round(statistics.quantiles(durations, n=100)[98], 3) if len(durations) >= 100 else round(max(durations), 3)
-            }
-            # Group by resolver -> dnssec -> keepalive -> protocol
-            resolver_results[resolver][dnssec][keepalive].append(stats)
-    
-    # Sort each configuration's results by average latency
-    for resolver in resolver_results:
-        for dnssec in resolver_results[resolver]:
-            for keepalive in resolver_results[resolver][dnssec]:
-                resolver_results[resolver][dnssec][keepalive].sort(key=lambda x: x['avg_latency_ms'])
-    
-    # Write to CSV with all data
-    all_results = []
-    for resolver in resolver_results:
-        for dnssec in resolver_results[resolver]:
-            for keepalive in resolver_results[resolver][dnssec]:
-                for result in resolver_results[resolver][dnssec][keepalive]:
-                    result['resolver'] = resolver
-                    all_results.append(result)
-    
-    with open(output_file, 'w', newline='') as csvfile:
-        fieldnames = [
-            'resolver', 'protocol', 'dnssec', 'keepalive', 'total_queries',
-            'avg_latency_ms', 'median_latency_ms', 'min_latency_ms', 
-            'max_latency_ms', 'std_dev_ms', 'p95_latency_ms', 'p99_latency_ms'
-        ]
+                    print(f"Error loading {csv_file}: {e}")
        
-        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
-        writer.writeheader()
-        writer.writerows(all_results)
-    
-    print(f"\nAnalysis complete! Full results written to {output_file}")
-    print(f"Total measurements: {sum(len(durations) for durations in measurements.values())}")
-    
-    def print_configuration_table(resolver, dnssec_status, keepalive_status, results):
-        """Print a formatted table for a specific configuration"""
-        ka_indicator = "PERSISTENT" if keepalive_status == 'on' else "NEW CONN"
-        dnssec_display = get_dnssec_display_name(dnssec_status)
+        self.df = pd.concat(data_frames, ignore_index=True)
+        self._clean_and_enrich_data()
+        print(f"Loaded {len(self.df)} DNS queries across {len(data_frames)} test configurations")
        
-        print(f"\n  {dnssec_display} - {ka_indicator}")
-        print("  " + "-" * 90)
-        print(f"  {'Protocol':<12} {'Queries':<8} {'Avg(ms)':<10} {'Median(ms)':<12} {'Min(ms)':<10} {'Max(ms)':<10} {'P95(ms)':<10}")
-        print("  " + "-" * 90)
+    def _clean_and_enrich_data(self):
+        """Clean data and add useful columns"""
+        # Remove failed queries
+        self.df = self.df[self.df['error'].isna()]
        
-        for result in results:
-            print(f"  {result['protocol']:<12} {result['total_queries']:<8} "
-                  f"{result['avg_latency_ms']:<10} {result['median_latency_ms']:<12} "
-                  f"{result['min_latency_ms']:<10} {result['max_latency_ms']:<10} "
-                  f"{result['p95_latency_ms']:<10}")
-    
-    # Print results grouped by resolver first
-    print(f"\n{'=' * 100}")
-    print("DNS RESOLVER PERFORMANCE COMPARISON")
-    print(f"{'=' * 100}")
-    
-    for resolver in sorted(resolver_results.keys()):
-        print(f"\n{resolver} DNS Resolver")
-        print("=" * 100)
+        # Extract protocol base (remove -auth, -trust suffixes)
+        self.df['protocol_base'] = self.df['protocol'].str.replace('-auth|-trust', '', regex=True)
        
-        # Order configurations logically
-        config_order = [
-            ('off', 'off'),     # No DNSSEC, New connections
-            ('off', 'on'),      # No DNSSEC, Persistent
-            ('trust', 'off'),   # Trust DNSSEC, New connections  
-            ('trust', 'on'),    # Trust DNSSEC, Persistent
-            ('auth', 'off'),    # Auth DNSSEC, New connections
-            ('auth', 'on'),     # Auth DNSSEC, Persistent
-        ]
+        # DNSSEC configuration
+        self.df['dnssec_mode'] = 'none'
+        self.df.loc[self.df['auth_dnssec'] == True, 'dnssec_mode'] = 'auth'
+        self.df.loc[(self.df['dnssec'] == True) & (self.df['auth_dnssec'] == False), 'dnssec_mode'] = 'trust'
        
-        for dnssec_status, keepalive_status in config_order:
-            if dnssec_status in resolver_results[resolver] and keepalive_status in resolver_results[resolver][dnssec_status]:
-                results = resolver_results[resolver][dnssec_status][keepalive_status]
-                if results:  # Only print if there are results
-                    print_configuration_table(resolver, dnssec_status, keepalive_status, results)
-    
-    # Summary comparison across resolvers
-    print(f"\n{'=' * 100}")
-    print("CROSS-RESOLVER PROTOCOL COMPARISON")
-    print(f"{'=' * 100}")
-    
-    # Group by protocol and configuration for cross-resolver comparison
-    protocol_comparison = defaultdict(lambda: defaultdict(list))
-    
-    for resolver in resolver_results:
-        for dnssec in resolver_results[resolver]:
-            for keepalive in resolver_results[resolver][dnssec]:
-                for result in resolver_results[resolver][dnssec][keepalive]:
-                    config_key = f"{get_dnssec_display_name(dnssec)} - {'PERSISTENT' if keepalive == 'on' else 'NEW CONN'}"
-                    protocol_comparison[result['protocol']][config_key].append({
-                        'resolver': resolver,
-                        'avg_latency_ms': result['avg_latency_ms'],
-                        'total_queries': result['total_queries']
-                    })
-    
-    for protocol in sorted(protocol_comparison.keys()):
-        print(f"\n{protocol} Protocol Comparison")
-        print("-" * 100)
+        # Protocol categories
+        self.df['protocol_category'] = self.df['protocol_base'].map({
+            'udp': 'Plain DNS',
+            'tls': 'DoT',
+            'https': 'DoH',
+            'doh3': 'DoH/3',
+            'doq': 'DoQ'
+        })
        
-        for config in sorted(protocol_comparison[protocol].keys()):
-            resolvers_data = protocol_comparison[protocol][config]
-            if resolvers_data:
-                print(f"\n  {config}")
-                print("  " + "-" * 60)
-                print(f"  {'Resolver':<15} {'Avg Latency (ms)':<20} {'Queries':<10}")
-                print("  " + "-" * 60)
-                
-                # Sort by average latency
-                resolvers_data.sort(key=lambda x: x['avg_latency_ms'])
-                
-                for data in resolvers_data:
-                    print(f"  {data['resolver']:<15} {data['avg_latency_ms']:<20} {data['total_queries']:<10}")
+        # Connection persistence
+        self.df['persistence'] = self.df['keep_alive'].fillna(False)
+        
+    def generate_summary_statistics(self):
+        """Print latency and bandwidth summaries of self.df; return the protocol, provider, dnssec and bandwidth tables as a dict of DataFrames."""
+        print("\n" + "="*80)
+        print("SUMMARY STATISTICS")
+        print("="*80)
+        
+        # Overall statistics: every row of self.df, no grouping
+        print("\n--- Overall Performance ---")
+        print(f"Total queries: {len(self.df)}")
+        print(f"Mean latency: {self.df['duration_ms'].mean():.2f} ms")
+        print(f"Median latency: {self.df['duration_ms'].median():.2f} ms")
+        print(f"95th percentile: {self.df['duration_ms'].quantile(0.95):.2f} ms")
+        print(f"99th percentile: {self.df['duration_ms'].quantile(0.99):.2f} ms")
+        
+        # By protocol: each (name, func) pair below becomes one column of the result
+        print("\n--- Performance by Protocol ---")
+        protocol_stats = self.df.groupby('protocol_category')['duration_ms'].agg([
+            ('count', 'count'),
+            ('mean', 'mean'),
+            ('median', 'median'),
+            ('std', 'std'),
+            ('p95', lambda x: x.quantile(0.95)),
+            ('p99', lambda x: x.quantile(0.99))
+        ]).round(2)
+        print(protocol_stats)
+        
+        # By provider: same aggregation without the p99 column
+        print("\n--- Performance by Provider ---")
+        provider_stats = self.df.groupby('provider')['duration_ms'].agg([
+            ('count', 'count'),
+            ('mean', 'mean'),
+            ('median', 'median'),
+            ('std', 'std'),
+            ('p95', lambda x: x.quantile(0.95))
+        ]).round(2)
+        print(provider_stats)
+        
+        # DNSSEC impact: latency grouped by dnssec_mode
+        print("\n--- DNSSEC Validation Impact ---")
+        dnssec_stats = self.df.groupby('dnssec_mode')['duration_ms'].agg([
+            ('count', 'count'),
+            ('mean', 'mean'),
+            ('median', 'median'),
+            ('overhead_vs_none', lambda x: x.mean())  # plain group mean (same value as 'mean'); input to overhead_pct below
+        ]).round(2)
+        
+        # Overhead percentage relative to the 'none' group's rounded mean; the column is omitted when that baseline is missing or not positive
+        baseline = dnssec_stats.loc['none', 'mean'] if 'none' in dnssec_stats.index else 0
+        if baseline > 0:
+            dnssec_stats['overhead_pct'] = ((dnssec_stats['overhead_vs_none'] - baseline) / baseline * 100).round(1)
+        print(dnssec_stats)
+        
+        # Bandwidth analysis: mean/median request and response sizes per protocol (result has two-level column labels)
+        print("\n--- Bandwidth Usage ---")
+        bandwidth_stats = self.df.groupby('protocol_category').agg({
+            'request_size_bytes': ['mean', 'median'],
+            'response_size_bytes': ['mean', 'median']
+        }).round(2)
+        print(bandwidth_stats)
+        
+        # Persistence impact, tls/https rows only; this table is printed but is not part of the returned dict
+        print("\n--- Connection Persistence Impact ---")
+        persist_protocols = self.df[self.df['protocol_base'].isin(['tls', 'https'])]
+        if len(persist_protocols) > 0:
+            persist_stats = persist_protocols.groupby(['protocol_base', 'persistence'])['duration_ms'].agg([
+                ('mean', 'mean'),
+                ('median', 'median')
+            ]).round(2)
+            print(persist_stats)
+        
+        return {
+            'protocol': protocol_stats,
+            'provider': provider_stats,
+            'dnssec': dnssec_stats,
+            'bandwidth': bandwidth_stats
+        }
+    
+    def plot_latency_by_protocol(self, output_dir='plots'):
+        """Save a violin plot of duration_ms per protocol_category as <output_dir>/latency_by_protocol.png (output_dir is created if missing)."""
+        Path(output_dir).mkdir(parents=True, exist_ok=True)  # parents=True so a nested output_dir also works
+        
+        plt.figure(figsize=(14, 7))
+        
+        # Fixed left-to-right order; categories with no rows in self.df are left out
+        protocol_order = ['Plain DNS', 'DoT', 'DoH', 'DoH/3', 'DoQ']
+        available_protocols = [p for p in protocol_order if p in self.df['protocol_category'].values]
+        
+        sns.violinplot(data=self.df, x='protocol_category', y='duration_ms',
+                      order=available_protocols, inner='box', cut=0)
+        
+        plt.title('DNS Query Latency Distribution by Protocol', fontsize=14, fontweight='bold')
+        plt.xlabel('Protocol', fontsize=12)
+        plt.ylabel('Response Time (ms)', fontsize=12)
+        plt.xticks(rotation=0)
+        
+        # Write each protocol's mean as text, anchored at that mean on the y-axis
+        for i, protocol in enumerate(available_protocols):
+            mean_val = self.df[self.df['protocol_category'] == protocol]['duration_ms'].mean()
+            plt.text(i, mean_val, f'{mean_val:.1f}', ha='center', va='bottom', fontweight='bold')
+        
+        plt.tight_layout()
+        plt.savefig(f'{output_dir}/latency_by_protocol.png', bbox_inches='tight')
+        plt.close()
+        print(f"✓ Saved: latency_by_protocol.png")
+    
+    def plot_provider_comparison(self, output_dir='plots'):
+        """Save a 2x2 grid of per-provider duration_ms box plots (one panel per protocol, at most four) as <output_dir>/provider_comparison.png."""
+        Path(output_dir).mkdir(parents=True, exist_ok=True)  # parents=True so a nested output_dir also works
+        
+        fig, axes = plt.subplots(2, 2, figsize=(16, 12))
+        fig.suptitle('Provider Performance Comparison by Protocol', fontsize=16, fontweight='bold')
+        
+        protocols = self.df['protocol_category'].unique()
+        protocols = [p for p in ['Plain DNS', 'DoT', 'DoH', 'DoH/3'] if p in protocols]  # fixed panel order, present categories only
+        
+        for idx, protocol in enumerate(protocols[:4]):
+            ax = axes[idx // 2, idx % 2]  # fill the grid row by row
+            data = self.df[self.df['protocol_category'] == protocol]
+            if len(data) > 0:
+                sns.boxplot(data=data, x='provider', y='duration_ms', ax=ax)
+                ax.set_title(f'{protocol}', fontsize=12, fontweight='bold')
+                ax.set_xlabel('Provider', fontsize=10)
+                ax.set_ylabel('Response Time (ms)', fontsize=10)
+                ax.tick_params(axis='x', rotation=45)
+        for unused_ax in axes.flat[len(protocols):]:  # fewer than four protocols: hide the leftover empty frames
+            unused_ax.set_visible(False)
+        plt.tight_layout()
+        plt.savefig(f'{output_dir}/provider_comparison.png', bbox_inches='tight')
+        plt.close()
+        print(f"✓ Saved: provider_comparison.png")
+    
+    def plot_dnssec_impact(self, output_dir='plots'):
+        """Save a two-panel DNSSEC figure (mean latency per protocol and mode; auth-vs-trust overhead in %) as <output_dir>/dnssec_impact.png."""
+        Path(output_dir).mkdir(parents=True, exist_ok=True)  # parents=True so a nested output_dir also works
+        
+        # Rows with DNSSEC enabled (trust or auth); without any, nothing is drawn or saved
+        dnssec_data = self.df[self.df['dnssec_mode'] != 'none'].copy()
+        if len(dnssec_data) == 0:
+            print("⚠ No DNSSEC data available")
+            return
+        
+        fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 6))
+        # Plot 1: Overall DNSSEC impact (all rows, 'none' included, with 95% CI error bars)
+        protocol_order = ['Plain DNS', 'DoT', 'DoH', 'DoH/3', 'DoQ']
+        available = [p for p in protocol_order if p in self.df['protocol_category'].values]
+        sns.barplot(data=self.df, x='protocol_category', y='duration_ms',
+                   hue='dnssec_mode', order=available, ax=ax1, ci=95)
+        ax1.set_title('DNSSEC Validation Overhead by Protocol', fontsize=12, fontweight='bold')
+        ax1.set_xlabel('Protocol', fontsize=10)
+        ax1.set_ylabel('Mean Response Time (ms)', fontsize=10)
+        mode_labels = {'none': 'No DNSSEC', 'auth': 'Auth (Full)', 'trust': 'Trust (Resolver)'}
+        legend = ax1.legend(title='DNSSEC Mode')  # rebuilt from the hue handles; entries are renamed by text, never by position
+        for text in legend.get_texts():
+            text.set_text(mode_labels.get(text.get_text(), text.get_text()))
+        ax1.tick_params(axis='x', rotation=0)
+        
+        # Plot 2: Trust vs Auth comparison (relative difference of the per-protocol means)
+        comparison_data = dnssec_data.groupby(['protocol_category', 'dnssec_mode'])['duration_ms'].mean().reset_index()
+        pivot_data = comparison_data.pivot(index='protocol_category', columns='dnssec_mode', values='duration_ms')
+        
+        if 'auth' in pivot_data.columns and 'trust' in pivot_data.columns:  # otherwise the right panel stays empty
+            pivot_data['overhead_pct'] = ((pivot_data['auth'] - pivot_data['trust']) / pivot_data['trust'] * 100)
+            pivot_data['overhead_pct'].plot(kind='bar', ax=ax2, color='coral')
+            ax2.set_title('Auth vs Trust: Additional Overhead (%)', fontsize=12, fontweight='bold')
+            ax2.set_xlabel('Protocol', fontsize=10)
+            ax2.set_ylabel('Additional Overhead (%)', fontsize=10)
+            ax2.axhline(y=0, color='black', linestyle='--', linewidth=0.8)
+            ax2.tick_params(axis='x', rotation=45)
+            ax2.grid(axis='y', alpha=0.3)
+        
+        plt.tight_layout()
+        plt.savefig(f'{output_dir}/dnssec_impact.png', bbox_inches='tight')
+        plt.close()
+        print(f"✓ Saved: dnssec_impact.png")
+    
+    def plot_persistence_impact(self, output_dir='plots'):
+        """Save a bar chart of mean duration_ms for tls/https with and without keep-alive as <output_dir>/persistence_impact.png; print the reduction per protocol."""
+        Path(output_dir).mkdir(parents=True, exist_ok=True)  # parents=True so a nested output_dir also works
+        
+        persist_data = self.df[self.df['protocol_base'].isin(['tls', 'https'])].copy()  # only these two protocols are compared
+        if len(persist_data) == 0:
+            print("⚠ No persistence data available")
+            return
+        
+        plt.figure(figsize=(12, 6))
+        sns.barplot(data=persist_data, x='protocol_base', y='duration_ms',
+                   hue='persistence', ci=95)
+        
+        plt.title('Impact of Connection Persistence on Latency', fontsize=14, fontweight='bold')
+        plt.xlabel('Protocol', fontsize=12)
+        plt.ylabel('Mean Response Time (ms)', fontsize=12)
+        legend = plt.legend(title='Keep-Alive')  # rebuilt from the hue handles; entries are renamed by text, never by position
+        for text in legend.get_texts():
+            text.set_text({'False': 'Disabled', 'True': 'Enabled'}.get(text.get_text(), text.get_text()))
+        
+        # Print (not draw) the relative latency reduction per protocol when both variants have data
+        for protocol in persist_data['protocol_base'].unique():
+            protocol_data = persist_data[persist_data['protocol_base'] == protocol]
+            
+            no_persist = protocol_data[protocol_data['persistence'] == False]['duration_ms'].mean()
+            with_persist = protocol_data[protocol_data['persistence'] == True]['duration_ms'].mean()
+            
+            if not np.isnan(no_persist) and not np.isnan(with_persist):
+                reduction = ((no_persist - with_persist) / no_persist * 100)
+                print(f"{protocol}: {reduction:.1f}% reduction with persistence")
+        
+        plt.tight_layout()
+        plt.savefig(f'{output_dir}/persistence_impact.png', bbox_inches='tight')
+        plt.close()
+        print(f"✓ Saved: persistence_impact.png")
+    
+    def plot_bandwidth_overhead(self, output_dir='plots'):
+        """Save a two-panel figure (mean request/response bytes per protocol; total-bytes overhead vs Plain DNS in %) as <output_dir>/bandwidth_overhead.png."""
+        Path(output_dir).mkdir(parents=True, exist_ok=True)  # parents=True so a nested output_dir also works
+        
+        bandwidth_data = self.df.groupby('protocol_category').agg({
+            'request_size_bytes': 'mean',
+            'response_size_bytes': 'mean'
+        }).reset_index()
+        
+        bandwidth_data['total_bytes'] = (bandwidth_data['request_size_bytes'] +
+                                         bandwidth_data['response_size_bytes'])
+        
+        fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 6))
+        
+        # Plot 1: Request vs Response sizes, as paired bars around each protocol's tick
+        x = np.arange(len(bandwidth_data))
+        width = 0.35
+        
+        ax1.bar(x - width/2, bandwidth_data['request_size_bytes'], width,
+               label='Request', alpha=0.8)
+        ax1.bar(x + width/2, bandwidth_data['response_size_bytes'], width,
+               label='Response', alpha=0.8)
+        
+        ax1.set_xlabel('Protocol', fontsize=12)
+        ax1.set_ylabel('Bytes', fontsize=12)
+        ax1.set_title('Average Request/Response Sizes', fontsize=12, fontweight='bold')
+        ax1.set_xticks(x)
+        ax1.set_xticklabels(bandwidth_data['protocol_category'])
+        ax1.legend()
+        ax1.grid(axis='y', alpha=0.3)
+        
+        # Plot 2: Total bandwidth overhead vs UDP baseline; left empty without a positive 'Plain DNS' total
+        udp_total = bandwidth_data[bandwidth_data['protocol_category'] == 'Plain DNS']['total_bytes'].values
+        if len(udp_total) > 0 and udp_total[0] > 0:  # a zero baseline would turn every percentage into inf/NaN
+            bandwidth_data['overhead_vs_udp'] = ((bandwidth_data['total_bytes'] - udp_total[0]) / udp_total[0] * 100)
+            
+            colors = ['green' if pct < 0 else 'red' for pct in bandwidth_data['overhead_vs_udp']]  # green = smaller than baseline
+            ax2.bar(bandwidth_data['protocol_category'], bandwidth_data['overhead_vs_udp'],
+                   color=colors, alpha=0.7)
+            ax2.axhline(y=0, color='black', linestyle='--', linewidth=0.8)
+            ax2.set_xlabel('Protocol', fontsize=12)
+            ax2.set_ylabel('Overhead vs Plain DNS (%)', fontsize=12)
+            ax2.set_title('Bandwidth Overhead', fontsize=12, fontweight='bold')
+            ax2.grid(axis='y', alpha=0.3)
+        
+        plt.tight_layout()
+        plt.savefig(f'{output_dir}/bandwidth_overhead.png', bbox_inches='tight')
+        plt.close()
+        print(f"✓ Saved: bandwidth_overhead.png")
+    
+    def plot_heatmap(self, output_dir='plots'):
+        """Save an annotated heatmap of median duration_ms (rows: provider, columns: protocol_category) as <output_dir>/provider_protocol_heatmap.png."""
+        Path(output_dir).mkdir(parents=True, exist_ok=True)  # parents=True so a nested output_dir also works
+        
+        # Pivot table: unstack() moves protocol_category into columns; missing provider/protocol pairs become NaN
+        heatmap_data = self.df.groupby(['provider', 'protocol_category'])['duration_ms'].median().unstack()
+        
+        plt.figure(figsize=(12, 8))
+        sns.heatmap(heatmap_data, annot=True, fmt='.1f', cmap='RdYlGn_r',  # reversed palette: low latency green, high red
+                   cbar_kws={'label': 'Median Latency (ms)'})
+        
+        plt.title('DNS Provider-Protocol Performance Matrix', fontsize=14, fontweight='bold')
+        plt.xlabel('Protocol', fontsize=12)
+        plt.ylabel('Provider', fontsize=12)
+        
+        plt.tight_layout()
+        plt.savefig(f'{output_dir}/provider_protocol_heatmap.png', bbox_inches='tight')
+        plt.close()
+        print(f"✓ Saved: provider_protocol_heatmap.png")
+    
+    def plot_percentile_comparison(self, output_dir='plots'):
+        """Save grouped bars of the P50/P75/P90/P95/P99 duration_ms per protocol as <output_dir>/percentile_comparison.png."""
+        Path(output_dir).mkdir(parents=True, exist_ok=True)  # parents=True so a nested output_dir also works
+        
+        percentiles = [50, 75, 90, 95, 99]
+        protocol_order = ['Plain DNS', 'DoT', 'DoH', 'DoH/3', 'DoQ']
+        available = [p for p in protocol_order if p in self.df['protocol_category'].values]
+        
+        percentile_data = []  # long format: one row per (protocol, percentile)
+        for protocol in available:
+            data = self.df[self.df['protocol_category'] == protocol]['duration_ms']
+            for p in percentiles:
+                percentile_data.append({
+                    'protocol': protocol,
+                    'percentile': f'P{p}',
+                    'latency': data.quantile(p / 100)  # Series.quantile skips NaN, matching the summary statistics
+                })
+        
+        percentile_df = pd.DataFrame(percentile_data)
+        
+        plt.figure(figsize=(14, 7))
+        sns.barplot(data=percentile_df, x='protocol', y='latency', hue='percentile', order=available)
+        
+        plt.title('Latency Percentiles by Protocol', fontsize=14, fontweight='bold')
+        plt.xlabel('Protocol', fontsize=12)
+        plt.ylabel('Response Time (ms)', fontsize=12)
+        plt.legend(title='Percentile', bbox_to_anchor=(1.05, 1), loc='upper left')  # legend placed outside the axes, to the right
+        
+        plt.tight_layout()
+        plt.savefig(f'{output_dir}/percentile_comparison.png', bbox_inches='tight')
+        plt.close()
+        print(f"✓ Saved: percentile_comparison.png")
+    
+    def statistical_tests(self):
+        """Print non-parametric significance tests on duration_ms: Kruskal-Wallis across protocols, Mann-Whitney U for none-vs-auth and trust-vs-auth."""
+        print("\n" + "="*80)
+        print("STATISTICAL TESTS")
+        print("="*80)
+        
+        # Test 1: Protocol differences (Kruskal-Wallis); NaN categories are dropped so no group is empty
+        protocols = self.df['protocol_category'].dropna().unique()
+        if len(protocols) >= 2:  # the test is defined for two or more groups
+            groups = [self.df[self.df['protocol_category'] == p]['duration_ms'].values
+                     for p in protocols]
+            h_stat, p_value = stats.kruskal(*groups)
+            print(f"\n--- Kruskal-Wallis Test (Protocol Differences) ---")
+            print(f"H-statistic: {h_stat:.4f}")
+            print(f"p-value: {p_value:.4e}")
+            print(f"Result: {'Significant' if p_value < 0.05 else 'Not significant'} differences between protocols")
+        
+        # Test 2: DNSSEC impact (Mann-Whitney U, two-sided: only "different" is claimed)
+        if 'none' in self.df['dnssec_mode'].values and 'auth' in self.df['dnssec_mode'].values:
+            none_data = self.df[self.df['dnssec_mode'] == 'none']['duration_ms']
+            auth_data = self.df[self.df['dnssec_mode'] == 'auth']['duration_ms']
+            
+            u_stat, p_value = stats.mannwhitneyu(none_data, auth_data, alternative='two-sided')
+            print(f"\n--- Mann-Whitney U Test (No DNSSEC vs Auth) ---")
+            print(f"U-statistic: {u_stat:.4f}")
+            print(f"p-value: {p_value:.4e}")
+            print(f"Result: {'Significant' if p_value < 0.05 else 'Not significant'} difference")
+        
+        # Test 3: Trust vs Auth comparison; one-sided, because the printed result claims a direction (auth slower)
+        if 'trust' in self.df['dnssec_mode'].values and 'auth' in self.df['dnssec_mode'].values:
+            trust_data = self.df[self.df['dnssec_mode'] == 'trust']['duration_ms']
+            auth_data = self.df[self.df['dnssec_mode'] == 'auth']['duration_ms']
+            
+            u_stat, p_value = stats.mannwhitneyu(trust_data, auth_data, alternative='less')  # H1: trust latencies are lower than auth
+            print(f"\n--- Mann-Whitney U Test (Trust vs Auth) ---")
+            print(f"U-statistic: {u_stat:.4f}")
+            print(f"p-value: {p_value:.4e}")
+            print(f"Result: Auth is {'significantly' if p_value < 0.05 else 'not significantly'} slower than Trust")
+    
+    def generate_latex_table(self, output_dir='plots'):
+        """Write per-protocol duration_ms statistics (mean, median, std, P95, P99) as a LaTeX table to <output_dir>/summary_table.tex and print it."""
+        Path(output_dir).mkdir(parents=True, exist_ok=True)  # parents=True so a nested output_dir also works
+        
+        # Summary table by protocol; the tuple names below become the table's column headers
+        summary = self.df.groupby('protocol_category')['duration_ms'].agg([
+            ('Mean', 'mean'),
+            ('Median', 'median'),
+            ('Std Dev', 'std'),
+            ('P95', lambda x: x.quantile(0.95)),
+            ('P99', lambda x: x.quantile(0.99))
+        ]).round(2)
+        
+        latex_code = summary.to_latex(float_format="%.2f")
+        
+        with open(f'{output_dir}/summary_table.tex', 'w', encoding='utf-8') as f:  # explicit encoding: output no longer depends on the locale
+            f.write(latex_code)
+        
+        print(f"✓ Saved: summary_table.tex")
+        print("\nLaTeX Table Preview:")
+        print(latex_code)
+    
+    def run_full_analysis(self):
+        """Run the whole pipeline in order: load data, print statistics and tests, save seven plots and the LaTeX table (each with its default output_dir)."""
+        print("="*80)
+        print("DNS QoS Analysis - Starting Full Analysis")
+        print("="*80)
+        
+        # Load data first: every later step reads self.df
+        print("\n[1/10] Loading data...")
+        self.load_all_data()
+        
+        # Generate statistics (the returned dict is not used here)
+        print("\n[2/10] Generating summary statistics...")
+        self.generate_summary_statistics()
+        
+        # Statistical tests (printed only)
+        print("\n[3/10] Running statistical tests...")
+        self.statistical_tests()
+        
+        # Generate plots: steps 4-10, one PNG per call
+        print("\n[4/10] Creating latency by protocol plot...")
+        self.plot_latency_by_protocol()
+        
+        print("\n[5/10] Creating provider comparison plot...")
+        self.plot_provider_comparison()
+        
+        print("\n[6/10] Creating DNSSEC impact plot...")
+        self.plot_dnssec_impact()
+        
+        print("\n[7/10] Creating persistence impact plot...")
+        self.plot_persistence_impact()
+        
+        print("\n[8/10] Creating bandwidth overhead plot...")
+        self.plot_bandwidth_overhead()
+        
+        print("\n[9/10] Creating heatmap...")
+        self.plot_heatmap()
+        
+        print("\n[10/10] Creating percentile comparison...")
+        self.plot_percentile_comparison()
+        
+        # Generate LaTeX table (extra step outside the 1-10 numbering)
+        print("\n[Bonus] Generating LaTeX table...")
+        self.generate_latex_table()
+        
+        print("\n" + "="*80)
+        print("✓ Analysis Complete! Check the 'plots' directory for all visualizations.")
+        print("="*80)
+

 if __name__ == "__main__":
-    root_dir = "."
-    output_file = "dns_metrics.csv"
-    
-    analyze_dns_data(root_dir, output_file)
+    analyzer = DNSAnalyzer(results_dir='results')
+    analyzer.run_full_analysis()