import pandas as pd import matplotlib.pyplot as plt import seaborn as sns import numpy as np from pathlib import Path import datetime from dateutil import parser as date_parser import dpkt # Set style sns.set_style("whitegrid") plt.rcParams['figure.dpi'] = 300 plt.rcParams['savefig.dpi'] = 300 plt.rcParams['font.size'] = 10 class FastDNSAnalyzer: def __init__(self, results_dir='results'): self.results_dir = Path(results_dir) self.all_data = [] def should_include_file(self, filename): """Filter out DNSSEC and non-persist files""" name = filename.stem if 'auth' in name or 'trust' in name: return False if name in ['tls', 'https']: return False return True def parse_rfc3339_nano(self, timestamp_str): """Parse RFC3339Nano timestamp with timezone""" try: dt = date_parser.parse(timestamp_str) return dt.astimezone(datetime.timezone.utc).timestamp() except Exception as e: print(f" Error parsing timestamp {timestamp_str}: {e}") return None def extract_bandwidth_from_pcap_fast(self, pcap_file, csv_data): """Fast bandwidth extraction using dpkt""" print(f" Analyzing pcap: {pcap_file.name}") try: with open(pcap_file, 'rb') as f: pcap = dpkt.pcap.Reader(f) # Build query time windows query_windows = [] for idx, row in csv_data.iterrows(): start_time = self.parse_rfc3339_nano(row['timestamp']) if start_time is None: continue duration_seconds = row['duration_ns'] / 1_000_000_000 end_time = start_time + duration_seconds query_windows.append({ 'index': idx, 'start': start_time, 'end': end_time, 'bytes_sent': 0, 'bytes_received': 0, 'packets_sent': 0, 'packets_received': 0 }) if not query_windows: print(" ✗ No valid query windows") return None # Sort windows for faster matching query_windows.sort(key=lambda x: x['start']) # Process packets packet_count = 0 matched_count = 0 for timestamp, buf in pcap: packet_count += 1 packet_size = len(buf) # Quick parse to determine direction try: eth = dpkt.ethernet.Ethernet(buf) # Get IP layer if isinstance(eth.data, dpkt.ip.IP): ip = eth.data elif isinstance(eth.data, dpkt.ip6.IP6): ip = eth.data else: continue # Get transport layer if isinstance(ip.data, dpkt.udp.UDP): transport = ip.data src_port = transport.sport dst_port = transport.dport elif isinstance(ip.data, dpkt.tcp.TCP): transport = ip.data src_port = transport.sport dst_port = transport.dport else: continue # Determine direction (client port usually higher) is_outbound = src_port > dst_port # Binary search for matching window for window in query_windows: if window['start'] <= timestamp <= window['end']: if is_outbound: window['bytes_sent'] += packet_size window['packets_sent'] += 1 else: window['bytes_received'] += packet_size window['packets_received'] += 1 matched_count += 1 break elif timestamp < window['start']: break # No more windows to check except Exception: continue print(f" ✓ Processed {packet_count} packets, matched {matched_count}") # Convert to DataFrame bandwidth_df = pd.DataFrame(query_windows) return bandwidth_df[['index', 'bytes_sent', 'bytes_received', 'packets_sent', 'packets_received']] except Exception as e: print(f" ✗ Error reading pcap: {e}") return None def load_data(self): """Load all relevant CSV files and extract bandwidth from pcaps""" print("Loading data and analyzing bandwidth...") for provider_dir in self.results_dir.iterdir(): if not provider_dir.is_dir(): continue provider = provider_dir.name for csv_file in provider_dir.glob('*.csv'): if not self.should_include_file(csv_file): continue try: df = pd.read_csv(csv_file) df['provider'] = provider df['test_file'] = csv_file.stem df['csv_path'] = str(csv_file) # Find corresponding pcap file pcap_file = csv_file.with_suffix('.pcap') if pcap_file.exists(): print(f" Processing: {provider}/{csv_file.name}") bandwidth_data = self.extract_bandwidth_from_pcap_fast(pcap_file, df) if bandwidth_data is not None and len(bandwidth_data) > 0: # Merge bandwidth data df = df.reset_index(drop=True) for col in ['bytes_sent', 'bytes_received', 'packets_sent', 'packets_received']: df[col] = 0 for _, row in bandwidth_data.iterrows(): idx = int(row['index']) if idx < len(df): df.at[idx, 'bytes_sent'] = row['bytes_sent'] df.at[idx, 'bytes_received'] = row['bytes_received'] df.at[idx, 'packets_sent'] = row['packets_sent'] df.at[idx, 'packets_received'] = row['packets_received'] df['total_bytes'] = df['bytes_sent'] + df['bytes_received'] print(f" ✓ Extracted bandwidth for {len(df)} queries") else: print(f" ⚠ Could not extract bandwidth data") else: print(f" ⚠ No pcap found for {csv_file.name}") self.all_data.append(df) except Exception as e: print(f" ✗ Error loading {csv_file}: {e}") import traceback traceback.print_exc() print(f"\nTotal files loaded: {len(self.all_data)}") def create_line_graphs(self, output_dir='output/line_graphs'): """Create line graphs for latency and bandwidth""" Path(output_dir).mkdir(parents=True, exist_ok=True) print("\nGenerating line graphs...") for df in self.all_data: provider = df['provider'].iloc[0] test_name = df['test_file'].iloc[0] df['query_index'] = range(1, len(df) + 1) # Create figure with 2 subplots fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(14, 10)) # Plot 1: Latency ax1.plot(df['query_index'], df['duration_ms'], marker='o', markersize=4, linewidth=1, alpha=0.7, color='steelblue') mean_latency = df['duration_ms'].mean() ax1.axhline(y=mean_latency, color='r', linestyle='--', label=f'Mean: {mean_latency:.2f} ms', linewidth=2) ax1.set_xlabel('Query Number', fontsize=12) ax1.set_ylabel('Latency (ms)', fontsize=12) ax1.set_title('Latency Over Time', fontsize=12, fontweight='bold') ax1.legend() ax1.grid(True, alpha=0.3) # Plot 2: Bandwidth if 'total_bytes' in df.columns and df['total_bytes'].sum() > 0: ax2.plot(df['query_index'], df['bytes_sent'], marker='s', markersize=4, linewidth=1, alpha=0.7, color='orange', label='Sent') ax2.plot(df['query_index'], df['bytes_received'], marker='^', markersize=4, linewidth=1, alpha=0.7, color='green', label='Received') mean_sent = df['bytes_sent'].mean() mean_received = df['bytes_received'].mean() ax2.axhline(y=mean_sent, color='orange', linestyle='--', linewidth=1.5, alpha=0.5) ax2.axhline(y=mean_received, color='green', linestyle='--', linewidth=1.5, alpha=0.5) ax2.set_xlabel('Query Number', fontsize=12) ax2.set_ylabel('Bytes', fontsize=12) ax2.set_title(f'Bandwidth Over Time (Mean: ↑{mean_sent:.0f}B ↓{mean_received:.0f}B)', fontsize=12, fontweight='bold') ax2.legend() ax2.grid(True, alpha=0.3) fig.suptitle(f'{provider.upper()} - {test_name}', fontsize=14, fontweight='bold') plt.tight_layout() filename = f"{provider}_{test_name}.png" plt.savefig(f'{output_dir}/{filename}', bbox_inches='tight') plt.close() print(f" ✓ Created: {filename}") def get_protocol_name(self, test_file): """Extract clean protocol name""" name = test_file.replace('-persist', '') protocol_map = { 'udp': 'Plain DNS (UDP)', 'tls': 'DoT (DNS over TLS)', 'https': 'DoH (DNS over HTTPS)', 'doh3': 'DoH/3 (DNS over HTTP/3)', 'doq': 'DoQ (DNS over QUIC)' } return protocol_map.get(name, name.upper()) def create_resolver_comparison_bars(self, output_dir='output/comparisons'): """Create bar graphs comparing resolvers for latency and bandwidth""" Path(output_dir).mkdir(parents=True, exist_ok=True) print("\nGenerating resolver comparison graphs...") combined_df = pd.concat(self.all_data, ignore_index=True) protocols = combined_df['test_file'].unique() for protocol in protocols: protocol_data = combined_df[combined_df['test_file'] == protocol] protocol_name = self.get_protocol_name(protocol) # Latency stats latency_stats = protocol_data.groupby('provider')['duration_ms'].agg([ ('mean', 'mean'), ('median', 'median'), ('std', 'std') ]).reset_index() # Create latency comparison fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 6)) fig.suptitle(f'{protocol_name} - Latency Comparison', fontsize=16, fontweight='bold') # Mean latency bars1 = ax1.bar(latency_stats['provider'], latency_stats['mean'], color='steelblue', alpha=0.8, edgecolor='black') ax1.errorbar(latency_stats['provider'], latency_stats['mean'], yerr=latency_stats['std'], fmt='none', color='black', capsize=5, alpha=0.6) for bar in bars1: height = bar.get_height() ax1.text(bar.get_x() + bar.get_width()/2., height, f'{height:.2f}', ha='center', va='bottom', fontweight='bold') ax1.set_xlabel('Resolver', fontsize=12) ax1.set_ylabel('Mean Latency (ms)', fontsize=12) ax1.set_title('Mean Latency', fontsize=12) ax1.grid(axis='y', alpha=0.3) # Median latency bars2 = ax2.bar(latency_stats['provider'], latency_stats['median'], color='coral', alpha=0.8, edgecolor='black') for bar in bars2: height = bar.get_height() ax2.text(bar.get_x() + bar.get_width()/2., height, f'{height:.2f}', ha='center', va='bottom', fontweight='bold') ax2.set_xlabel('Resolver', fontsize=12) ax2.set_ylabel('Median Latency (ms)', fontsize=12) ax2.set_title('Median Latency', fontsize=12) ax2.grid(axis='y', alpha=0.3) plt.tight_layout() plt.savefig(f'{output_dir}/latency_{protocol}.png', bbox_inches='tight') plt.close() print(f" ✓ Created: latency_{protocol}.png") # Bandwidth comparison if 'total_bytes' in protocol_data.columns and protocol_data['total_bytes'].sum() > 0: bandwidth_stats = protocol_data.groupby('provider').agg({ 'bytes_sent': 'mean', 'bytes_received': 'mean', 'total_bytes': 'mean' }).reset_index() fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 6)) fig.suptitle(f'{protocol_name} - Bandwidth Comparison', fontsize=16, fontweight='bold') # Sent vs Received x = np.arange(len(bandwidth_stats)) width = 0.35 bars1 = ax1.bar(x - width/2, bandwidth_stats['bytes_sent'], width, label='Sent', color='orange', alpha=0.8, edgecolor='black') bars2 = ax1.bar(x + width/2, bandwidth_stats['bytes_received'], width, label='Received', color='green', alpha=0.8, edgecolor='black') ax1.set_xlabel('Resolver', fontsize=12) ax1.set_ylabel('Bytes per Query', fontsize=12) ax1.set_title('Average Bandwidth per Query', fontsize=12) ax1.set_xticks(x) ax1.set_xticklabels(bandwidth_stats['provider']) ax1.legend() ax1.grid(axis='y', alpha=0.3) # Total bandwidth bars3 = ax2.bar(bandwidth_stats['provider'], bandwidth_stats['total_bytes'], color='purple', alpha=0.8, edgecolor='black') for bar in bars3: height = bar.get_height() ax2.text(bar.get_x() + bar.get_width()/2., height, f'{height:.0f}', ha='center', va='bottom', fontweight='bold') ax2.set_xlabel('Resolver', fontsize=12) ax2.set_ylabel('Total Bytes per Query', fontsize=12) ax2.set_title('Total Bandwidth per Query', fontsize=12) ax2.grid(axis='y', alpha=0.3) plt.tight_layout() plt.savefig(f'{output_dir}/bandwidth_{protocol}.png', bbox_inches='tight') plt.close() print(f" ✓ Created: bandwidth_{protocol}.png") def generate_latex_tables(self, output_dir='output/tables'): """Generate LaTeX tables with latency and bandwidth statistics""" Path(output_dir).mkdir(parents=True, exist_ok=True) print("\nGenerating LaTeX tables...") combined_df = pd.concat(self.all_data, ignore_index=True) # Generate latency table for each resolver for provider in combined_df['provider'].unique(): provider_data = combined_df[combined_df['provider'] == provider] stats = provider_data.groupby('test_file')['duration_ms'].agg([ ('Mean', 'mean'), ('Median', 'median'), ('Std Dev', 'std'), ('P95', lambda x: x.quantile(0.95)), ('P99', lambda x: x.quantile(0.99)) ]).round(2) stats.index = stats.index.map(self.get_protocol_name) stats.index.name = 'Protocol' latex_code = stats.to_latex( caption=f'{provider.upper()} - Latency Statistics (ms)', label=f'tab:{provider}_latency', float_format="%.2f" ) with open(f'{output_dir}/{provider}_latency.tex', 'w') as f: f.write(latex_code) print(f" ✓ Created: {provider}_latency.tex") # Generate bandwidth table for each resolver for provider in combined_df['provider'].unique(): provider_data = combined_df[combined_df['provider'] == provider] if 'total_bytes' not in provider_data.columns or provider_data['total_bytes'].sum() == 0: continue bandwidth_stats = provider_data.groupby('test_file').agg({ 'bytes_sent': 'mean', 'bytes_received': 'mean', 'total_bytes': 'mean' }).round(2) bandwidth_stats.columns = ['Avg Sent (B)', 'Avg Received (B)', 'Avg Total (B)'] bandwidth_stats.index = bandwidth_stats.index.map(self.get_protocol_name) bandwidth_stats.index.name = 'Protocol' latex_code = bandwidth_stats.to_latex( caption=f'{provider.upper()} - Bandwidth Statistics', label=f'tab:{provider}_bandwidth', float_format="%.2f" ) with open(f'{output_dir}/{provider}_bandwidth.tex', 'w') as f: f.write(latex_code) print(f" ✓ Created: {provider}_bandwidth.tex") # Generate protocol efficiency table print("\nGenerating protocol efficiency table...") if 'total_bytes' in combined_df.columns and combined_df['total_bytes'].sum() > 0: protocol_bandwidth = combined_df.groupby('test_file').agg({ 'bytes_sent': 'mean', 'bytes_received': 'mean', 'total_bytes': 'mean' }).round(2) # Find UDP baseline udp_baseline = None for protocol in protocol_bandwidth.index: if 'udp' in protocol: udp_baseline = protocol_bandwidth.loc[protocol, 'total_bytes'] break if udp_baseline and udp_baseline > 0: protocol_bandwidth['Overhead vs UDP (%)'] = ( (protocol_bandwidth['total_bytes'] - udp_baseline) / udp_baseline * 100 ).round(1) protocol_bandwidth['Efficiency (%)'] = ( 100 / (1 + protocol_bandwidth['Overhead vs UDP (%)'] / 100) ).round(1) protocol_bandwidth.columns = ['Avg Sent (B)', 'Avg Received (B)', 'Avg Total (B)', 'Overhead (%)', 'Efficiency (%)'] protocol_bandwidth.index = protocol_bandwidth.index.map(self.get_protocol_name) protocol_bandwidth.index.name = 'Protocol' latex_code = protocol_bandwidth.to_latex( caption='Protocol Bandwidth Efficiency Comparison', label='tab:protocol_efficiency', float_format="%.2f" ) with open(f'{output_dir}/protocol_efficiency.tex', 'w') as f: f.write(latex_code) print(f" ✓ Created: protocol_efficiency.tex") print("\n--- Protocol Efficiency ---") print(protocol_bandwidth.to_string()) # Generate combined comparison tables for metric in ['Mean', 'Median', 'P95']: comparison_stats = combined_df.groupby(['provider', 'test_file'])['duration_ms'].agg([ ('Mean', 'mean'), ('Median', 'median'), ('P95', lambda x: x.quantile(0.95)) ]).round(2) pivot_table = comparison_stats[metric].unstack(level=0) pivot_table.index = pivot_table.index.map(self.get_protocol_name) pivot_table.index.name = 'Protocol' latex_code = pivot_table.to_latex( caption=f'Resolver Latency Comparison - {metric} (ms)', label=f'tab:comparison_{metric.lower()}', float_format="%.2f" ) with open(f'{output_dir}/comparison_{metric.lower()}.tex', 'w') as f: f.write(latex_code) print(f" ✓ Created: comparison_{metric.lower()}.tex") def run_analysis(self): """Run the complete analysis""" print("="*80) print("Fast DNS QoS Analysis with Bandwidth") print("="*80) self.load_data() if not self.all_data: print("\n⚠ No data loaded.") return print("\n" + "="*80) self.create_line_graphs() print("\n" + "="*80) self.create_resolver_comparison_bars() print("\n" + "="*80) self.generate_latex_tables() print("\n" + "="*80) print("✓ Analysis Complete!") print("="*80) if __name__ == "__main__": analyzer = FastDNSAnalyzer(results_dir='results') analyzer.run_analysis()