feat(dns): add dnscrypt and dns over tcp
@@ -1,289 +1,498 @@
-import csv
-import os
-import statistics
-from collections import defaultdict
+import pandas as pd
+import matplotlib.pyplot as plt
+import seaborn as sns
+import numpy as np
 from pathlib import Path
+from scipy import stats
+import warnings

-def map_server_to_resolver(server):
-    """Map server address/domain to resolver name"""
-    server_lower = server.lower()
-
-    if '1.1.1.1' in server_lower or 'cloudflare' in server_lower:
-        return 'Cloudflare'
-    elif '8.8.8.8' in server_lower or 'google' in server_lower:
-        return 'Google'
-    elif '9.9.9.9' in server_lower or 'quad9' in server_lower:
-        return 'Quad9'
-    elif 'adguard' in server_lower:
-        return 'AdGuard'
-    else:
-        return server  # Fallback to original server name
+warnings.filterwarnings('ignore')

-def extract_from_new_format(filename):
-    """Parse new filename format: protocol[-flags]-timestamp.csv"""
-    base = filename.replace('.csv', '')
-    parts = base.split('-')
-
-    if len(parts) < 2:
-        return None, None, None, None
-
-    protocol = parts[0]
-    timestamp = parts[-1]
-
-    # Flags are everything between protocol and timestamp
-    flags_str = '-'.join(parts[1:-1])
-
-    # Determine DNSSEC status
-    if 'auth' in flags_str:
-        dnssec_status = 'auth'  # Authoritative DNSSEC
-    elif 'trust' in flags_str:
-        dnssec_status = 'trust'  # Trust-based DNSSEC
-    else:
-        dnssec_status = 'off'
-
-    keepalive_status = 'on' if 'persist' in flags_str else 'off'
-
-    return protocol, dnssec_status, keepalive_status, flags_str
+# Set style for publication-quality plots
+sns.set_style("whitegrid")
+plt.rcParams['figure.dpi'] = 300
+plt.rcParams['savefig.dpi'] = 300
+plt.rcParams['font.size'] = 10
+plt.rcParams['figure.figsize'] = (12, 6)

-def extract_server_info_from_csv(row):
-    """Extract DNSSEC info from CSV row data"""
-    dnssec = row.get('dnssec', 'false').lower() == 'true'
-    auth_dnssec = row.get('auth_dnssec', 'false').lower() == 'true'
-    keepalive = row.get('keep_alive', 'false').lower() == 'true'
-
-    if dnssec:
-        if auth_dnssec:
-            dnssec_status = 'auth'
-        else:
-            dnssec_status = 'trust'
-    else:
-        dnssec_status = 'off'
-
-    keepalive_status = 'on' if keepalive else 'off'
-
-    return dnssec_status, keepalive_status

-def extract_server_info(file_path, row):
-    """Extract info using directory structure, filename, and CSV data"""
-    path = Path(file_path)
-
-    # First try to get DNSSEC info from CSV row (most accurate)
-    try:
-        csv_dnssec_status, csv_keepalive_status = extract_server_info_from_csv(row)
-        protocol = row.get('protocol', '').lower()
+class DNSAnalyzer:
+    def __init__(self, results_dir='results'):
+        self.results_dir = Path(results_dir)
+        self.df = None

-        # Get server from directory structure
-        parts = path.parts
-        if len(parts) >= 4:
-            potential_date = parts[-2]
-            # Check if it's a date like YYYY-MM-DD
-            if len(potential_date) == 10 and potential_date[4] == '-' and potential_date[7] == '-' and potential_date.replace('-', '').isdigit():
-                server = parts[-3]  # resolver folder (e.g., cloudflare)
-                return protocol, server, csv_dnssec_status, csv_keepalive_status
+    def load_all_data(self):
+        """Load all CSV files from the results directory"""
+        data_frames = []

-        # Fallback to DNS server field
-        server = row.get('dns_server', '')
-        return protocol, server, csv_dnssec_status, csv_keepalive_status
+        providers = ['adguard', 'cloudflare', 'google', 'quad9']

-    except (KeyError, ValueError):
-        pass
-
-    # Fallback to filename parsing
-    filename = path.name
-    protocol, dnssec_status, keepalive_status, flags = extract_from_new_format(filename)
-
-    if protocol:
-        # Get server from directory structure
-        parts = path.parts
-        if len(parts) >= 4:
-            potential_date = parts[-2]
-            if len(potential_date) == 10 and potential_date[4] == '-' and potential_date[7] == '-' and potential_date.replace('-', '').isdigit():
-                server = parts[-3]
-                return protocol, server, dnssec_status, keepalive_status
-
-        # Fallback to DNS server field
-        server = row.get('dns_server', '')
-        return protocol, server, dnssec_status, keepalive_status
-
-    return None, None, None, None

-def get_dnssec_display_name(dnssec_status):
-    """Convert DNSSEC status to display name"""
-    if dnssec_status == 'auth':
-        return 'DNSSEC (Authoritative)'
-    elif dnssec_status == 'trust':
-        return 'DNSSEC (Trust-based)'
-    else:
-        return 'No DNSSEC'

-def analyze_dns_data(root_directory, output_file):
-    """Analyze DNS data and generate metrics"""
-
-    # Dictionary to store measurements: {(resolver, protocol, dnssec, keepalive): [durations]}
-    measurements = defaultdict(list)
-
-    # Walk through all directories
-    for root, dirs, files in os.walk(root_directory):
-        for file in files:
-            if file.endswith('.csv'):
-                file_path = os.path.join(root, file)
-                print(f"Processing: {file_path}")
+        for provider in providers:
+            provider_path = self.results_dir / provider
+            if not provider_path.exists():
+                continue
+
+            for csv_file in provider_path.glob('*.csv'):
                 try:
-                    with open(file_path, 'r', newline='') as csvfile:
-                        reader = csv.DictReader(csvfile)
-
-                        for row_num, row in enumerate(reader, 2):  # Start at 2 since header is row 1
-                            try:
-                                protocol, server, dnssec_status, keepalive_status = extract_server_info(file_path, row)
-
-                                if protocol and server:
-                                    resolver = map_server_to_resolver(server)
-                                    duration_ms = float(row.get('duration_ms', 0))
-
-                                    # Only include successful queries
-                                    if row.get('response_code', '') in ['NOERROR', '']:
-                                        key = (resolver, protocol, dnssec_status, keepalive_status)
-                                        measurements[key].append(duration_ms)
-
-                            except (ValueError, TypeError) as e:
-                                print(f"Data parse error in {file_path} row {row_num}: {e}")
-                                continue
+                    df = pd.read_csv(csv_file)
+                    df['provider'] = provider
+                    df['test_config'] = csv_file.stem
+                    data_frames.append(df)
                 except Exception as e:
-                    print(f"Error processing file {file_path}: {e}")
-                    continue

-    # Calculate statistics grouped by resolver first, then by configuration
-    resolver_results = defaultdict(lambda: defaultdict(lambda: defaultdict(list)))
-
-    for (resolver, protocol, dnssec, keepalive), durations in measurements.items():
-        if durations:
-            stats = {
-                'protocol': protocol.upper(),
-                'dnssec': dnssec,
-                'keepalive': keepalive,
-                'total_queries': len(durations),
-                'avg_latency_ms': round(statistics.mean(durations), 3),
-                'median_latency_ms': round(statistics.median(durations), 3),
-                'min_latency_ms': round(min(durations), 3),
-                'max_latency_ms': round(max(durations), 3),
-                'std_dev_ms': round(statistics.stdev(durations) if len(durations) > 1 else 0, 3),
-                'p95_latency_ms': round(statistics.quantiles(durations, n=20)[18], 3) if len(durations) >= 20 else round(max(durations), 3),
-                'p99_latency_ms': round(statistics.quantiles(durations, n=100)[98], 3) if len(durations) >= 100 else round(max(durations), 3)
-            }
-            # Group by resolver -> dnssec -> keepalive -> protocol
-            resolver_results[resolver][dnssec][keepalive].append(stats)
-
-    # Sort each configuration's results by average latency
-    for resolver in resolver_results:
-        for dnssec in resolver_results[resolver]:
-            for keepalive in resolver_results[resolver][dnssec]:
-                resolver_results[resolver][dnssec][keepalive].sort(key=lambda x: x['avg_latency_ms'])
-
-    # Write to CSV with all data
-    all_results = []
-    for resolver in resolver_results:
-        for dnssec in resolver_results[resolver]:
-            for keepalive in resolver_results[resolver][dnssec]:
-                for result in resolver_results[resolver][dnssec][keepalive]:
-                    result['resolver'] = resolver
-                    all_results.append(result)
-
-    with open(output_file, 'w', newline='') as csvfile:
-        fieldnames = [
-            'resolver', 'protocol', 'dnssec', 'keepalive', 'total_queries',
-            'avg_latency_ms', 'median_latency_ms', 'min_latency_ms',
-            'max_latency_ms', 'std_dev_ms', 'p95_latency_ms', 'p99_latency_ms'
-        ]
+                    print(f"Error loading {csv_file}: {e}")

-        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
-        writer.writeheader()
-        writer.writerows(all_results)
-
-    print(f"\nAnalysis complete! Full results written to {output_file}")
-    print(f"Total measurements: {sum(len(durations) for durations in measurements.values())}")

-    def print_configuration_table(resolver, dnssec_status, keepalive_status, results):
-        """Print a formatted table for a specific configuration"""
-        ka_indicator = "PERSISTENT" if keepalive_status == 'on' else "NEW CONN"
-        dnssec_display = get_dnssec_display_name(dnssec_status)
+        self.df = pd.concat(data_frames, ignore_index=True)
+        self._clean_and_enrich_data()
+        print(f"Loaded {len(self.df)} DNS queries across {len(data_frames)} test configurations")

-        print(f"\n {dnssec_display} - {ka_indicator}")
-        print(" " + "-" * 90)
-        print(f" {'Protocol':<12} {'Queries':<8} {'Avg(ms)':<10} {'Median(ms)':<12} {'Min(ms)':<10} {'Max(ms)':<10} {'P95(ms)':<10}")
-        print(" " + "-" * 90)
+    def _clean_and_enrich_data(self):
+        """Clean data and add useful columns"""
+        # Remove failed queries
+        self.df = self.df[self.df['error'].isna()]

-        for result in results:
-            print(f" {result['protocol']:<12} {result['total_queries']:<8} "
-                  f"{result['avg_latency_ms']:<10} {result['median_latency_ms']:<12} "
-                  f"{result['min_latency_ms']:<10} {result['max_latency_ms']:<10} "
-                  f"{result['p95_latency_ms']:<10}")

-    # Print results grouped by resolver first
-    print(f"\n{'=' * 100}")
-    print("DNS RESOLVER PERFORMANCE COMPARISON")
-    print(f"{'=' * 100}")

-    for resolver in sorted(resolver_results.keys()):
-        print(f"\n{resolver} DNS Resolver")
-        print("=" * 100)
+        # Extract protocol base (remove -auth, -trust suffixes)
+        self.df['protocol_base'] = self.df['protocol'].str.replace('-auth|-trust', '', regex=True)

-        # Order configurations logically
-        config_order = [
-            ('off', 'off'),    # No DNSSEC, New connections
-            ('off', 'on'),     # No DNSSEC, Persistent
-            ('trust', 'off'),  # Trust DNSSEC, New connections
-            ('trust', 'on'),   # Trust DNSSEC, Persistent
-            ('auth', 'off'),   # Auth DNSSEC, New connections
-            ('auth', 'on'),    # Auth DNSSEC, Persistent
-        ]
+        # DNSSEC configuration
+        self.df['dnssec_mode'] = 'none'
+        self.df.loc[self.df['auth_dnssec'] == True, 'dnssec_mode'] = 'auth'
+        self.df.loc[(self.df['dnssec'] == True) & (self.df['auth_dnssec'] == False), 'dnssec_mode'] = 'trust'

-        for dnssec_status, keepalive_status in config_order:
-            if dnssec_status in resolver_results[resolver] and keepalive_status in resolver_results[resolver][dnssec_status]:
-                results = resolver_results[resolver][dnssec_status][keepalive_status]
-                if results:  # Only print if there are results
-                    print_configuration_table(resolver, dnssec_status, keepalive_status, results)

-    # Summary comparison across resolvers
-    print(f"\n{'=' * 100}")
-    print("CROSS-RESOLVER PROTOCOL COMPARISON")
-    print(f"{'=' * 100}")

-    # Group by protocol and configuration for cross-resolver comparison
-    protocol_comparison = defaultdict(lambda: defaultdict(list))

-    for resolver in resolver_results:
-        for dnssec in resolver_results[resolver]:
-            for keepalive in resolver_results[resolver][dnssec]:
-                for result in resolver_results[resolver][dnssec][keepalive]:
-                    config_key = f"{get_dnssec_display_name(dnssec)} - {'PERSISTENT' if keepalive == 'on' else 'NEW CONN'}"
-                    protocol_comparison[result['protocol']][config_key].append({
-                        'resolver': resolver,
-                        'avg_latency_ms': result['avg_latency_ms'],
-                        'total_queries': result['total_queries']
-                    })

-    for protocol in sorted(protocol_comparison.keys()):
-        print(f"\n{protocol} Protocol Comparison")
-        print("-" * 100)
+        # Protocol categories
+        self.df['protocol_category'] = self.df['protocol_base'].map({
+            'udp': 'Plain DNS',
+            'tls': 'DoT',
+            'https': 'DoH',
+            'doh3': 'DoH/3',
+            'doq': 'DoQ'
+        })

-        for config in sorted(protocol_comparison[protocol].keys()):
-            resolvers_data = protocol_comparison[protocol][config]
-            if resolvers_data:
-                print(f"\n {config}")
-                print(" " + "-" * 60)
-                print(f" {'Resolver':<15} {'Avg Latency (ms)':<20} {'Queries':<10}")
-                print(" " + "-" * 60)

-                # Sort by average latency
-                resolvers_data.sort(key=lambda x: x['avg_latency_ms'])

-                for data in resolvers_data:
-                    print(f" {data['resolver']:<15} {data['avg_latency_ms']:<20} {data['total_queries']:<10}")
+        # Connection persistence
+        self.df['persistence'] = self.df['keep_alive'].fillna(False)

+    def generate_summary_statistics(self):
+        """Generate comprehensive summary statistics"""
+        print("\n" + "="*80)
+        print("SUMMARY STATISTICS")
+        print("="*80)
+
+        # Overall statistics
+        print("\n--- Overall Performance ---")
+        print(f"Total queries: {len(self.df)}")
+        print(f"Mean latency: {self.df['duration_ms'].mean():.2f} ms")
+        print(f"Median latency: {self.df['duration_ms'].median():.2f} ms")
+        print(f"95th percentile: {self.df['duration_ms'].quantile(0.95):.2f} ms")
+        print(f"99th percentile: {self.df['duration_ms'].quantile(0.99):.2f} ms")
+
+        # By protocol
+        print("\n--- Performance by Protocol ---")
+        protocol_stats = self.df.groupby('protocol_category')['duration_ms'].agg([
+            ('count', 'count'),
+            ('mean', 'mean'),
+            ('median', 'median'),
+            ('std', 'std'),
+            ('p95', lambda x: x.quantile(0.95)),
+            ('p99', lambda x: x.quantile(0.99))
+        ]).round(2)
+        print(protocol_stats)
+
+        # By provider
+        print("\n--- Performance by Provider ---")
+        provider_stats = self.df.groupby('provider')['duration_ms'].agg([
+            ('count', 'count'),
+            ('mean', 'mean'),
+            ('median', 'median'),
+            ('std', 'std'),
+            ('p95', lambda x: x.quantile(0.95))
+        ]).round(2)
+        print(provider_stats)
+
+        # DNSSEC impact
+        print("\n--- DNSSEC Validation Impact ---")
+        dnssec_stats = self.df.groupby('dnssec_mode')['duration_ms'].agg([
+            ('count', 'count'),
+            ('mean', 'mean'),
+            ('median', 'median'),
+            ('overhead_vs_none', lambda x: x.mean())
+        ]).round(2)
+
+        # Calculate overhead percentage
+        baseline = dnssec_stats.loc['none', 'mean'] if 'none' in dnssec_stats.index else 0
+        if baseline > 0:
+            dnssec_stats['overhead_pct'] = ((dnssec_stats['overhead_vs_none'] - baseline) / baseline * 100).round(1)
+        print(dnssec_stats)
+
+        # Bandwidth analysis
+        print("\n--- Bandwidth Usage ---")
+        bandwidth_stats = self.df.groupby('protocol_category').agg({
+            'request_size_bytes': ['mean', 'median'],
+            'response_size_bytes': ['mean', 'median']
+        }).round(2)
+        print(bandwidth_stats)
+
+        # Persistence impact (where applicable)
+        print("\n--- Connection Persistence Impact ---")
+        persist_protocols = self.df[self.df['protocol_base'].isin(['tls', 'https'])]
+        if len(persist_protocols) > 0:
+            persist_stats = persist_protocols.groupby(['protocol_base', 'persistence'])['duration_ms'].agg([
+                ('mean', 'mean'),
+                ('median', 'median')
+            ]).round(2)
+            print(persist_stats)
+
+        return {
+            'protocol': protocol_stats,
+            'provider': provider_stats,
+            'dnssec': dnssec_stats,
+            'bandwidth': bandwidth_stats
+        }
+
+    def plot_latency_by_protocol(self, output_dir='plots'):
+        """Violin plot of latency distribution by protocol"""
+        Path(output_dir).mkdir(exist_ok=True)
+
+        plt.figure(figsize=(14, 7))
+
+        # Order protocols logically
+        protocol_order = ['Plain DNS', 'DoT', 'DoH', 'DoH/3', 'DoQ']
+        available_protocols = [p for p in protocol_order if p in self.df['protocol_category'].values]
+
+        sns.violinplot(data=self.df, x='protocol_category', y='duration_ms',
+                       order=available_protocols, inner='box', cut=0)
+
+        plt.title('DNS Query Latency Distribution by Protocol', fontsize=14, fontweight='bold')
+        plt.xlabel('Protocol', fontsize=12)
+        plt.ylabel('Response Time (ms)', fontsize=12)
+        plt.xticks(rotation=0)
+
+        # Add mean values as annotations
+        for i, protocol in enumerate(available_protocols):
+            mean_val = self.df[self.df['protocol_category'] == protocol]['duration_ms'].mean()
+            plt.text(i, mean_val, f'{mean_val:.1f}', ha='center', va='bottom', fontweight='bold')
+
+        plt.tight_layout()
+        plt.savefig(f'{output_dir}/latency_by_protocol.png', bbox_inches='tight')
+        plt.close()
+        print("✓ Saved: latency_by_protocol.png")
+
+    def plot_provider_comparison(self, output_dir='plots'):
+        """Box plot comparing providers across protocols"""
+        Path(output_dir).mkdir(exist_ok=True)
+
+        fig, axes = plt.subplots(2, 2, figsize=(16, 12))
+        fig.suptitle('Provider Performance Comparison by Protocol', fontsize=16, fontweight='bold')
+
+        protocols = self.df['protocol_category'].unique()
+        protocols = [p for p in ['Plain DNS', 'DoT', 'DoH', 'DoH/3'] if p in protocols]
+
+        for idx, protocol in enumerate(protocols[:4]):
+            ax = axes[idx // 2, idx % 2]
+            data = self.df[self.df['protocol_category'] == protocol]
+
+            if len(data) > 0:
+                sns.boxplot(data=data, x='provider', y='duration_ms', ax=ax)
+                ax.set_title(f'{protocol}', fontsize=12, fontweight='bold')
+                ax.set_xlabel('Provider', fontsize=10)
+                ax.set_ylabel('Response Time (ms)', fontsize=10)
+                ax.tick_params(axis='x', rotation=45)
+
+        plt.tight_layout()
+        plt.savefig(f'{output_dir}/provider_comparison.png', bbox_inches='tight')
+        plt.close()
+        print("✓ Saved: provider_comparison.png")
+
+    def plot_dnssec_impact(self, output_dir='plots'):
+        """Compare DNSSEC validation methods (trust vs auth)"""
+        Path(output_dir).mkdir(exist_ok=True)
+
+        # Filter for protocols that have DNSSEC variations
+        dnssec_data = self.df[self.df['dnssec_mode'] != 'none'].copy()
+
+        if len(dnssec_data) == 0:
+            print("⚠ No DNSSEC data available")
+            return
+
+        fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 6))
+
+        # Plot 1: Overall DNSSEC impact
+        protocol_order = ['Plain DNS', 'DoT', 'DoH', 'DoH/3', 'DoQ']
+        available = [p for p in protocol_order if p in self.df['protocol_category'].values]
+
+        sns.barplot(data=self.df, x='protocol_category', y='duration_ms',
+                    hue='dnssec_mode', order=available, ax=ax1, ci=95)
+        ax1.set_title('DNSSEC Validation Overhead by Protocol', fontsize=12, fontweight='bold')
+        ax1.set_xlabel('Protocol', fontsize=10)
+        ax1.set_ylabel('Mean Response Time (ms)', fontsize=10)
+        ax1.legend(title='DNSSEC Mode', labels=['No DNSSEC', 'Auth (Full)', 'Trust (Resolver)'])
+        ax1.tick_params(axis='x', rotation=0)
+
+        # Plot 2: Trust vs Auth comparison
+        comparison_data = dnssec_data.groupby(['protocol_category', 'dnssec_mode'])['duration_ms'].mean().reset_index()
+        pivot_data = comparison_data.pivot(index='protocol_category', columns='dnssec_mode', values='duration_ms')
+
+        if 'auth' in pivot_data.columns and 'trust' in pivot_data.columns:
+            pivot_data['overhead_pct'] = ((pivot_data['auth'] - pivot_data['trust']) / pivot_data['trust'] * 100)
+            pivot_data['overhead_pct'].plot(kind='bar', ax=ax2, color='coral')
+            ax2.set_title('Auth vs Trust: Additional Overhead (%)', fontsize=12, fontweight='bold')
+            ax2.set_xlabel('Protocol', fontsize=10)
+            ax2.set_ylabel('Additional Overhead (%)', fontsize=10)
+            ax2.axhline(y=0, color='black', linestyle='--', linewidth=0.8)
+            ax2.tick_params(axis='x', rotation=45)
+            ax2.grid(axis='y', alpha=0.3)
+
+        plt.tight_layout()
+        plt.savefig(f'{output_dir}/dnssec_impact.png', bbox_inches='tight')
+        plt.close()
+        print("✓ Saved: dnssec_impact.png")
+
+    def plot_persistence_impact(self, output_dir='plots'):
+        """Analyze impact of connection persistence"""
+        Path(output_dir).mkdir(exist_ok=True)
+
+        persist_data = self.df[self.df['protocol_base'].isin(['tls', 'https'])].copy()
+
+        if len(persist_data) == 0:
+            print("⚠ No persistence data available")
+            return
+
+        plt.figure(figsize=(12, 6))
+
+        sns.barplot(data=persist_data, x='protocol_base', y='duration_ms',
+                    hue='persistence', ci=95)
+
+        plt.title('Impact of Connection Persistence on Latency', fontsize=14, fontweight='bold')
+        plt.xlabel('Protocol', fontsize=12)
+        plt.ylabel('Mean Response Time (ms)', fontsize=12)
+        plt.legend(title='Keep-Alive', labels=['Disabled', 'Enabled'])
+
+        # Calculate and annotate overhead reduction
+        for protocol in persist_data['protocol_base'].unique():
+            protocol_data = persist_data[persist_data['protocol_base'] == protocol]
+
+            no_persist = protocol_data[protocol_data['persistence'] == False]['duration_ms'].mean()
+            with_persist = protocol_data[protocol_data['persistence'] == True]['duration_ms'].mean()
+
+            if not np.isnan(no_persist) and not np.isnan(with_persist):
+                reduction = ((no_persist - with_persist) / no_persist * 100)
+                print(f"{protocol}: {reduction:.1f}% reduction with persistence")
+
+        plt.tight_layout()
+        plt.savefig(f'{output_dir}/persistence_impact.png', bbox_inches='tight')
+        plt.close()
+        print("✓ Saved: persistence_impact.png")
+
+    def plot_bandwidth_overhead(self, output_dir='plots'):
+        """Visualize bandwidth usage by protocol"""
+        Path(output_dir).mkdir(exist_ok=True)
+
+        bandwidth_data = self.df.groupby('protocol_category').agg({
+            'request_size_bytes': 'mean',
+            'response_size_bytes': 'mean'
+        }).reset_index()
+
+        bandwidth_data['total_bytes'] = (bandwidth_data['request_size_bytes'] +
+                                         bandwidth_data['response_size_bytes'])
+
+        fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 6))
+
+        # Plot 1: Request vs Response sizes
+        x = np.arange(len(bandwidth_data))
+        width = 0.35
+
+        ax1.bar(x - width/2, bandwidth_data['request_size_bytes'], width,
+                label='Request', alpha=0.8)
+        ax1.bar(x + width/2, bandwidth_data['response_size_bytes'], width,
+                label='Response', alpha=0.8)
+
+        ax1.set_xlabel('Protocol', fontsize=12)
+        ax1.set_ylabel('Bytes', fontsize=12)
+        ax1.set_title('Average Request/Response Sizes', fontsize=12, fontweight='bold')
+        ax1.set_xticks(x)
+        ax1.set_xticklabels(bandwidth_data['protocol_category'])
+        ax1.legend()
+        ax1.grid(axis='y', alpha=0.3)
+
+        # Plot 2: Total bandwidth overhead vs UDP baseline
+        udp_total = bandwidth_data[bandwidth_data['protocol_category'] == 'Plain DNS']['total_bytes'].values
+        if len(udp_total) > 0:
+            bandwidth_data['overhead_vs_udp'] = ((bandwidth_data['total_bytes'] - udp_total[0]) / udp_total[0] * 100)
+
+            colors = ['green' if x < 0 else 'red' for x in bandwidth_data['overhead_vs_udp']]
+            ax2.bar(bandwidth_data['protocol_category'], bandwidth_data['overhead_vs_udp'],
+                    color=colors, alpha=0.7)
+            ax2.axhline(y=0, color='black', linestyle='--', linewidth=0.8)
+            ax2.set_xlabel('Protocol', fontsize=12)
+            ax2.set_ylabel('Overhead vs Plain DNS (%)', fontsize=12)
+            ax2.set_title('Bandwidth Overhead', fontsize=12, fontweight='bold')
+            ax2.grid(axis='y', alpha=0.3)
+
+        plt.tight_layout()
+        plt.savefig(f'{output_dir}/bandwidth_overhead.png', bbox_inches='tight')
+        plt.close()
+        print("✓ Saved: bandwidth_overhead.png")
+
+    def plot_heatmap(self, output_dir='plots'):
+        """Heatmap of provider-protocol performance"""
+        Path(output_dir).mkdir(exist_ok=True)
+
+        # Create pivot table
+        heatmap_data = self.df.groupby(['provider', 'protocol_category'])['duration_ms'].median().unstack()
+
+        plt.figure(figsize=(12, 8))
+        sns.heatmap(heatmap_data, annot=True, fmt='.1f', cmap='RdYlGn_r',
+                    cbar_kws={'label': 'Median Latency (ms)'})
+
+        plt.title('DNS Provider-Protocol Performance Matrix', fontsize=14, fontweight='bold')
+        plt.xlabel('Protocol', fontsize=12)
+        plt.ylabel('Provider', fontsize=12)
+
+        plt.tight_layout()
+        plt.savefig(f'{output_dir}/provider_protocol_heatmap.png', bbox_inches='tight')
+        plt.close()
+        print("✓ Saved: provider_protocol_heatmap.png")
+
+    def plot_percentile_comparison(self, output_dir='plots'):
+        """Plot percentile comparison across protocols"""
+        Path(output_dir).mkdir(exist_ok=True)
+
+        percentiles = [50, 75, 90, 95, 99]
+        protocol_order = ['Plain DNS', 'DoT', 'DoH', 'DoH/3', 'DoQ']
+        available = [p for p in protocol_order if p in self.df['protocol_category'].values]
+
+        percentile_data = []
+        for protocol in available:
+            data = self.df[self.df['protocol_category'] == protocol]['duration_ms']
+            for p in percentiles:
+                percentile_data.append({
+                    'protocol': protocol,
+                    'percentile': f'P{p}',
+                    'latency': np.percentile(data, p)
+                })
+
+        percentile_df = pd.DataFrame(percentile_data)
+
+        plt.figure(figsize=(14, 7))
+        sns.barplot(data=percentile_df, x='protocol', y='latency', hue='percentile', order=available)
+
+        plt.title('Latency Percentiles by Protocol', fontsize=14, fontweight='bold')
+        plt.xlabel('Protocol', fontsize=12)
+        plt.ylabel('Response Time (ms)', fontsize=12)
+        plt.legend(title='Percentile', bbox_to_anchor=(1.05, 1), loc='upper left')
+
+        plt.tight_layout()
+        plt.savefig(f'{output_dir}/percentile_comparison.png', bbox_inches='tight')
+        plt.close()
+        print("✓ Saved: percentile_comparison.png")
+
+    def statistical_tests(self):
+        """Perform statistical significance tests"""
+        print("\n" + "="*80)
+        print("STATISTICAL TESTS")
+        print("="*80)
+
+        # Test 1: Protocol differences (Kruskal-Wallis)
+        protocols = self.df['protocol_category'].unique()
+        if len(protocols) > 2:
+            groups = [self.df[self.df['protocol_category'] == p]['duration_ms'].values
+                      for p in protocols]
+            h_stat, p_value = stats.kruskal(*groups)
+            print("\n--- Kruskal-Wallis Test (Protocol Differences) ---")
+            print(f"H-statistic: {h_stat:.4f}")
+            print(f"p-value: {p_value:.4e}")
+            print(f"Result: {'Significant' if p_value < 0.05 else 'Not significant'} differences between protocols")
+
+        # Test 2: DNSSEC impact (Mann-Whitney U)
+        if 'none' in self.df['dnssec_mode'].values and 'auth' in self.df['dnssec_mode'].values:
+            none_data = self.df[self.df['dnssec_mode'] == 'none']['duration_ms']
+            auth_data = self.df[self.df['dnssec_mode'] == 'auth']['duration_ms']
+
+            u_stat, p_value = stats.mannwhitneyu(none_data, auth_data, alternative='two-sided')
+            print("\n--- Mann-Whitney U Test (No DNSSEC vs Auth) ---")
+            print(f"U-statistic: {u_stat:.4f}")
+            print(f"p-value: {p_value:.4e}")
+            print(f"Result: {'Significant' if p_value < 0.05 else 'Not significant'} difference")
+
+        # Test 3: Trust vs Auth comparison
+        if 'trust' in self.df['dnssec_mode'].values and 'auth' in self.df['dnssec_mode'].values:
+            trust_data = self.df[self.df['dnssec_mode'] == 'trust']['duration_ms']
+            auth_data = self.df[self.df['dnssec_mode'] == 'auth']['duration_ms']
+
+            u_stat, p_value = stats.mannwhitneyu(trust_data, auth_data, alternative='two-sided')
+            print("\n--- Mann-Whitney U Test (Trust vs Auth) ---")
+            print(f"U-statistic: {u_stat:.4f}")
+            print(f"p-value: {p_value:.4e}")
+            print(f"Result: Auth is {'significantly' if p_value < 0.05 else 'not significantly'} slower than Trust")
+
+    def generate_latex_table(self, output_dir='plots'):
+        """Generate LaTeX table for thesis"""
+        Path(output_dir).mkdir(exist_ok=True)
+
+        # Summary table by protocol
+        summary = self.df.groupby('protocol_category')['duration_ms'].agg([
+            ('Mean', 'mean'),
+            ('Median', 'median'),
+            ('Std Dev', 'std'),
+            ('P95', lambda x: x.quantile(0.95)),
+            ('P99', lambda x: x.quantile(0.99))
+        ]).round(2)
+
+        latex_code = summary.to_latex(float_format="%.2f")
+
+        with open(f'{output_dir}/summary_table.tex', 'w') as f:
+            f.write(latex_code)
+
+        print("✓ Saved: summary_table.tex")
+        print("\nLaTeX Table Preview:")
+        print(latex_code)
+
+    def run_full_analysis(self):
+        """Run complete analysis pipeline"""
+        print("="*80)
+        print("DNS QoS Analysis - Starting Full Analysis")
+        print("="*80)
+
+        # Load data
+        print("\n[1/10] Loading data...")
+        self.load_all_data()
+
+        # Generate statistics
+        print("\n[2/10] Generating summary statistics...")
+        self.generate_summary_statistics()
+
+        # Statistical tests
+        print("\n[3/10] Running statistical tests...")
+        self.statistical_tests()
+
+        # Generate plots
+        print("\n[4/10] Creating latency by protocol plot...")
+        self.plot_latency_by_protocol()
+
+        print("\n[5/10] Creating provider comparison plot...")
+        self.plot_provider_comparison()
+
+        print("\n[6/10] Creating DNSSEC impact plot...")
+        self.plot_dnssec_impact()
+
+        print("\n[7/10] Creating persistence impact plot...")
+        self.plot_persistence_impact()
+
+        print("\n[8/10] Creating bandwidth overhead plot...")
+        self.plot_bandwidth_overhead()
+
+        print("\n[9/10] Creating heatmap...")
+        self.plot_heatmap()
+
+        print("\n[10/10] Creating percentile comparison...")
+        self.plot_percentile_comparison()
+
+        # Generate LaTeX table
+        print("\n[Bonus] Generating LaTeX table...")
+        self.generate_latex_table()
+
+        print("\n" + "="*80)
+        print("✓ Analysis Complete! Check the 'plots' directory for all visualizations.")
+        print("="*80)

 if __name__ == "__main__":
-    root_dir = "."
-    output_file = "dns_metrics.csv"
-
-    analyze_dns_data(root_dir, output_file)
+    analyzer = DNSAnalyzer(results_dir='results')
+    analyzer.run_full_analysis()
scripts/analysis/analyze_simple.py (new file, 536 lines)
@@ -0,0 +1,536 @@
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from pathlib import Path
import datetime
from dateutil import parser as date_parser
import dpkt

# Set style
sns.set_style("whitegrid")
plt.rcParams['figure.dpi'] = 300
plt.rcParams['savefig.dpi'] = 300
plt.rcParams['font.size'] = 10


class FastDNSAnalyzer:
    def __init__(self, results_dir='results'):
        self.results_dir = Path(results_dir)
        self.all_data = []

    def should_include_file(self, filename):
        """Filter out DNSSEC and non-persist files"""
        name = filename.stem
        if 'auth' in name or 'trust' in name:
            return False
        if name in ['tls', 'https']:
            return False
        return True
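
    # Illustrative expectations for the filter above (hypothetical stems,
    # not taken from an actual results directory):
    #   'udp-persist'  -> included
    #   'https-auth'   -> excluded (DNSSEC variant)
    #   'tls'          -> excluded (non-persistent TLS run)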

    def parse_rfc3339_nano(self, timestamp_str):
        """Parse RFC3339Nano timestamp with timezone"""
        try:
            dt = date_parser.parse(timestamp_str)
            return dt.astimezone(datetime.timezone.utc).timestamp()
        except Exception as e:
            print(f" Error parsing timestamp {timestamp_str}: {e}")
            return None
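
    # A minimal sketch of the conversion above, assuming a Go-style RFC3339Nano
    # string (dateutil keeps at most microsecond precision, so the final three
    # nanosecond digits are dropped):
    #   date_parser.parse("2024-01-15T09:30:00.123456789+01:00")
    #       -> 2024-01-15 09:30:00.123456+01:00
    #   .astimezone(datetime.timezone.utc).timestamp()
    #       -> float epoch seconds for 2024-01-15 08:30:00.123456 UTC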

    def extract_bandwidth_from_pcap_fast(self, pcap_file, csv_data):
        """Fast bandwidth extraction using dpkt"""
        print(f" Analyzing pcap: {pcap_file.name}")

        try:
            with open(pcap_file, 'rb') as f:
                pcap = dpkt.pcap.Reader(f)

                # Build query time windows
                query_windows = []
                for idx, row in csv_data.iterrows():
                    start_time = self.parse_rfc3339_nano(row['timestamp'])
                    if start_time is None:
                        continue

                    duration_seconds = row['duration_ns'] / 1_000_000_000
                    end_time = start_time + duration_seconds

                    query_windows.append({
                        'index': idx,
                        'start': start_time,
                        'end': end_time,
                        'bytes_sent': 0,
                        'bytes_received': 0,
                        'packets_sent': 0,
                        'packets_received': 0
                    })

                if not query_windows:
                    print(" ✗ No valid query windows")
                    return None

                # Sort windows for faster matching
                query_windows.sort(key=lambda x: x['start'])

                # Process packets
                packet_count = 0
                matched_count = 0

                for timestamp, buf in pcap:
                    packet_count += 1
                    packet_size = len(buf)

                    # Quick parse to determine direction
                    try:
                        eth = dpkt.ethernet.Ethernet(buf)

                        # Get IP layer
                        if isinstance(eth.data, dpkt.ip.IP):
                            ip = eth.data
                        elif isinstance(eth.data, dpkt.ip6.IP6):
                            ip = eth.data
                        else:
                            continue

                        # Get transport layer
                        if isinstance(ip.data, dpkt.udp.UDP):
                            transport = ip.data
                            src_port = transport.sport
                            dst_port = transport.dport
                        elif isinstance(ip.data, dpkt.tcp.TCP):
                            transport = ip.data
                            src_port = transport.sport
                            dst_port = transport.dport
                        else:
                            continue

                        # Determine direction (client port usually higher)
                        is_outbound = src_port > dst_port

                        # Linear scan over the start-sorted windows; stop early
                        # once the packet predates the next window's start
                        for window in query_windows:
                            if window['start'] <= timestamp <= window['end']:
                                if is_outbound:
                                    window['bytes_sent'] += packet_size
                                    window['packets_sent'] += 1
                                else:
                                    window['bytes_received'] += packet_size
                                    window['packets_received'] += 1
                                matched_count += 1
                                break
                            elif timestamp < window['start']:
                                break  # No more windows to check

                    except Exception:
                        continue

            print(f" ✓ Processed {packet_count} packets, matched {matched_count}")

            # Convert to DataFrame
            bandwidth_df = pd.DataFrame(query_windows)
            return bandwidth_df[['index', 'bytes_sent', 'bytes_received',
                                 'packets_sent', 'packets_received']]

        except Exception as e:
            print(f" ✗ Error reading pcap: {e}")
            return None
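
    # Toy sketch of the window-matching strategy above: because windows are
    # sorted by start time, a packet whose timestamp precedes the current
    # window's start can match no later window, so the scan stops early.
    # (Hypothetical helper, not used by the analyzer:)
    #
    #   def match_window(ts, windows):  # windows: (start, end), sorted by start
    #       for i, (start, end) in enumerate(windows):
    #           if start <= ts <= end:
    #               return i
    #           if ts < start:
    #               break  # every remaining window starts even later
    #       return None
    #
    #   match_window(1.5, [(1.0, 2.0), (3.0, 4.0)])  -> 0
    #   match_window(2.5, [(1.0, 2.0), (3.0, 4.0)])  -> None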

    def load_data(self):
        """Load all relevant CSV files and extract bandwidth from pcaps"""
        print("Loading data and analyzing bandwidth...")

        for provider_dir in self.results_dir.iterdir():
            if not provider_dir.is_dir():
                continue

            provider = provider_dir.name

            for csv_file in provider_dir.glob('*.csv'):
                if not self.should_include_file(csv_file):
                    continue

                try:
                    df = pd.read_csv(csv_file)
                    df['provider'] = provider
                    df['test_file'] = csv_file.stem
                    df['csv_path'] = str(csv_file)

                    # Find corresponding pcap file
                    pcap_file = csv_file.with_suffix('.pcap')
                    if pcap_file.exists():
                        print(f" Processing: {provider}/{csv_file.name}")
                        bandwidth_data = self.extract_bandwidth_from_pcap_fast(pcap_file, df)

                        if bandwidth_data is not None and len(bandwidth_data) > 0:
                            # Merge bandwidth data
                            df = df.reset_index(drop=True)
                            for col in ['bytes_sent', 'bytes_received', 'packets_sent', 'packets_received']:
                                df[col] = 0

                            for _, row in bandwidth_data.iterrows():
                                idx = int(row['index'])
                                if idx < len(df):
                                    df.at[idx, 'bytes_sent'] = row['bytes_sent']
                                    df.at[idx, 'bytes_received'] = row['bytes_received']
                                    df.at[idx, 'packets_sent'] = row['packets_sent']
                                    df.at[idx, 'packets_received'] = row['packets_received']

                            df['total_bytes'] = df['bytes_sent'] + df['bytes_received']

                            print(f" ✓ Extracted bandwidth for {len(df)} queries")
                        else:
                            print(" ⚠ Could not extract bandwidth data")
                    else:
                        print(f" ⚠ No pcap found for {csv_file.name}")

                    self.all_data.append(df)

                except Exception as e:
                    print(f" ✗ Error loading {csv_file}: {e}")
                    import traceback
                    traceback.print_exc()

        print(f"\nTotal files loaded: {len(self.all_data)}")

    def create_line_graphs(self, output_dir='output/line_graphs'):
        """Create line graphs for latency and bandwidth"""
        Path(output_dir).mkdir(parents=True, exist_ok=True)

        print("\nGenerating line graphs...")

        for df in self.all_data:
            provider = df['provider'].iloc[0]
            test_name = df['test_file'].iloc[0]

            df['query_index'] = range(1, len(df) + 1)

            # Create figure with 2 subplots
            fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(14, 10))

            # Plot 1: Latency
            ax1.plot(df['query_index'], df['duration_ms'], marker='o',
                     markersize=4, linewidth=1, alpha=0.7, color='steelblue')
            mean_latency = df['duration_ms'].mean()
            ax1.axhline(y=mean_latency, color='r', linestyle='--',
                        label=f'Mean: {mean_latency:.2f} ms', linewidth=2)
            ax1.set_xlabel('Query Number', fontsize=12)
            ax1.set_ylabel('Latency (ms)', fontsize=12)
            ax1.set_title('Latency Over Time', fontsize=12, fontweight='bold')
            ax1.legend()
            ax1.grid(True, alpha=0.3)

            # Plot 2: Bandwidth
            if 'total_bytes' in df.columns and df['total_bytes'].sum() > 0:
                ax2.plot(df['query_index'], df['bytes_sent'], marker='s',
                         markersize=4, linewidth=1, alpha=0.7,
                         color='orange', label='Sent')
                ax2.plot(df['query_index'], df['bytes_received'], marker='^',
                         markersize=4, linewidth=1, alpha=0.7,
                         color='green', label='Received')

                mean_sent = df['bytes_sent'].mean()
                mean_received = df['bytes_received'].mean()
                ax2.axhline(y=mean_sent, color='orange', linestyle='--',
                            linewidth=1.5, alpha=0.5)
                ax2.axhline(y=mean_received, color='green', linestyle='--',
                            linewidth=1.5, alpha=0.5)

                ax2.set_xlabel('Query Number', fontsize=12)
                ax2.set_ylabel('Bytes', fontsize=12)
                ax2.set_title(f'Bandwidth Over Time (Mean: ↑{mean_sent:.0f}B ↓{mean_received:.0f}B)',
                              fontsize=12, fontweight='bold')
                ax2.legend()
                ax2.grid(True, alpha=0.3)

            fig.suptitle(f'{provider.upper()} - {test_name}',
                         fontsize=14, fontweight='bold')
            plt.tight_layout()

            filename = f"{provider}_{test_name}.png"
            plt.savefig(f'{output_dir}/{filename}', bbox_inches='tight')
            plt.close()

            print(f" ✓ Created: {filename}")

    def get_protocol_name(self, test_file):
        """Extract clean protocol name"""
        name = test_file.replace('-persist', '')

        protocol_map = {
            'udp': 'Plain DNS (UDP)',
            'tls': 'DoT (DNS over TLS)',
            'https': 'DoH (DNS over HTTPS)',
            'doh3': 'DoH/3 (DNS over HTTP/3)',
            'doq': 'DoQ (DNS over QUIC)'
        }

        return protocol_map.get(name, name.upper())
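
    # e.g. 'udp-persist' -> 'Plain DNS (UDP)', 'doq-persist' -> 'DoQ (DNS over QUIC)';
    # an unmapped stem (say, a hypothetical 'dnscrypt' run) falls back to 'DNSCRYPT'.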

    def create_resolver_comparison_bars(self, output_dir='output/comparisons'):
        """Create bar graphs comparing resolvers for latency and bandwidth"""
        Path(output_dir).mkdir(parents=True, exist_ok=True)

        print("\nGenerating resolver comparison graphs...")

        combined_df = pd.concat(self.all_data, ignore_index=True)
        protocols = combined_df['test_file'].unique()

        for protocol in protocols:
            protocol_data = combined_df[combined_df['test_file'] == protocol]
            protocol_name = self.get_protocol_name(protocol)

            # Latency stats
            latency_stats = protocol_data.groupby('provider')['duration_ms'].agg([
                ('mean', 'mean'),
                ('median', 'median'),
                ('std', 'std')
            ]).reset_index()

            # Create latency comparison
            fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 6))
            fig.suptitle(f'{protocol_name} - Latency Comparison',
                         fontsize=16, fontweight='bold')

            # Mean latency
            bars1 = ax1.bar(latency_stats['provider'], latency_stats['mean'],
                            color='steelblue', alpha=0.8, edgecolor='black')
            ax1.errorbar(latency_stats['provider'], latency_stats['mean'],
                         yerr=latency_stats['std'], fmt='none', color='black',
                         capsize=5, alpha=0.6)

            for bar in bars1:
                height = bar.get_height()
                ax1.text(bar.get_x() + bar.get_width()/2., height,
                         f'{height:.2f}',
                         ha='center', va='bottom', fontweight='bold')

            ax1.set_xlabel('Resolver', fontsize=12)
            ax1.set_ylabel('Mean Latency (ms)', fontsize=12)
            ax1.set_title('Mean Latency', fontsize=12)
            ax1.grid(axis='y', alpha=0.3)

            # Median latency
            bars2 = ax2.bar(latency_stats['provider'], latency_stats['median'],
                            color='coral', alpha=0.8, edgecolor='black')

            for bar in bars2:
                height = bar.get_height()
                ax2.text(bar.get_x() + bar.get_width()/2., height,
                         f'{height:.2f}',
                         ha='center', va='bottom', fontweight='bold')

            ax2.set_xlabel('Resolver', fontsize=12)
            ax2.set_ylabel('Median Latency (ms)', fontsize=12)
            ax2.set_title('Median Latency', fontsize=12)
            ax2.grid(axis='y', alpha=0.3)

            plt.tight_layout()
            plt.savefig(f'{output_dir}/latency_{protocol}.png', bbox_inches='tight')
            plt.close()
            print(f" ✓ Created: latency_{protocol}.png")

            # Bandwidth comparison
            if 'total_bytes' in protocol_data.columns and protocol_data['total_bytes'].sum() > 0:
                bandwidth_stats = protocol_data.groupby('provider').agg({
                    'bytes_sent': 'mean',
                    'bytes_received': 'mean',
                    'total_bytes': 'mean'
                }).reset_index()

                fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 6))
                fig.suptitle(f'{protocol_name} - Bandwidth Comparison',
                             fontsize=16, fontweight='bold')

                # Sent vs Received
                x = np.arange(len(bandwidth_stats))
                width = 0.35

                bars1 = ax1.bar(x - width/2, bandwidth_stats['bytes_sent'], width,
                                label='Sent', color='orange', alpha=0.8, edgecolor='black')
                bars2 = ax1.bar(x + width/2, bandwidth_stats['bytes_received'], width,
                                label='Received', color='green', alpha=0.8, edgecolor='black')

                ax1.set_xlabel('Resolver', fontsize=12)
                ax1.set_ylabel('Bytes per Query', fontsize=12)
                ax1.set_title('Average Bandwidth per Query', fontsize=12)
                ax1.set_xticks(x)
                ax1.set_xticklabels(bandwidth_stats['provider'])
                ax1.legend()
                ax1.grid(axis='y', alpha=0.3)

                # Total bandwidth
                bars3 = ax2.bar(bandwidth_stats['provider'], bandwidth_stats['total_bytes'],
                                color='purple', alpha=0.8, edgecolor='black')

                for bar in bars3:
                    height = bar.get_height()
                    ax2.text(bar.get_x() + bar.get_width()/2., height,
                             f'{height:.0f}',
                             ha='center', va='bottom', fontweight='bold')

                ax2.set_xlabel('Resolver', fontsize=12)
                ax2.set_ylabel('Total Bytes per Query', fontsize=12)
                ax2.set_title('Total Bandwidth per Query', fontsize=12)
                ax2.grid(axis='y', alpha=0.3)

                plt.tight_layout()
                plt.savefig(f'{output_dir}/bandwidth_{protocol}.png', bbox_inches='tight')
                plt.close()
                print(f" ✓ Created: bandwidth_{protocol}.png")

    def generate_latex_tables(self, output_dir='output/tables'):
        """Generate LaTeX tables with latency and bandwidth statistics"""
        Path(output_dir).mkdir(parents=True, exist_ok=True)

        print("\nGenerating LaTeX tables...")

        combined_df = pd.concat(self.all_data, ignore_index=True)

        # Generate latency table for each resolver
        for provider in combined_df['provider'].unique():
            provider_data = combined_df[combined_df['provider'] == provider]

            stats = provider_data.groupby('test_file')['duration_ms'].agg([
                ('Mean', 'mean'),
                ('Median', 'median'),
                ('Std Dev', 'std'),
                ('P95', lambda x: x.quantile(0.95)),
                ('P99', lambda x: x.quantile(0.99))
            ]).round(2)

            stats.index = stats.index.map(self.get_protocol_name)
            stats.index.name = 'Protocol'

            latex_code = stats.to_latex(
                caption=f'{provider.upper()} - Latency Statistics (ms)',
                label=f'tab:{provider}_latency',
                float_format="%.2f"
            )

            with open(f'{output_dir}/{provider}_latency.tex', 'w') as f:
                f.write(latex_code)

            print(f" ✓ Created: {provider}_latency.tex")

        # Generate bandwidth table for each resolver
        for provider in combined_df['provider'].unique():
            provider_data = combined_df[combined_df['provider'] == provider]

            if 'total_bytes' not in provider_data.columns or provider_data['total_bytes'].sum() == 0:
                continue

            bandwidth_stats = provider_data.groupby('test_file').agg({
                'bytes_sent': 'mean',
                'bytes_received': 'mean',
                'total_bytes': 'mean'
            }).round(2)

            bandwidth_stats.columns = ['Avg Sent (B)', 'Avg Received (B)', 'Avg Total (B)']
            bandwidth_stats.index = bandwidth_stats.index.map(self.get_protocol_name)
            bandwidth_stats.index.name = 'Protocol'

            latex_code = bandwidth_stats.to_latex(
                caption=f'{provider.upper()} - Bandwidth Statistics',
                label=f'tab:{provider}_bandwidth',
                float_format="%.2f"
            )

            with open(f'{output_dir}/{provider}_bandwidth.tex', 'w') as f:
                f.write(latex_code)

            print(f" ✓ Created: {provider}_bandwidth.tex")

        # Generate protocol efficiency table
        print("\nGenerating protocol efficiency table...")

        if 'total_bytes' in combined_df.columns and combined_df['total_bytes'].sum() > 0:
            protocol_bandwidth = combined_df.groupby('test_file').agg({
                'bytes_sent': 'mean',
                'bytes_received': 'mean',
                'total_bytes': 'mean'
            }).round(2)

            # Find UDP baseline
            udp_baseline = None
            for protocol in protocol_bandwidth.index:
                if 'udp' in protocol:
                    udp_baseline = protocol_bandwidth.loc[protocol, 'total_bytes']
                    break

            if udp_baseline and udp_baseline > 0:
                protocol_bandwidth['Overhead vs UDP (%)'] = (
                    (protocol_bandwidth['total_bytes'] - udp_baseline) / udp_baseline * 100
                ).round(1)
                protocol_bandwidth['Efficiency (%)'] = (
                    100 / (1 + protocol_bandwidth['Overhead vs UDP (%)'] / 100)
                ).round(1)

                protocol_bandwidth.columns = ['Avg Sent (B)', 'Avg Received (B)',
                                              'Avg Total (B)', 'Overhead (%)', 'Efficiency (%)']
                protocol_bandwidth.index = protocol_bandwidth.index.map(self.get_protocol_name)
                protocol_bandwidth.index.name = 'Protocol'

                latex_code = protocol_bandwidth.to_latex(
                    caption='Protocol Bandwidth Efficiency Comparison',
                    label='tab:protocol_efficiency',
                    float_format="%.2f"
                )

                with open(f'{output_dir}/protocol_efficiency.tex', 'w') as f:
                    f.write(latex_code)

                print(" ✓ Created: protocol_efficiency.tex")
                print("\n--- Protocol Efficiency ---")
                print(protocol_bandwidth.to_string())
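
                # Worked example for the overhead/efficiency columns above
                # (hypothetical numbers): if Plain DNS moves 150 B per query and
                # DoH moves 600 B, overhead = (600 - 150) / 150 * 100 = 300%,
                # and efficiency = 100 / (1 + 300 / 100) = 25%, i.e. DoH spends
                # four times the bytes of plain UDP per answer.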

        # Generate combined comparison tables
        for metric in ['Mean', 'Median', 'P95']:
            comparison_stats = combined_df.groupby(['provider', 'test_file'])['duration_ms'].agg([
                ('Mean', 'mean'),
                ('Median', 'median'),
                ('P95', lambda x: x.quantile(0.95))
            ]).round(2)

            pivot_table = comparison_stats[metric].unstack(level=0)
            pivot_table.index = pivot_table.index.map(self.get_protocol_name)
            pivot_table.index.name = 'Protocol'

            latex_code = pivot_table.to_latex(
                caption=f'Resolver Latency Comparison - {metric} (ms)',
                label=f'tab:comparison_{metric.lower()}',
                float_format="%.2f"
            )

            with open(f'{output_dir}/comparison_{metric.lower()}.tex', 'w') as f:
                f.write(latex_code)

            print(f" ✓ Created: comparison_{metric.lower()}.tex")

    def run_analysis(self):
        """Run the complete analysis"""
        print("="*80)
        print("Fast DNS QoS Analysis with Bandwidth")
        print("="*80)

        self.load_data()

        if not self.all_data:
            print("\n⚠ No data loaded.")
            return

        print("\n" + "="*80)
        self.create_line_graphs()

        print("\n" + "="*80)
        self.create_resolver_comparison_bars()

        print("\n" + "="*80)
        self.generate_latex_tables()

        print("\n" + "="*80)
        print("✓ Analysis Complete!")
        print("="*80)


if __name__ == "__main__":
    analyzer = FastDNSAnalyzer(results_dir='results')
    analyzer.run_analysis()