feat(profiling): Add CPU and MEM profiling

2026-04-06 13:15:09 +01:00
parent 0dc77c4583
commit cd8a6c5433
17 changed files with 631 additions and 2740 deletions
-498
@@ -1,498 +0,0 @@
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from pathlib import Path
from scipy import stats
import warnings
warnings.filterwarnings('ignore')
# Set style for publication-quality plots
sns.set_style("whitegrid")
plt.rcParams['figure.dpi'] = 300
plt.rcParams['savefig.dpi'] = 300
plt.rcParams['font.size'] = 10
plt.rcParams['figure.figsize'] = (12, 6)
class DNSAnalyzer:
def __init__(self, results_dir='results'):
self.results_dir = Path(results_dir)
self.df = None
def load_all_data(self):
"""Load all CSV files from the results directory"""
data_frames = []
providers = ['adguard', 'cloudflare', 'google', 'quad9']
for provider in providers:
provider_path = self.results_dir / provider
if not provider_path.exists():
continue
for csv_file in provider_path.glob('*.csv'):
try:
df = pd.read_csv(csv_file)
df['provider'] = provider
df['test_config'] = csv_file.stem
data_frames.append(df)
except Exception as e:
print(f"Error loading {csv_file}: {e}")
self.df = pd.concat(data_frames, ignore_index=True)
self._clean_and_enrich_data()
print(f"Loaded {len(self.df)} DNS queries across {len(data_frames)} test configurations")
def _clean_and_enrich_data(self):
"""Clean data and add useful columns"""
# Remove failed queries
self.df = self.df[self.df['error'].isna()]
# Extract protocol base (remove -auth, -trust suffixes)
self.df['protocol_base'] = self.df['protocol'].str.replace('-auth|-trust', '', regex=True)
# DNSSEC configuration
self.df['dnssec_mode'] = 'none'
self.df.loc[self.df['auth_dnssec'] == True, 'dnssec_mode'] = 'auth'
self.df.loc[(self.df['dnssec'] == True) & (self.df['auth_dnssec'] == False), 'dnssec_mode'] = 'trust'
# Protocol categories
self.df['protocol_category'] = self.df['protocol_base'].map({
'udp': 'Plain DNS',
'tls': 'DoT',
'https': 'DoH',
'doh3': 'DoH/3',
'doq': 'DoQ'
})
# Connection persistence
self.df['persistence'] = self.df['keep_alive'].fillna(False)
def generate_summary_statistics(self):
"""Generate comprehensive summary statistics"""
print("\n" + "="*80)
print("SUMMARY STATISTICS")
print("="*80)
# Overall statistics
print("\n--- Overall Performance ---")
print(f"Total queries: {len(self.df)}")
print(f"Mean latency: {self.df['duration_ms'].mean():.2f} ms")
print(f"Median latency: {self.df['duration_ms'].median():.2f} ms")
print(f"95th percentile: {self.df['duration_ms'].quantile(0.95):.2f} ms")
print(f"99th percentile: {self.df['duration_ms'].quantile(0.99):.2f} ms")
# By protocol
print("\n--- Performance by Protocol ---")
protocol_stats = self.df.groupby('protocol_category')['duration_ms'].agg([
('count', 'count'),
('mean', 'mean'),
('median', 'median'),
('std', 'std'),
('p95', lambda x: x.quantile(0.95)),
('p99', lambda x: x.quantile(0.99))
]).round(2)
print(protocol_stats)
# By provider
print("\n--- Performance by Provider ---")
provider_stats = self.df.groupby('provider')['duration_ms'].agg([
('count', 'count'),
('mean', 'mean'),
('median', 'median'),
('std', 'std'),
('p95', lambda x: x.quantile(0.95))
]).round(2)
print(provider_stats)
# DNSSEC impact
print("\n--- DNSSEC Validation Impact ---")
dnssec_stats = self.df.groupby('dnssec_mode')['duration_ms'].agg([
('count', 'count'),
('mean', 'mean'),
('median', 'median'),
('overhead_vs_none', lambda x: x.mean()),  # group mean; compared against the 'none' baseline below
]).round(2)
# Calculate overhead percentage
baseline = dnssec_stats.loc['none', 'mean'] if 'none' in dnssec_stats.index else 0
if baseline > 0:
dnssec_stats['overhead_pct'] = ((dnssec_stats['overhead_vs_none'] - baseline) / baseline * 100).round(1)
print(dnssec_stats)
# Bandwidth analysis
print("\n--- Bandwidth Usage ---")
bandwidth_stats = self.df.groupby('protocol_category').agg({
'request_size_bytes': ['mean', 'median'],
'response_size_bytes': ['mean', 'median']
}).round(2)
print(bandwidth_stats)
# Persistence impact (where applicable)
print("\n--- Connection Persistence Impact ---")
persist_protocols = self.df[self.df['protocol_base'].isin(['tls', 'https'])]
if len(persist_protocols) > 0:
persist_stats = persist_protocols.groupby(['protocol_base', 'persistence'])['duration_ms'].agg([
('mean', 'mean'),
('median', 'median')
]).round(2)
print(persist_stats)
return {
'protocol': protocol_stats,
'provider': provider_stats,
'dnssec': dnssec_stats,
'bandwidth': bandwidth_stats
}
def plot_latency_by_protocol(self, output_dir='plots'):
"""Violin plot of latency distribution by protocol"""
Path(output_dir).mkdir(exist_ok=True)
plt.figure(figsize=(14, 7))
# Order protocols logically
protocol_order = ['Plain DNS', 'DoT', 'DoH', 'DoH/3', 'DoQ']
available_protocols = [p for p in protocol_order if p in self.df['protocol_category'].values]
sns.violinplot(data=self.df, x='protocol_category', y='duration_ms',
order=available_protocols, inner='box', cut=0)
plt.title('DNS Query Latency Distribution by Protocol', fontsize=14, fontweight='bold')
plt.xlabel('Protocol', fontsize=12)
plt.ylabel('Response Time (ms)', fontsize=12)
plt.xticks(rotation=0)
# Add mean values as annotations
for i, protocol in enumerate(available_protocols):
mean_val = self.df[self.df['protocol_category'] == protocol]['duration_ms'].mean()
plt.text(i, mean_val, f'{mean_val:.1f}', ha='center', va='bottom', fontweight='bold')
plt.tight_layout()
plt.savefig(f'{output_dir}/latency_by_protocol.png', bbox_inches='tight')
plt.close()
print(f"✓ Saved: latency_by_protocol.png")
def plot_provider_comparison(self, output_dir='plots'):
"""Box plot comparing providers across protocols"""
Path(output_dir).mkdir(exist_ok=True)
fig, axes = plt.subplots(2, 2, figsize=(16, 12))
fig.suptitle('Provider Performance Comparison by Protocol', fontsize=16, fontweight='bold')
protocols = self.df['protocol_category'].unique()
protocols = [p for p in ['Plain DNS', 'DoT', 'DoH', 'DoH/3'] if p in protocols]
for idx, protocol in enumerate(protocols[:4]):
ax = axes[idx // 2, idx % 2]
data = self.df[self.df['protocol_category'] == protocol]
if len(data) > 0:
sns.boxplot(data=data, x='provider', y='duration_ms', ax=ax)
ax.set_title(f'{protocol}', fontsize=12, fontweight='bold')
ax.set_xlabel('Provider', fontsize=10)
ax.set_ylabel('Response Time (ms)', fontsize=10)
ax.tick_params(axis='x', rotation=45)
plt.tight_layout()
plt.savefig(f'{output_dir}/provider_comparison.png', bbox_inches='tight')
plt.close()
print(f"✓ Saved: provider_comparison.png")
def plot_dnssec_impact(self, output_dir='plots'):
"""Compare DNSSEC validation methods (trust vs auth)"""
Path(output_dir).mkdir(exist_ok=True)
# Filter for protocols that have DNSSEC variations
dnssec_data = self.df[self.df['dnssec_mode'] != 'none'].copy()
if len(dnssec_data) == 0:
print("⚠ No DNSSEC data available")
return
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 6))
# Plot 1: Overall DNSSEC impact
protocol_order = ['Plain DNS', 'DoT', 'DoH', 'DoH/3', 'DoQ']
available = [p for p in protocol_order if p in self.df['protocol_category'].values]
sns.barplot(data=self.df, x='protocol_category', y='duration_ms',
hue='dnssec_mode', order=available, ax=ax1, ci=95)
ax1.set_title('DNSSEC Validation Overhead by Protocol', fontsize=12, fontweight='bold')
ax1.set_xlabel('Protocol', fontsize=10)
ax1.set_ylabel('Mean Response Time (ms)', fontsize=10)
ax1.legend(title='DNSSEC Mode', labels=['No DNSSEC', 'Auth (Full)', 'Trust (Resolver)'])
ax1.tick_params(axis='x', rotation=0)
# Plot 2: Trust vs Auth comparison
comparison_data = dnssec_data.groupby(['protocol_category', 'dnssec_mode'])['duration_ms'].mean().reset_index()
pivot_data = comparison_data.pivot(index='protocol_category', columns='dnssec_mode', values='duration_ms')
if 'auth' in pivot_data.columns and 'trust' in pivot_data.columns:
pivot_data['overhead_pct'] = ((pivot_data['auth'] - pivot_data['trust']) / pivot_data['trust'] * 100)
pivot_data['overhead_pct'].plot(kind='bar', ax=ax2, color='coral')
ax2.set_title('Auth vs Trust: Additional Overhead (%)', fontsize=12, fontweight='bold')
ax2.set_xlabel('Protocol', fontsize=10)
ax2.set_ylabel('Additional Overhead (%)', fontsize=10)
ax2.axhline(y=0, color='black', linestyle='--', linewidth=0.8)
ax2.tick_params(axis='x', rotation=45)
ax2.grid(axis='y', alpha=0.3)
plt.tight_layout()
plt.savefig(f'{output_dir}/dnssec_impact.png', bbox_inches='tight')
plt.close()
print(f"✓ Saved: dnssec_impact.png")
def plot_persistence_impact(self, output_dir='plots'):
"""Analyze impact of connection persistence"""
Path(output_dir).mkdir(exist_ok=True)
persist_data = self.df[self.df['protocol_base'].isin(['tls', 'https'])].copy()
if len(persist_data) == 0:
print("⚠ No persistence data available")
return
plt.figure(figsize=(12, 6))
sns.barplot(data=persist_data, x='protocol_base', y='duration_ms',
hue='persistence', ci=95)
plt.title('Impact of Connection Persistence on Latency', fontsize=14, fontweight='bold')
plt.xlabel('Protocol', fontsize=12)
plt.ylabel('Mean Response Time (ms)', fontsize=12)
plt.legend(title='Keep-Alive', labels=['Disabled', 'Enabled'])
# Calculate and print the latency reduction from connection persistence
for protocol in persist_data['protocol_base'].unique():
protocol_data = persist_data[persist_data['protocol_base'] == protocol]
no_persist = protocol_data[protocol_data['persistence'] == False]['duration_ms'].mean()
with_persist = protocol_data[protocol_data['persistence'] == True]['duration_ms'].mean()
if not np.isnan(no_persist) and not np.isnan(with_persist):
reduction = ((no_persist - with_persist) / no_persist * 100)
print(f"{protocol}: {reduction:.1f}% reduction with persistence")
plt.tight_layout()
plt.savefig(f'{output_dir}/persistence_impact.png', bbox_inches='tight')
plt.close()
print(f"✓ Saved: persistence_impact.png")
def plot_bandwidth_overhead(self, output_dir='plots'):
"""Visualize bandwidth usage by protocol"""
Path(output_dir).mkdir(exist_ok=True)
bandwidth_data = self.df.groupby('protocol_category').agg({
'request_size_bytes': 'mean',
'response_size_bytes': 'mean'
}).reset_index()
bandwidth_data['total_bytes'] = (bandwidth_data['request_size_bytes'] +
bandwidth_data['response_size_bytes'])
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 6))
# Plot 1: Request vs Response sizes
x = np.arange(len(bandwidth_data))
width = 0.35
ax1.bar(x - width/2, bandwidth_data['request_size_bytes'], width,
label='Request', alpha=0.8)
ax1.bar(x + width/2, bandwidth_data['response_size_bytes'], width,
label='Response', alpha=0.8)
ax1.set_xlabel('Protocol', fontsize=12)
ax1.set_ylabel('Bytes', fontsize=12)
ax1.set_title('Average Request/Response Sizes', fontsize=12, fontweight='bold')
ax1.set_xticks(x)
ax1.set_xticklabels(bandwidth_data['protocol_category'])
ax1.legend()
ax1.grid(axis='y', alpha=0.3)
# Plot 2: Total bandwidth overhead vs UDP baseline
udp_total = bandwidth_data[bandwidth_data['protocol_category'] == 'Plain DNS']['total_bytes'].values
if len(udp_total) > 0:
bandwidth_data['overhead_vs_udp'] = ((bandwidth_data['total_bytes'] - udp_total[0]) / udp_total[0] * 100)
colors = ['green' if x < 0 else 'red' for x in bandwidth_data['overhead_vs_udp']]
ax2.bar(bandwidth_data['protocol_category'], bandwidth_data['overhead_vs_udp'],
color=colors, alpha=0.7)
ax2.axhline(y=0, color='black', linestyle='--', linewidth=0.8)
ax2.set_xlabel('Protocol', fontsize=12)
ax2.set_ylabel('Overhead vs Plain DNS (%)', fontsize=12)
ax2.set_title('Bandwidth Overhead', fontsize=12, fontweight='bold')
ax2.grid(axis='y', alpha=0.3)
plt.tight_layout()
plt.savefig(f'{output_dir}/bandwidth_overhead.png', bbox_inches='tight')
plt.close()
print(f"✓ Saved: bandwidth_overhead.png")
def plot_heatmap(self, output_dir='plots'):
"""Heatmap of provider-protocol performance"""
Path(output_dir).mkdir(exist_ok=True)
# Create pivot table
heatmap_data = self.df.groupby(['provider', 'protocol_category'])['duration_ms'].median().unstack()
plt.figure(figsize=(12, 8))
sns.heatmap(heatmap_data, annot=True, fmt='.1f', cmap='RdYlGn_r',
cbar_kws={'label': 'Median Latency (ms)'})
plt.title('DNS Provider-Protocol Performance Matrix', fontsize=14, fontweight='bold')
plt.xlabel('Protocol', fontsize=12)
plt.ylabel('Provider', fontsize=12)
plt.tight_layout()
plt.savefig(f'{output_dir}/provider_protocol_heatmap.png', bbox_inches='tight')
plt.close()
print(f"✓ Saved: provider_protocol_heatmap.png")
def plot_percentile_comparison(self, output_dir='plots'):
"""Plot percentile comparison across protocols"""
Path(output_dir).mkdir(exist_ok=True)
percentiles = [50, 75, 90, 95, 99]
protocol_order = ['Plain DNS', 'DoT', 'DoH', 'DoH/3', 'DoQ']
available = [p for p in protocol_order if p in self.df['protocol_category'].values]
percentile_data = []
for protocol in available:
data = self.df[self.df['protocol_category'] == protocol]['duration_ms']
for p in percentiles:
percentile_data.append({
'protocol': protocol,
'percentile': f'P{p}',
'latency': np.percentile(data, p)
})
percentile_df = pd.DataFrame(percentile_data)
plt.figure(figsize=(14, 7))
sns.barplot(data=percentile_df, x='protocol', y='latency', hue='percentile', order=available)
plt.title('Latency Percentiles by Protocol', fontsize=14, fontweight='bold')
plt.xlabel('Protocol', fontsize=12)
plt.ylabel('Response Time (ms)', fontsize=12)
plt.legend(title='Percentile', bbox_to_anchor=(1.05, 1), loc='upper left')
plt.tight_layout()
plt.savefig(f'{output_dir}/percentile_comparison.png', bbox_inches='tight')
plt.close()
print(f"✓ Saved: percentile_comparison.png")
def statistical_tests(self):
"""Perform statistical significance tests"""
print("\n" + "="*80)
print("STATISTICAL TESTS")
print("="*80)
# Test 1: Protocol differences (Kruskal-Wallis)
protocols = self.df['protocol_category'].unique()
if len(protocols) > 2:
groups = [self.df[self.df['protocol_category'] == p]['duration_ms'].values
for p in protocols]
h_stat, p_value = stats.kruskal(*groups)
print(f"\n--- Kruskal-Wallis Test (Protocol Differences) ---")
print(f"H-statistic: {h_stat:.4f}")
print(f"p-value: {p_value:.4e}")
print(f"Result: {'Significant' if p_value < 0.05 else 'Not significant'} differences between protocols")
# Test 2: DNSSEC impact (Mann-Whitney U)
if 'none' in self.df['dnssec_mode'].values and 'auth' in self.df['dnssec_mode'].values:
none_data = self.df[self.df['dnssec_mode'] == 'none']['duration_ms']
auth_data = self.df[self.df['dnssec_mode'] == 'auth']['duration_ms']
u_stat, p_value = stats.mannwhitneyu(none_data, auth_data, alternative='two-sided')
print(f"\n--- Mann-Whitney U Test (No DNSSEC vs Auth) ---")
print(f"U-statistic: {u_stat:.4f}")
print(f"p-value: {p_value:.4e}")
print(f"Result: {'Significant' if p_value < 0.05 else 'Not significant'} difference")
# Test 3: Trust vs Auth comparison
if 'trust' in self.df['dnssec_mode'].values and 'auth' in self.df['dnssec_mode'].values:
trust_data = self.df[self.df['dnssec_mode'] == 'trust']['duration_ms']
auth_data = self.df[self.df['dnssec_mode'] == 'auth']['duration_ms']
u_stat, p_value = stats.mannwhitneyu(trust_data, auth_data, alternative='two-sided')
print(f"\n--- Mann-Whitney U Test (Trust vs Auth) ---")
print(f"U-statistic: {u_stat:.4f}")
print(f"p-value: {p_value:.4e}")
print(f"Result: Auth is {'significantly' if p_value < 0.05 else 'not significantly'} slower than Trust")
def generate_latex_table(self, output_dir='plots'):
"""Generate LaTeX table for thesis"""
Path(output_dir).mkdir(exist_ok=True)
# Summary table by protocol
summary = self.df.groupby('protocol_category')['duration_ms'].agg([
('Mean', 'mean'),
('Median', 'median'),
('Std Dev', 'std'),
('P95', lambda x: x.quantile(0.95)),
('P99', lambda x: x.quantile(0.99))
]).round(2)
latex_code = summary.to_latex(float_format="%.2f")
with open(f'{output_dir}/summary_table.tex', 'w') as f:
f.write(latex_code)
print(f"✓ Saved: summary_table.tex")
print("\nLaTeX Table Preview:")
print(latex_code)
def run_full_analysis(self):
"""Run complete analysis pipeline"""
print("="*80)
print("DNS QoS Analysis - Starting Full Analysis")
print("="*80)
# Load data
print("\n[1/10] Loading data...")
self.load_all_data()
# Generate statistics
print("\n[2/10] Generating summary statistics...")
self.generate_summary_statistics()
# Statistical tests
print("\n[3/10] Running statistical tests...")
self.statistical_tests()
# Generate plots
print("\n[4/10] Creating latency by protocol plot...")
self.plot_latency_by_protocol()
print("\n[5/10] Creating provider comparison plot...")
self.plot_provider_comparison()
print("\n[6/10] Creating DNSSEC impact plot...")
self.plot_dnssec_impact()
print("\n[7/10] Creating persistence impact plot...")
self.plot_persistence_impact()
print("\n[8/10] Creating bandwidth overhead plot...")
self.plot_bandwidth_overhead()
print("\n[9/10] Creating heatmap...")
self.plot_heatmap()
print("\n[10/10] Creating percentile comparison...")
self.plot_percentile_comparison()
# Generate LaTeX table
print("\n[Bonus] Generating LaTeX table...")
self.generate_latex_table()
print("\n" + "="*80)
print("✓ Analysis Complete! Check the 'plots' directory for all visualizations.")
print("="*80)
if __name__ == "__main__":
analyzer = DNSAnalyzer(results_dir='results')
analyzer.run_full_analysis()
-536
@@ -1,536 +0,0 @@
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from pathlib import Path
import datetime
from dateutil import parser as date_parser
import dpkt
# Set style
sns.set_style("whitegrid")
plt.rcParams['figure.dpi'] = 300
plt.rcParams['savefig.dpi'] = 300
plt.rcParams['font.size'] = 10
class FastDNSAnalyzer:
def __init__(self, results_dir='results'):
self.results_dir = Path(results_dir)
self.all_data = []
def should_include_file(self, filename):
"""Filter out DNSSEC and non-persist files"""
name = filename.stem
if 'auth' in name or 'trust' in name:
return False
if name in ['tls', 'https']:
return False
return True
def parse_rfc3339_nano(self, timestamp_str):
"""Parse RFC3339Nano timestamp with timezone"""
try:
dt = date_parser.parse(timestamp_str)
return dt.astimezone(datetime.timezone.utc).timestamp()
except Exception as e:
print(f" Error parsing timestamp {timestamp_str}: {e}")
return None
def extract_bandwidth_from_pcap_fast(self, pcap_file, csv_data):
"""Fast bandwidth extraction using dpkt"""
print(f" Analyzing pcap: {pcap_file.name}")
try:
with open(pcap_file, 'rb') as f:
pcap = dpkt.pcap.Reader(f)
# Build query time windows
query_windows = []
for idx, row in csv_data.iterrows():
start_time = self.parse_rfc3339_nano(row['timestamp'])
if start_time is None:
continue
duration_seconds = row['duration_ns'] / 1_000_000_000
end_time = start_time + duration_seconds
query_windows.append({
'index': idx,
'start': start_time,
'end': end_time,
'bytes_sent': 0,
'bytes_received': 0,
'packets_sent': 0,
'packets_received': 0
})
if not query_windows:
print(" ✗ No valid query windows")
return None
# Sort windows for faster matching
query_windows.sort(key=lambda x: x['start'])
# Process packets
packet_count = 0
matched_count = 0
for timestamp, buf in pcap:
packet_count += 1
packet_size = len(buf)
# Quick parse to determine direction
try:
eth = dpkt.ethernet.Ethernet(buf)
# Get IP layer
if isinstance(eth.data, dpkt.ip.IP):
ip = eth.data
elif isinstance(eth.data, dpkt.ip6.IP6):
ip = eth.data
else:
continue
# Get transport layer
if isinstance(ip.data, dpkt.udp.UDP):
transport = ip.data
src_port = transport.sport
dst_port = transport.dport
elif isinstance(ip.data, dpkt.tcp.TCP):
transport = ip.data
src_port = transport.sport
dst_port = transport.dport
else:
continue
# Heuristic for direction: ephemeral client ports are normally higher than well-known server ports (53/443/853)
is_outbound = src_port > dst_port
# Linear scan over windows sorted by start time; stop early once windows begin after this packet
for window in query_windows:
if window['start'] <= timestamp <= window['end']:
if is_outbound:
window['bytes_sent'] += packet_size
window['packets_sent'] += 1
else:
window['bytes_received'] += packet_size
window['packets_received'] += 1
matched_count += 1
break
elif timestamp < window['start']:
break # No more windows to check
except Exception:
continue
print(f" ✓ Processed {packet_count} packets, matched {matched_count}")
# Convert to DataFrame
bandwidth_df = pd.DataFrame(query_windows)
return bandwidth_df[['index', 'bytes_sent', 'bytes_received',
'packets_sent', 'packets_received']]
except Exception as e:
print(f" ✗ Error reading pcap: {e}")
return None
def load_data(self):
"""Load all relevant CSV files and extract bandwidth from pcaps"""
print("Loading data and analyzing bandwidth...")
for provider_dir in self.results_dir.iterdir():
if not provider_dir.is_dir():
continue
provider = provider_dir.name
for csv_file in provider_dir.glob('*.csv'):
if not self.should_include_file(csv_file):
continue
try:
df = pd.read_csv(csv_file)
df['provider'] = provider
df['test_file'] = csv_file.stem
df['csv_path'] = str(csv_file)
# Find corresponding pcap file
pcap_file = csv_file.with_suffix('.pcap')
if pcap_file.exists():
print(f" Processing: {provider}/{csv_file.name}")
bandwidth_data = self.extract_bandwidth_from_pcap_fast(pcap_file, df)
if bandwidth_data is not None and len(bandwidth_data) > 0:
# Merge bandwidth data
df = df.reset_index(drop=True)
for col in ['bytes_sent', 'bytes_received', 'packets_sent', 'packets_received']:
df[col] = 0
for _, row in bandwidth_data.iterrows():
idx = int(row['index'])
if idx < len(df):
df.at[idx, 'bytes_sent'] = row['bytes_sent']
df.at[idx, 'bytes_received'] = row['bytes_received']
df.at[idx, 'packets_sent'] = row['packets_sent']
df.at[idx, 'packets_received'] = row['packets_received']
df['total_bytes'] = df['bytes_sent'] + df['bytes_received']
print(f" ✓ Extracted bandwidth for {len(df)} queries")
else:
print(f" ⚠ Could not extract bandwidth data")
else:
print(f" ⚠ No pcap found for {csv_file.name}")
self.all_data.append(df)
except Exception as e:
print(f" ✗ Error loading {csv_file}: {e}")
import traceback
traceback.print_exc()
print(f"\nTotal files loaded: {len(self.all_data)}")
def create_line_graphs(self, output_dir='output/line_graphs'):
"""Create line graphs for latency and bandwidth"""
Path(output_dir).mkdir(parents=True, exist_ok=True)
print("\nGenerating line graphs...")
for df in self.all_data:
provider = df['provider'].iloc[0]
test_name = df['test_file'].iloc[0]
df['query_index'] = range(1, len(df) + 1)
# Create figure with 2 subplots
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(14, 10))
# Plot 1: Latency
ax1.plot(df['query_index'], df['duration_ms'], marker='o',
markersize=4, linewidth=1, alpha=0.7, color='steelblue')
mean_latency = df['duration_ms'].mean()
ax1.axhline(y=mean_latency, color='r', linestyle='--',
label=f'Mean: {mean_latency:.2f} ms', linewidth=2)
ax1.set_xlabel('Query Number', fontsize=12)
ax1.set_ylabel('Latency (ms)', fontsize=12)
ax1.set_title('Latency Over Time', fontsize=12, fontweight='bold')
ax1.legend()
ax1.grid(True, alpha=0.3)
# Plot 2: Bandwidth
if 'total_bytes' in df.columns and df['total_bytes'].sum() > 0:
ax2.plot(df['query_index'], df['bytes_sent'], marker='s',
markersize=4, linewidth=1, alpha=0.7,
color='orange', label='Sent')
ax2.plot(df['query_index'], df['bytes_received'], marker='^',
markersize=4, linewidth=1, alpha=0.7,
color='green', label='Received')
mean_sent = df['bytes_sent'].mean()
mean_received = df['bytes_received'].mean()
ax2.axhline(y=mean_sent, color='orange', linestyle='--',
linewidth=1.5, alpha=0.5)
ax2.axhline(y=mean_received, color='green', linestyle='--',
linewidth=1.5, alpha=0.5)
ax2.set_xlabel('Query Number', fontsize=12)
ax2.set_ylabel('Bytes', fontsize=12)
ax2.set_title(f'Bandwidth Over Time (Mean: ↑{mean_sent:.0f}B ↓{mean_received:.0f}B)',
fontsize=12, fontweight='bold')
ax2.legend()
ax2.grid(True, alpha=0.3)
fig.suptitle(f'{provider.upper()} - {test_name}',
fontsize=14, fontweight='bold')
plt.tight_layout()
filename = f"{provider}_{test_name}.png"
plt.savefig(f'{output_dir}/{filename}', bbox_inches='tight')
plt.close()
print(f" ✓ Created: {filename}")
def get_protocol_name(self, test_file):
"""Extract clean protocol name"""
name = test_file.replace('-persist', '')
protocol_map = {
'udp': 'Plain DNS (UDP)',
'tls': 'DoT (DNS over TLS)',
'https': 'DoH (DNS over HTTPS)',
'doh3': 'DoH/3 (DNS over HTTP/3)',
'doq': 'DoQ (DNS over QUIC)'
}
return protocol_map.get(name, name.upper())
def create_resolver_comparison_bars(self, output_dir='output/comparisons'):
"""Create bar graphs comparing resolvers for latency and bandwidth"""
Path(output_dir).mkdir(parents=True, exist_ok=True)
print("\nGenerating resolver comparison graphs...")
combined_df = pd.concat(self.all_data, ignore_index=True)
protocols = combined_df['test_file'].unique()
for protocol in protocols:
protocol_data = combined_df[combined_df['test_file'] == protocol]
protocol_name = self.get_protocol_name(protocol)
# Latency stats
latency_stats = protocol_data.groupby('provider')['duration_ms'].agg([
('mean', 'mean'),
('median', 'median'),
('std', 'std')
]).reset_index()
# Create latency comparison
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 6))
fig.suptitle(f'{protocol_name} - Latency Comparison',
fontsize=16, fontweight='bold')
# Mean latency
bars1 = ax1.bar(latency_stats['provider'], latency_stats['mean'],
color='steelblue', alpha=0.8, edgecolor='black')
ax1.errorbar(latency_stats['provider'], latency_stats['mean'],
yerr=latency_stats['std'], fmt='none', color='black',
capsize=5, alpha=0.6)
for bar in bars1:
height = bar.get_height()
ax1.text(bar.get_x() + bar.get_width()/2., height,
f'{height:.2f}',
ha='center', va='bottom', fontweight='bold')
ax1.set_xlabel('Resolver', fontsize=12)
ax1.set_ylabel('Mean Latency (ms)', fontsize=12)
ax1.set_title('Mean Latency', fontsize=12)
ax1.grid(axis='y', alpha=0.3)
# Median latency
bars2 = ax2.bar(latency_stats['provider'], latency_stats['median'],
color='coral', alpha=0.8, edgecolor='black')
for bar in bars2:
height = bar.get_height()
ax2.text(bar.get_x() + bar.get_width()/2., height,
f'{height:.2f}',
ha='center', va='bottom', fontweight='bold')
ax2.set_xlabel('Resolver', fontsize=12)
ax2.set_ylabel('Median Latency (ms)', fontsize=12)
ax2.set_title('Median Latency', fontsize=12)
ax2.grid(axis='y', alpha=0.3)
plt.tight_layout()
plt.savefig(f'{output_dir}/latency_{protocol}.png', bbox_inches='tight')
plt.close()
print(f" ✓ Created: latency_{protocol}.png")
# Bandwidth comparison
if 'total_bytes' in protocol_data.columns and protocol_data['total_bytes'].sum() > 0:
bandwidth_stats = protocol_data.groupby('provider').agg({
'bytes_sent': 'mean',
'bytes_received': 'mean',
'total_bytes': 'mean'
}).reset_index()
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 6))
fig.suptitle(f'{protocol_name} - Bandwidth Comparison',
fontsize=16, fontweight='bold')
# Sent vs Received
x = np.arange(len(bandwidth_stats))
width = 0.35
bars1 = ax1.bar(x - width/2, bandwidth_stats['bytes_sent'], width,
label='Sent', color='orange', alpha=0.8, edgecolor='black')
bars2 = ax1.bar(x + width/2, bandwidth_stats['bytes_received'], width,
label='Received', color='green', alpha=0.8, edgecolor='black')
ax1.set_xlabel('Resolver', fontsize=12)
ax1.set_ylabel('Bytes per Query', fontsize=12)
ax1.set_title('Average Bandwidth per Query', fontsize=12)
ax1.set_xticks(x)
ax1.set_xticklabels(bandwidth_stats['provider'])
ax1.legend()
ax1.grid(axis='y', alpha=0.3)
# Total bandwidth
bars3 = ax2.bar(bandwidth_stats['provider'], bandwidth_stats['total_bytes'],
color='purple', alpha=0.8, edgecolor='black')
for bar in bars3:
height = bar.get_height()
ax2.text(bar.get_x() + bar.get_width()/2., height,
f'{height:.0f}',
ha='center', va='bottom', fontweight='bold')
ax2.set_xlabel('Resolver', fontsize=12)
ax2.set_ylabel('Total Bytes per Query', fontsize=12)
ax2.set_title('Total Bandwidth per Query', fontsize=12)
ax2.grid(axis='y', alpha=0.3)
plt.tight_layout()
plt.savefig(f'{output_dir}/bandwidth_{protocol}.png', bbox_inches='tight')
plt.close()
print(f" ✓ Created: bandwidth_{protocol}.png")
def generate_latex_tables(self, output_dir='output/tables'):
"""Generate LaTeX tables with latency and bandwidth statistics"""
Path(output_dir).mkdir(parents=True, exist_ok=True)
print("\nGenerating LaTeX tables...")
combined_df = pd.concat(self.all_data, ignore_index=True)
# Generate latency table for each resolver
for provider in combined_df['provider'].unique():
provider_data = combined_df[combined_df['provider'] == provider]
stats = provider_data.groupby('test_file')['duration_ms'].agg([
('Mean', 'mean'),
('Median', 'median'),
('Std Dev', 'std'),
('P95', lambda x: x.quantile(0.95)),
('P99', lambda x: x.quantile(0.99))
]).round(2)
stats.index = stats.index.map(self.get_protocol_name)
stats.index.name = 'Protocol'
latex_code = stats.to_latex(
caption=f'{provider.upper()} - Latency Statistics (ms)',
label=f'tab:{provider}_latency',
float_format="%.2f"
)
with open(f'{output_dir}/{provider}_latency.tex', 'w') as f:
f.write(latex_code)
print(f" ✓ Created: {provider}_latency.tex")
# Generate bandwidth table for each resolver
for provider in combined_df['provider'].unique():
provider_data = combined_df[combined_df['provider'] == provider]
if 'total_bytes' not in provider_data.columns or provider_data['total_bytes'].sum() == 0:
continue
bandwidth_stats = provider_data.groupby('test_file').agg({
'bytes_sent': 'mean',
'bytes_received': 'mean',
'total_bytes': 'mean'
}).round(2)
bandwidth_stats.columns = ['Avg Sent (B)', 'Avg Received (B)', 'Avg Total (B)']
bandwidth_stats.index = bandwidth_stats.index.map(self.get_protocol_name)
bandwidth_stats.index.name = 'Protocol'
latex_code = bandwidth_stats.to_latex(
caption=f'{provider.upper()} - Bandwidth Statistics',
label=f'tab:{provider}_bandwidth',
float_format="%.2f"
)
with open(f'{output_dir}/{provider}_bandwidth.tex', 'w') as f:
f.write(latex_code)
print(f" ✓ Created: {provider}_bandwidth.tex")
# Generate protocol efficiency table
print("\nGenerating protocol efficiency table...")
if 'total_bytes' in combined_df.columns and combined_df['total_bytes'].sum() > 0:
protocol_bandwidth = combined_df.groupby('test_file').agg({
'bytes_sent': 'mean',
'bytes_received': 'mean',
'total_bytes': 'mean'
}).round(2)
# Find UDP baseline
udp_baseline = None
for protocol in protocol_bandwidth.index:
if 'udp' in protocol:
udp_baseline = protocol_bandwidth.loc[protocol, 'total_bytes']
break
if udp_baseline and udp_baseline > 0:
protocol_bandwidth['Overhead vs UDP (%)'] = (
(protocol_bandwidth['total_bytes'] - udp_baseline) / udp_baseline * 100
).round(1)
protocol_bandwidth['Efficiency (%)'] = (
100 / (1 + protocol_bandwidth['Overhead vs UDP (%)'] / 100)
).round(1)
protocol_bandwidth.columns = ['Avg Sent (B)', 'Avg Received (B)',
'Avg Total (B)', 'Overhead (%)', 'Efficiency (%)']
protocol_bandwidth.index = protocol_bandwidth.index.map(self.get_protocol_name)
protocol_bandwidth.index.name = 'Protocol'
latex_code = protocol_bandwidth.to_latex(
caption='Protocol Bandwidth Efficiency Comparison',
label='tab:protocol_efficiency',
float_format="%.2f"
)
with open(f'{output_dir}/protocol_efficiency.tex', 'w') as f:
f.write(latex_code)
print(f" ✓ Created: protocol_efficiency.tex")
print("\n--- Protocol Efficiency ---")
print(protocol_bandwidth.to_string())
# Generate combined comparison tables
for metric in ['Mean', 'Median', 'P95']:
comparison_stats = combined_df.groupby(['provider', 'test_file'])['duration_ms'].agg([
('Mean', 'mean'),
('Median', 'median'),
('P95', lambda x: x.quantile(0.95))
]).round(2)
pivot_table = comparison_stats[metric].unstack(level=0)
pivot_table.index = pivot_table.index.map(self.get_protocol_name)
pivot_table.index.name = 'Protocol'
latex_code = pivot_table.to_latex(
caption=f'Resolver Latency Comparison - {metric} (ms)',
label=f'tab:comparison_{metric.lower()}',
float_format="%.2f"
)
with open(f'{output_dir}/comparison_{metric.lower()}.tex', 'w') as f:
f.write(latex_code)
print(f" ✓ Created: comparison_{metric.lower()}.tex")
def run_analysis(self):
"""Run the complete analysis"""
print("="*80)
print("Fast DNS QoS Analysis with Bandwidth")
print("="*80)
self.load_data()
if not self.all_data:
print("\n⚠ No data loaded.")
return
print("\n" + "="*80)
self.create_line_graphs()
print("\n" + "="*80)
self.create_resolver_comparison_bars()
print("\n" + "="*80)
self.generate_latex_tables()
print("\n" + "="*80)
print("✓ Analysis Complete!")
print("="*80)
if __name__ == "__main__":
analyzer = FastDNSAnalyzer(results_dir='results')
analyzer.run_analysis()
@@ -15,6 +15,15 @@ import dpkt
from dateutil import parser as date_parser
BANDWIDTH_COLUMNS = [
'bytes_sent',
'bytes_received',
'packets_sent',
'packets_received',
'total_bytes',
]
class Packet(NamedTuple):
"""Lightweight packet representation."""
timestamp: float
@@ -36,6 +45,36 @@ class QueryWindow:
self.pkts_received = 0
def is_already_processed(csv_path: Path) -> bool:
"""
Check if CSV has already been processed.
Returns True if bandwidth columns exist AND at least one row has non-zero data.
"""
try:
with open(csv_path, 'r', encoding='utf-8') as f:
reader = csv.DictReader(f)
# Check if columns exist
if not reader.fieldnames:
return False
if not all(col in reader.fieldnames for col in BANDWIDTH_COLUMNS):
return False
# Check if any row has non-zero bandwidth data
for row in reader:
for col in BANDWIDTH_COLUMNS:
val = row.get(col, '').strip()
if val and val != '0':
return True
# All rows have zero/empty values - not truly processed
return False
except Exception:
return False
def parse_csv_timestamp(ts_str: str) -> float:
"""Convert RFC3339Nano timestamp to Unix epoch (seconds)."""
dt = date_parser.isoparse(ts_str)
@@ -249,24 +288,20 @@ def write_enriched_csv(
shutil.copy2(csv_path, backup_path)
print(f" Backup: {backup_path.name}")
# Get fieldnames
original_fields = list(queries[0]['data'].keys())
new_fields = [
'bytes_sent',
'bytes_received',
'packets_sent',
'packets_received',
'total_bytes',
# Get fieldnames - filter out any existing bandwidth columns to avoid dupes
original_fields = [
f for f in queries[0]['data'].keys()
if f not in BANDWIDTH_COLUMNS
]
fieldnames = original_fields + new_fields
fieldnames = original_fields + BANDWIDTH_COLUMNS
with open(csv_path, 'w', encoding='utf-8', newline='') as f:
writer = csv.DictWriter(f, fieldnames=fieldnames)
writer.writeheader()
for q in queries:
row = q['data'].copy()
for field in new_fields:
row = {k: v for k, v in q['data'].items() if k not in BANDWIDTH_COLUMNS}
for field in BANDWIDTH_COLUMNS:
row[field] = q[field]
writer.writerow(row)
@@ -281,6 +316,7 @@ def process_provider_directory(provider_path: Path):
csv_files = sorted(provider_path.glob('*.csv'))
processed = 0
skipped = 0
total_time = 0
for csv_path in csv_files:
@@ -294,6 +330,12 @@ def process_provider_directory(provider_path: Path):
print(f"\n ⚠ Skipping {csv_path.name} - no matching PCAP")
continue
# Check if already processed
if is_already_processed(csv_path):
print(f"\n ⏭ Skipping {csv_path.name} - already processed")
skipped += 1
continue
print(f"\n 📁 {csv_path.name}")
file_start = time.time()
@@ -323,7 +365,8 @@ def process_provider_directory(provider_path: Path):
print(f" ✓ Completed in {file_time:.2f}s")
print(f"\n {'='*58}")
print(f" {provider_path.name}: {processed} files in {total_time:.2f}s")
print(f" {provider_path.name}: {processed} processed, {skipped} skipped")
print(f" Time: {total_time:.2f}s")
print(f" {'='*58}")
+207
@@ -0,0 +1,207 @@
#!/usr/bin/env python3
"""
Merge all DNS test CSVs into a single unified CSV.
Extracts metadata from filenames and directory structure.
"""
import csv
import os
from pathlib import Path
from dateutil import parser as date_parser
import argparse
def parse_config(filename: str) -> dict:
"""
Parse protocol, dnssec_mode, and keep_alive from filename.
Examples:
doh3-auth.csv → protocol=doh3, dnssec=auth, persist=0
tls-trust-persist.csv → protocol=tls, dnssec=trust, persist=1
https.csv → protocol=https, dnssec=off, persist=0
doudp-auth.csv → protocol=doudp, dnssec=auth, persist=0
dnscrypt-trust.csv → protocol=dnscrypt, dnssec=trust, persist=0
"""
base = filename.replace('.csv', '')
parts = base.split('-')
protocol = parts[0]
dnssec_mode = 'off'
keep_alive = 0
for part in parts[1:]:
if part in ('auth', 'trust'):
dnssec_mode = part
elif part == 'persist':
keep_alive = 1
return {
'protocol': protocol,
'dnssec_mode': dnssec_mode,
'keep_alive': keep_alive,
}
def parse_timestamp_unix(ts_str: str) -> float:
"""Convert RFC3339 timestamp to Unix epoch."""
try:
dt = date_parser.isoparse(ts_str)
return dt.timestamp()
except Exception:
return 0.0
def ns_to_ms(duration_ns: str) -> float:
"""Convert nanoseconds to milliseconds."""
try:
return float(duration_ns) / 1_000_000
except (ValueError, TypeError):
return 0.0
def find_csv_files(input_dir: Path) -> list:
"""Find all non-backup CSV files."""
files = []
for csv_path in input_dir.rglob('*.csv'):
if '.bak' in csv_path.name:
continue
files.append(csv_path)
return sorted(files)
def merge_all_csvs(input_dir: Path, output_path: Path):
"""Merge all CSVs into a single file."""
csv_files = find_csv_files(input_dir)
if not csv_files:
print("No CSV files found")
return
print(f"Found {len(csv_files)} CSV files")
# Output columns in desired order
output_columns = [
'id',
'provider',
'protocol',
'dnssec_mode',
'domain',
'query_type',
'keep_alive',
'dns_server',
'timestamp',
'timestamp_unix',
'duration_ns',
'duration_ms',
'request_size_bytes',
'response_size_bytes',
'bytes_sent',
'bytes_received',
'packets_sent',
'packets_received',
'total_bytes',
'response_code',
'error',
]
global_id = 0
total_rows = 0
with open(output_path, 'w', newline='', encoding='utf-8') as outfile:
writer = csv.DictWriter(outfile, fieldnames=output_columns)
writer.writeheader()
for csv_path in csv_files:
# Extract provider from path
provider = csv_path.parent.name.lower()
# Parse config from filename
config = parse_config(csv_path.name)
print(f" {provider}/{csv_path.name} ({config['protocol']}, {config['dnssec_mode']}, persist={config['keep_alive']})")
file_rows = 0
with open(csv_path, 'r', newline='', encoding='utf-8') as infile:
reader = csv.DictReader(infile)
for row in reader:
global_id += 1
file_rows += 1
# Build output row
out_row = {
'id': global_id,
'provider': provider,
'protocol': config['protocol'],
'dnssec_mode': config['dnssec_mode'],
'keep_alive': config['keep_alive'],
'domain': row.get('domain', ''),
'query_type': row.get('query_type', ''),
'dns_server': row.get('dns_server', ''),
'timestamp': row.get('timestamp', ''),
'timestamp_unix': parse_timestamp_unix(row.get('timestamp', '')),
'duration_ns': row.get('duration_ns', ''),
'duration_ms': ns_to_ms(row.get('duration_ns', '')),
'request_size_bytes': row.get('request_size_bytes', ''),
'response_size_bytes': row.get('response_size_bytes', ''),
'bytes_sent': row.get('bytes_sent', ''),
'bytes_received': row.get('bytes_received', ''),
'packets_sent': row.get('packets_sent', ''),
'packets_received': row.get('packets_received', ''),
'total_bytes': row.get('total_bytes', ''),
'response_code': row.get('response_code', ''),
'error': row.get('error', ''),
}
writer.writerow(out_row)
total_rows += file_rows
print(f"{file_rows:,} rows")
print(f"\n{'='*60}")
print(f"Output: {output_path}")
print(f"Total rows: {total_rows:,}")
print(f"{'='*60}")
def main():
parser = argparse.ArgumentParser(
description='Merge all DNS test CSVs into a single file'
)
parser.add_argument(
'input_dir',
nargs='?',
default='.',
help='Input directory containing provider folders (default: .)'
)
parser.add_argument(
'-o', '--output',
default='dns_results.csv',
help='Output CSV path (default: dns_results.csv)'
)
args = parser.parse_args()
input_dir = Path(args.input_dir)
output_path = Path(args.output)
if not input_dir.exists():
print(f"Error: Input directory not found: {input_dir}")
return 1
print("="*60)
print("MERGE ALL DNS CSVs")
print("="*60)
print(f"Input: {input_dir}")
print(f"Output: {output_path}")
print()
merge_all_csvs(input_dir, output_path)
return 0
if __name__ == '__main__':
exit(main())
+253
@@ -0,0 +1,253 @@
#!/bin/bash
# Exit on error
set -e
# Default values
TOOL_PATH="./qol"
DOMAINS_FILE="./domains.txt"
OUTPUT_DIR="./results"
INTERFACE="veth1"
TIMEOUT="5s"
SLEEP_TIME="1"
# Parse arguments
while [[ $# -gt 0 ]]; do
case $1 in
-t|--tool-path)
TOOL_PATH="$2"
shift 2
;;
-d|--domains-file)
DOMAINS_FILE="$2"
shift 2
;;
-o|--output-dir)
OUTPUT_DIR="$2"
shift 2
;;
-I|--interface)
INTERFACE="$2"
shift 2
;;
-T|--timeout)
TIMEOUT="$2"
shift 2
;;
-s|--sleep)
SLEEP_TIME="$2"
shift 2
;;
--help)
echo "Usage: $0 [OPTIONS]"
echo ""
echo "Options:"
echo " -t, --tool-path PATH Path to qol tool (default: ./qol)"
echo " -d, --domains-file PATH Path to domains file (default: ./domains.txt)"
echo " -o, --output-dir PATH Output directory (default: ./results)"
echo " -I, --interface NAME Network interface (default: veth1)"
echo " -T, --timeout DURATION Timeout duration (default: 5s)"
echo " -s, --sleep SECONDS Sleep between runs (default: 1)"
echo " --help Show this help"
exit 0
;;
*)
echo "Unknown option: $1"
echo "Use --help for usage information"
exit 1
;;
esac
done
echo "Configuration:"
echo " Tool path: $TOOL_PATH"
echo " Domains file: $DOMAINS_FILE"
echo " Output dir: $OUTPUT_DIR"
echo " Interface: $INTERFACE"
echo " Timeout: $TIMEOUT"
echo " Sleep time: ${SLEEP_TIME}s"
echo ""
# Server definitions as associative arrays (name -> url)
declare -A CONN_SERVERS=(
["google-dotcp"]="dotcp://8.8.8.8:53"
["cloudflare-dotcp"]="dotcp://1.1.1.1:53"
["quad9-dotcp"]="dotcp://9.9.9.9:53"
["adguard-dotcp"]="dotcp://dns.adguard-dns.com:53"
["google-dot"]="tls://8.8.8.8:853"
["cloudflare-dot"]="tls://1.1.1.1:853"
["quad9-dot"]="tls://9.9.9.9:853"
["adguard-dot"]="tls://dns.adguard-dns.com:853"
["google-doh"]="https://dns.google/dns-query"
["cloudflare-doh"]="https://cloudflare-dns.com/dns-query"
["quad9-doh"]="https://dns10.quad9.net/dns-query"
["adguard-doh"]="https://dns.adguard-dns.com/dns-query"
)
declare -A QUIC_SERVERS=(
["google-doh3"]="doh3://dns.google/dns-query"
["cloudflare-doh3"]="doh3://cloudflare-dns.com/dns-query"
["adguard-doh3"]="doh3://dns.adguard-dns.com/dns-query"
["adguard-doq"]="doq://dns.adguard-dns.com:853"
)
declare -A CONNLESS_SERVERS=(
["google-udp"]="udp://8.8.8.8:53"
["cloudflare-udp"]="udp://1.1.1.1:53"
["quad9-udp"]="udp://9.9.9.9:53"
["adguard-udp"]="udp://dns.adguard-dns.com:53"
["adguard-dnscrypt"]="sdns://AQMAAAAAAAAAETk0LjE0MC4xNC4xNDo1NDQzINErR_JS3PLCu_iZEIbq95zkSV2LFsigxDIuUso_OQhzIjIuZG5zY3J5cHQuZGVmYXVsdC5uczEuYWRndWFyZC5jb20"
["quad9-dnscrypt"]="sdns://AQMAAAAAAAAAFDE0OS4xMTIuMTEyLjExMjo4NDQzIGfIR7jIdYzRICRVQ751Z0bfNN8dhMALjEcDaN-CHYY-GTIuZG5zY3J5cHQtY2VydC5xdWFkOS5uZXQ"
)
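# Note: the sdns:// values above are DNSCrypt server stamps, i.e. base64url blobs
# encoding the resolver address, provider public key, and provider name.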
# Function to get flags suffix for filename
get_flags_suffix() {
local dnssec="$1"
local auth="$2"
local keepalive="$3"
local suffix=""
if [[ "$dnssec" == "true" ]]; then
if [[ "$auth" == "true" ]]; then
suffix="auth"
else
suffix="trust"
fi
fi
if [[ "$keepalive" == "true" ]]; then
if [[ -n "$suffix" ]]; then
suffix="${suffix}-persist"
else
suffix="persist"
fi
fi
echo "$suffix"
}
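# Illustrative suffix mapping (dnssec auth keepalive -> suffix):
#   false false false -> ""              (plain run)
#   true  false true  -> "trust-persist"
#   true  true  false -> "auth"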
# Function to run with perf and capture CPU metrics
run_with_perf() {
local name="$1"
local url="$2"
local dnssec="$3"
local auth="$4"
local keepalive="$5"
local suffix=$(get_flags_suffix "$dnssec" "$auth" "$keepalive")
local base_name="${name}"
if [[ -n "$suffix" ]]; then
base_name="${name}-${suffix}"
fi
local cpu_csv_file="$OUTPUT_DIR/${name%%-*}/${base_name}.cpu.csv" # e.g., results/cloudflare/cloudflare-dot-trust-persist.cpu.csv
# Write header if needed
if [[ ! -f "$cpu_csv_file" ]]; then
echo "timestamp,wall_time_seconds,instructions,cycles,peak_rss_kb" > "$cpu_csv_file"
fi
# Build command arguments
local cmd_args=(
"$DOMAINS_FILE"
--output-dir "$OUTPUT_DIR"
--interface "$INTERFACE"
--timeout "$TIMEOUT"
-s "$url"
)
if [[ "$dnssec" == "true" ]]; then
cmd_args+=(--dnssec)
if [[ "$auth" == "true" ]]; then
cmd_args+=(--auth-dnssec)
fi
fi
if [[ "$keepalive" == "true" ]]; then
cmd_args+=(--keep-alive)
fi
# Create temp files for perf and time output
local perf_tmp=$(mktemp)
local time_tmp=$(mktemp)
# Run with perf stat and /usr/bin/time
local timestamp=$(date -Iseconds)
sudo perf stat -e instructions,cycles \
-o "$perf_tmp" \
/usr/bin/time -v \
"$TOOL_PATH" run "${cmd_args[@]}" 2>"$time_tmp" || true
# Parse perf output
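# The greps below assume the usual output formats (illustrative excerpts):
#   perf stat -o file :  "    1,234,567,890      instructions"
#                        "       12.345678901 seconds time elapsed"
#   /usr/bin/time -v  :  "Maximum resident set size (kbytes): 45678"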
local instructions=$(grep -oP '\d[\d,]*(?=\s+instructions)' "$perf_tmp" 2>/dev/null | tr -d ',' || echo "0")
local cycles=$(grep -oP '\d[\d,]*(?=\s+cycles)' "$perf_tmp" 2>/dev/null | tr -d ',' || echo "0")
local wall_time=$(grep -oP '\d+\.\d+(?= seconds time elapsed)' "$perf_tmp" 2>/dev/null || echo "0")
# Parse /usr/bin/time output for peak RSS
local peak_rss=$(grep "Maximum resident set size" "$time_tmp" 2>/dev/null | grep -oP '\d+' || echo "0")
# Append to CPU CSV
echo "${timestamp},${wall_time},${instructions},${cycles},${peak_rss}" >> "$cpu_csv_file"
# Cleanup
rm -f "$perf_tmp" "$time_tmp"
echo " -> CPU metrics saved to ${base_name}.cpu.csv"
}
# Function to run servers with given flags
run_server_group() {
local -n servers=$1
local dnssec="$2"
local auth="$3"
local keepalive="$4"
local desc="$5"
echo "Running: $desc"
for name in "${!servers[@]}"; do
local url="${servers[$name]}"
echo " Processing: $name ($url)"
run_with_perf "$name" "$url" "$dnssec" "$auth" "$keepalive"
sleep "$SLEEP_TIME"
done
}
echo "=== Running TCP-based protocols (TLS/HTTPS) ==="
# DNSSEC off, Keep off
run_server_group CONN_SERVERS "false" "false" "false" "no-dnssec, no-keepalive"
# DNSSEC off, Keep on
run_server_group CONN_SERVERS "false" "false" "true" "no-dnssec, keepalive"
# DNSSEC on (trust), Keep on
run_server_group CONN_SERVERS "true" "false" "true" "dnssec-trust, keepalive"
# DNSSEC on (auth), Keep on
run_server_group CONN_SERVERS "true" "true" "true" "dnssec-auth, keepalive"
echo ""
echo "=== Running QUIC-based protocols (DoH3/DoQ) ==="
# DNSSEC off
run_server_group QUIC_SERVERS "false" "false" "false" "no-dnssec"
# DNSSEC on (trust)
run_server_group QUIC_SERVERS "true" "false" "false" "dnssec-trust"
# DNSSEC on (auth)
run_server_group QUIC_SERVERS "true" "true" "false" "dnssec-auth"
echo ""
echo "=== Running connectionless protocols (UDP) ==="
# DNSSEC off
run_server_group CONNLESS_SERVERS "false" "false" "false" "no-dnssec"
# DNSSEC on (trust)
run_server_group CONNLESS_SERVERS "true" "false" "false" "dnssec-trust"
# DNSSEC on (auth)
run_server_group CONNLESS_SERVERS "true" "true" "false" "dnssec-auth"
echo ""
echo "All combinations completed!"
+87
@@ -0,0 +1,87 @@
#!/bin/bash
# Exit on error
set -e
# Default values
NETNS_NAME="myapp"
VETH_HOST="veth0"
VETH_NS="veth1"
HOST_IP="192.168.100.1"
NS_IP="192.168.100.2"
SUBNET="192.168.100.0/24"
PHYSICAL_IF="eth0"
# Parse arguments
while [[ $# -gt 0 ]]; do
case $1 in
-n|--namespace)
NETNS_NAME="$2"
shift 2
;;
-p|--physical-if)
PHYSICAL_IF="$2"
shift 2
;;
--help)
echo "Usage: $0 [OPTIONS]"
echo ""
echo "Options:"
echo " -n, --namespace NAME Namespace name (default: myapp)"
echo " -p, --physical-if NAME Physical interface (default: eth0)"
echo " --help Show this help"
exit 0
;;
*)
echo "Unknown option: $1"
echo "Use --help for usage information"
exit 1
;;
esac
done
echo "Configuration:"
echo " Namespace: $NETNS_NAME"
echo " Physical interface: $PHYSICAL_IF"
echo ""
echo "Creating network namespace: $NETNS_NAME"
sudo ip netns add $NETNS_NAME
echo "Creating veth pair: $VETH_HOST <-> $VETH_NS"
sudo ip link add $VETH_HOST type veth peer name $VETH_NS
echo "Moving $VETH_NS into namespace"
sudo ip link set $VETH_NS netns $NETNS_NAME
echo "Configuring host side ($VETH_HOST)"
sudo ip addr add $HOST_IP/24 dev $VETH_HOST
sudo ip link set $VETH_HOST up
echo "Configuring namespace side ($VETH_NS)"
sudo ip netns exec $NETNS_NAME ip addr add $NS_IP/24 dev $VETH_NS
sudo ip netns exec $NETNS_NAME ip link set $VETH_NS up
sudo ip netns exec $NETNS_NAME ip link set lo up
sudo ip netns exec $NETNS_NAME ip route add default via $HOST_IP
echo "Enabling IP forwarding"
sudo sysctl -w net.ipv4.ip_forward=1
echo "Disabling IPv6"
sudo ip netns exec $NETNS_NAME sysctl -w net.ipv6.conf.all.disable_ipv6=1
echo "Setting up NAT"
sudo iptables -t nat -A POSTROUTING -s $SUBNET -o $PHYSICAL_IF -j MASQUERADE
echo "Setting up forwarding rules"
sudo iptables -I FORWARD -i $VETH_HOST -o $PHYSICAL_IF -j ACCEPT
sudo iptables -I FORWARD -i $PHYSICAL_IF -o $VETH_HOST -j ACCEPT
echo ""
echo "Done! Network namespace '$NETNS_NAME' is ready."
echo ""
echo "To run your app in the namespace:"
echo " sudo ip netns exec $NETNS_NAME ./your_app"
echo ""
echo "To capture traffic:"
echo " sudo tshark -i $VETH_HOST -w app.pcap"
-369
@@ -1,369 +0,0 @@
package main
import (
"encoding/csv"
"fmt"
"log"
"os"
"path/filepath"
"strconv"
"strings"
"time"
"github.com/google/gopacket"
"github.com/google/gopacket/layers"
"github.com/google/gopacket/pcapgo"
)
type QueryRecord struct {
Domain string
QueryType string
Protocol string
DNSSec string
AuthDNSSec string
KeepAlive string
DNSServer string
Timestamp string
DurationNs int64
DurationMs float64
RequestSizeBytes int
ResponseSizeBytes int
ResponseCode string
Error string
BytesSent int64
BytesReceived int64
PacketsSent int64
PacketsReceived int64
TotalBytes int64
}
func parseRFC3339Nano(ts string) (time.Time, error) {
return time.Parse(time.RFC3339Nano, ts)
}
func processProviderFolder(providerPath string) error {
providerName := filepath.Base(providerPath)
fmt.Printf("\n=== Processing provider: %s ===\n", providerName)
files, err := os.ReadDir(providerPath)
if err != nil {
return err
}
processed := 0
skipped := 0
errors := 0
for _, file := range files {
if !strings.HasSuffix(file.Name(), ".csv") {
continue
}
csvPath := filepath.Join(providerPath, file.Name())
pcapPath := strings.Replace(csvPath, ".csv", ".pcap", 1)
// Check if PCAP exists
if _, err := os.Stat(pcapPath); os.IsNotExist(err) {
fmt.Printf(" ⊗ Skipping: %s (no matching PCAP)\n", file.Name())
skipped++
continue
}
// Check if already processed (has backup)
backupPath := csvPath + ".bak"
if _, err := os.Stat(backupPath); err == nil {
fmt.Printf(" ⊙ Skipping: %s (already processed, backup exists)\n", file.Name())
skipped++
continue
}
fmt.Printf(" ↻ Processing: %s ... ", file.Name())
if err := processPair(csvPath, pcapPath); err != nil {
fmt.Printf("ERROR\n")
log.Printf(" Error: %v\n", err)
errors++
} else {
fmt.Printf("✓\n")
processed++
}
}
fmt.Printf(" Summary: %d processed, %d skipped, %d errors\n", processed, skipped, errors)
return nil
}
func processPair(csvPath, pcapPath string) error {
// Create backup
backupPath := csvPath + ".bak"
input, err := os.ReadFile(csvPath)
if err != nil {
return fmt.Errorf("backup read failed: %w", err)
}
if err := os.WriteFile(backupPath, input, 0644); err != nil {
return fmt.Errorf("backup write failed: %w", err)
}
// Read CSV records
records, err := readCSV(csvPath)
if err != nil {
return fmt.Errorf("CSV read failed: %w", err)
}
if len(records) == 0 {
return fmt.Errorf("no records in CSV")
}
// Read and parse PCAP
packets, err := readPCAPGo(pcapPath)
if err != nil {
return fmt.Errorf("PCAP read failed: %w", err)
}
// Enrich records with bandwidth data
enrichRecords(records, packets)
// Write enriched CSV
if err := writeCSV(csvPath, records); err != nil {
return fmt.Errorf("CSV write failed: %w", err)
}
return nil
}
func readCSV(path string) ([]*QueryRecord, error) {
f, err := os.Open(path)
if err != nil {
return nil, err
}
defer f.Close()
r := csv.NewReader(f)
rows, err := r.ReadAll()
if err != nil {
return nil, err
}
if len(rows) < 2 {
return nil, fmt.Errorf("CSV has no data rows")
}
records := make([]*QueryRecord, 0, len(rows)-1)
for i := 1; i < len(rows); i++ {
row := rows[i]
if len(row) < 14 {
log.Printf(" Warning: Skipping malformed row %d", i+1)
continue
}
durationNs, _ := strconv.ParseInt(row[8], 10, 64)
durationMs, _ := strconv.ParseFloat(row[9], 64)
reqSize, _ := strconv.Atoi(row[10])
respSize, _ := strconv.Atoi(row[11])
records = append(records, &QueryRecord{
Domain: row[0],
QueryType: row[1],
Protocol: row[2],
DNSSec: row[3],
AuthDNSSec: row[4],
KeepAlive: row[5],
DNSServer: row[6],
Timestamp: row[7],
DurationNs: durationNs,
DurationMs: durationMs,
RequestSizeBytes: reqSize,
ResponseSizeBytes: respSize,
ResponseCode: row[12],
Error: row[13],
})
}
return records, nil
}
type PacketInfo struct {
Timestamp time.Time
Size int
IsSent bool
}
func readPCAPGo(path string) ([]PacketInfo, error) {
f, err := os.Open(path)
if err != nil {
return nil, err
}
defer f.Close()
reader, err := pcapgo.NewReader(f)
if err != nil {
return nil, err
}
var packets []PacketInfo
packetSource := gopacket.NewPacketSource(reader, reader.LinkType())
for packet := range packetSource.Packets() {
if packet.NetworkLayer() == nil {
continue
}
isDNS := false
isSent := false
// Check UDP layer (DNS, DoQ, DoH3)
if udpLayer := packet.Layer(layers.LayerTypeUDP); udpLayer != nil {
udp := udpLayer.(*layers.UDP)
isDNS = udp.SrcPort == 53 || udp.DstPort == 53 ||
udp.SrcPort == 853 || udp.DstPort == 853 ||
udp.SrcPort == 443 || udp.DstPort == 443
isSent = udp.DstPort == 53 || udp.DstPort == 853 || udp.DstPort == 443
}
// Check TCP layer (DoT, DoH)
if tcpLayer := packet.Layer(layers.LayerTypeTCP); tcpLayer != nil {
tcp := tcpLayer.(*layers.TCP)
isDNS = tcp.SrcPort == 53 || tcp.DstPort == 53 ||
tcp.SrcPort == 853 || tcp.DstPort == 853 ||
tcp.SrcPort == 443 || tcp.DstPort == 443
isSent = tcp.DstPort == 53 || tcp.DstPort == 853 || tcp.DstPort == 443
}
if isDNS {
packets = append(packets, PacketInfo{
Timestamp: packet.Metadata().Timestamp,
Size: len(packet.Data()),
IsSent: isSent,
})
}
}
return packets, nil
}
func enrichRecords(records []*QueryRecord, packets []PacketInfo) {
for _, rec := range records {
ts, err := parseRFC3339Nano(rec.Timestamp)
if err != nil {
log.Printf(" Warning: Failed to parse timestamp: %s", rec.Timestamp)
continue
}
// Define time window for this query
windowStart := ts
windowEnd := ts.Add(time.Duration(rec.DurationNs))
var sent, recv, pktSent, pktRecv int64
// Match packets within the time window
for _, pkt := range packets {
if (pkt.Timestamp.Equal(windowStart) || pkt.Timestamp.After(windowStart)) &&
pkt.Timestamp.Before(windowEnd) {
if pkt.IsSent {
sent += int64(pkt.Size)
pktSent++
} else {
recv += int64(pkt.Size)
pktRecv++
}
}
}
rec.BytesSent = sent
rec.BytesReceived = recv
rec.PacketsSent = pktSent
rec.PacketsReceived = pktRecv
rec.TotalBytes = sent + recv
}
}
func writeCSV(path string, records []*QueryRecord) error {
f, err := os.Create(path)
if err != nil {
return err
}
defer f.Close()
w := csv.NewWriter(f)
defer w.Flush()
// Write header
header := []string{
"domain", "query_type", "protocol", "dnssec", "auth_dnssec",
"keep_alive", "dns_server", "timestamp", "duration_ns", "duration_ms",
"request_size_bytes", "response_size_bytes", "response_code", "error",
"bytes_sent", "bytes_received", "packets_sent", "packets_received", "total_bytes",
}
if err := w.Write(header); err != nil {
return err
}
// Write data rows
for _, rec := range records {
row := []string{
rec.Domain,
rec.QueryType,
rec.Protocol,
rec.DNSSec,
rec.AuthDNSSec,
rec.KeepAlive,
rec.DNSServer,
rec.Timestamp,
strconv.FormatInt(rec.DurationNs, 10),
strconv.FormatFloat(rec.DurationMs, 'f', -1, 64),
strconv.Itoa(rec.RequestSizeBytes),
strconv.Itoa(rec.ResponseSizeBytes),
rec.ResponseCode,
rec.Error,
strconv.FormatInt(rec.BytesSent, 10),
strconv.FormatInt(rec.BytesReceived, 10),
strconv.FormatInt(rec.PacketsSent, 10),
strconv.FormatInt(rec.PacketsReceived, 10),
strconv.FormatInt(rec.TotalBytes, 10),
}
if err := w.Write(row); err != nil {
return err
}
}
return nil
}
func main() {
resultsDir := "results"
providers := []string{"adguard", "cloudflare", "google", "quad9"}
fmt.Println("╔═══════════════════════════════════════════════╗")
fmt.Println("║ DNS PCAP Preprocessor v1.0 ║")
fmt.Println("║ Enriching ALL CSVs with bandwidth metrics ║")
fmt.Println("╚═══════════════════════════════════════════════╝")
totalProcessed := 0
totalSkipped := 0
totalErrors := 0
for _, provider := range providers {
providerPath := filepath.Join(resultsDir, provider)
if _, err := os.Stat(providerPath); os.IsNotExist(err) {
fmt.Printf("\n⚠ Provider folder not found: %s\n", provider)
continue
}
if err := processProviderFolder(providerPath); err != nil {
log.Printf("Error processing %s: %v\n", provider, err)
totalErrors++
}
}
fmt.Println("\n╔═══════════════════════════════════════════════╗")
fmt.Println("║ Preprocessing Complete! ║")
fmt.Println("╚═══════════════════════════════════════════════╝")
fmt.Printf("\nAll CSV files now have 5 additional columns:\n")
fmt.Printf(" • bytes_sent - Total bytes sent to DNS server\n")
fmt.Printf(" • bytes_received - Total bytes received from DNS server\n")
fmt.Printf(" • packets_sent - Number of packets sent\n")
fmt.Printf(" • packets_received - Number of packets received\n")
fmt.Printf(" • total_bytes - Sum of sent + received bytes\n")
fmt.Printf("\n📁 Backups saved as: *.csv.bak\n")
fmt.Printf("\n💡 Tip: The analysis script will filter which files to visualize,\n")
fmt.Printf(" but all files now have complete bandwidth metrics!\n")
}
-367
@@ -1,367 +0,0 @@
#!/usr/bin/env python3
"""
Advanced PCAP filter for DNS traffic (with IPv6 support).
Filters out:
- Local network traffic except test machine (IPv4: 10.0.0.50; IPv6: specific addresses)
- AdGuard DNS servers (for non-AdGuard captures)
- Non-DNS traffic based on protocol-specific ports
"""
import os
import subprocess
from pathlib import Path
import argparse
# Test machine IPs (IPv4 and IPv6) used during capture
TEST_IPV4 = '10.0.0.50'
TEST_IPV6_GLOBAL = '2001:818:e73e:ba00:5506:dfd4:ed8b:96e'
TEST_IPV6_LINKLOCAL = 'fe80::fe98:c62e:4463:9a2d'
# Port mappings
PORT_MAP = {
'udp': [53], # DNS-over-UDP
'tls': [53, 853], # DNS-over-TLS
'https': [53, 443], # DNS-over-HTTPS (DoH)
'doq': [53, 784, 8853], # DNS-over-QUIC
'doh3': [53, 443] # DNS-over-HTTP/3
}
# AdGuard DNS IPs to filter out (for non-AdGuard captures)
ADGUARD_IPS = [
'94.140.14.14',
'94.140.15.15',
'2a10:50c0::ad1:ff',
'2a10:50c0::ad2:ff'
]
def parse_filename(filename):
"""Extract protocol from filename"""
base = filename.replace('.pcap', '').replace('.csv', '')
parts = base.split('-')
if len(parts) < 1: # Minimum: protocol
return None
protocol = parts[0].lower()
return protocol
def extract_resolver_from_path(pcap_path):
"""Extract resolver name from directory structure"""
parts = Path(pcap_path).parts
for part in parts:
if part.lower() in ['cloudflare', 'google', 'quad9', 'adguard']:
return part.lower()
return None
def build_filter_expression(protocol, resolver):
"""
Build tshark filter expression.
Strategy:
1. Only protocol-specific DNS ports
2. Keep only traffic involving the test machine (IPv4/IPv6)
3. Exclude AdGuard IPs for non-AdGuard captures
"""
# Get ports for this protocol
ports = PORT_MAP.get(protocol, [53, 443, 853, 784, 8853])
# Build port filter (UDP or TCP on these ports)
port_conditions = []
for port in ports:
port_conditions.append(f'(udp.port == {port} or tcp.port == {port})')
port_filter = ' or '.join(port_conditions)
# Build test machine filter (keep if src or dst is test machine IP)
machine_conditions = [f'(ip.addr == {TEST_IPV4})']
if TEST_IPV6_GLOBAL:
machine_conditions.append(f'(ipv6.addr == {TEST_IPV6_GLOBAL})')
if TEST_IPV6_LINKLOCAL:
machine_conditions.append(f'(ipv6.addr == {TEST_IPV6_LINKLOCAL})')
machine_filter = ' or '.join(machine_conditions)
# Build AdGuard exclusion filter
adguard_exclusions = []
if resolver != 'adguard':
for ip in ADGUARD_IPS:
if ':' in ip: # IPv6
adguard_exclusions.append(f'!(ipv6.addr == {ip})')
else: # IPv4
adguard_exclusions.append(f'!(ip.addr == {ip})')
# Combine all filters
filters = [f'({port_filter})', f'({machine_filter})']
if adguard_exclusions:
adguard_filter = ' and '.join(adguard_exclusions)
filters.append(f'({adguard_filter})')
final_filter = ' and '.join(filters)
return final_filter
def filter_pcap(input_path, output_path, filter_expr, verbose=False):
"""Apply filter to PCAP file using tshark"""
cmd = [
'tshark',
'-r', input_path,
'-Y', filter_expr,
'-w', output_path,
'-F', 'pcap'
]
try:
if verbose:
print(f" Filter: {filter_expr}")
result = subprocess.run(
cmd,
capture_output=True,
text=True,
timeout=300
)
if result.returncode != 0:
print(f" ✗ Error: {result.stderr.strip()}")
return False
if not os.path.exists(output_path):
print(f" ✗ Output file not created")
return False
output_size = os.path.getsize(output_path)
if output_size < 24:
print(f" ⚠ Warning: Output is empty")
return True
except subprocess.TimeoutExpired:
print(f" ✗ Timeout (>5 minutes)")
return False
except Exception as e:
print(f" ✗ Exception: {e}")
return False
def find_pcap_files(root_dir):
"""Recursively find all PCAP files"""
pcap_files = []
for root, dirs, files in os.walk(root_dir):
for file in files:
if file.endswith('.pcap'):
full_path = os.path.join(root, file)
pcap_files.append(full_path)
return sorted(pcap_files)
def format_bytes(bytes_val):
"""Format bytes as human readable"""
for unit in ['B', 'KB', 'MB', 'GB']:
if bytes_val < 1024.0:
return f"{bytes_val:.1f} {unit}"
bytes_val /= 1024.0
return f"{bytes_val:.1f} TB"
def main():
parser = argparse.ArgumentParser(
description='Advanced PCAP filter for DNS traffic (IPv4/IPv6)',
formatter_class=argparse.RawDescriptionHelpFormatter,
epilog='''
Filtering rules:
1. Only include traffic on protocol-specific DNS ports
2. Keep only packets involving the test machine (10.0.0.50 or its IPv6 addresses)
3. Exclude AdGuard IPs for non-AdGuard captures
Protocol-specific ports:
udp: 53
tls: 53, 853
https: 53, 443
doq: 53, 784, 8853
doh3: 53, 443
Examples:
# Dry run
%(prog)s ./results --dry-run
# Filter with verbose output
%(prog)s ./results --verbose
# Custom output directory
%(prog)s ./results --output ./cleaned
'''
)
parser.add_argument(
'input_dir',
help='Input directory containing PCAP files'
)
parser.add_argument(
'-o', '--output',
default='./results_filtered',
help='Output directory (default: ./results_filtered)'
)
parser.add_argument(
'--dry-run',
action='store_true',
help='Show what would be done without filtering'
)
parser.add_argument(
'--limit',
type=int,
help='Only process first N files (for testing)'
)
parser.add_argument(
'-v', '--verbose',
action='store_true',
help='Verbose output (show filter expressions)'
)
parser.add_argument(
'--overwrite',
action='store_true',
help='Overwrite existing filtered files'
)
args = parser.parse_args()
# Check for tshark
try:
result = subprocess.run(
['tshark', '-v'],
capture_output=True,
check=True
)
if args.verbose:
version = result.stdout.decode().split('\n')[0]
print(f"Using: {version}\n")
except (subprocess.CalledProcessError, FileNotFoundError):
print("Error: tshark not found. Install Wireshark/tshark:")
print(" Ubuntu/Debian: sudo apt-get install tshark")
print(" macOS: brew install wireshark")
return 1
print("=" * 80)
print("ADVANCED DNS PCAP FILTER (IPv4/IPv6)")
print("=" * 80)
print("Filters:")
print(" 1. Protocol-specific DNS ports only")
print(" 2. Keep only traffic involving test machine (10.0.0.50 / IPv6 addresses)")
print(" 3. Exclude AdGuard IPs (for non-AdGuard captures)")
print(f"\nInput: {args.input_dir}")
print(f"Output: {args.output}")
# Find PCAP files
print(f"\nScanning for PCAP files...")
pcap_files = find_pcap_files(args.input_dir)
if not pcap_files:
print(f"No PCAP files found in {args.input_dir}")
return 1
print(f"Found {len(pcap_files)} PCAP files")
total_input_size = sum(os.path.getsize(f) for f in pcap_files)
print(f"Total size: {format_bytes(total_input_size)}")
if args.limit:
pcap_files = pcap_files[:args.limit]
print(f"Limiting to first {args.limit} files")
if args.dry_run:
print("\n*** DRY RUN MODE ***\n")
else:
print()
# Process files
success_count = 0
skip_count = 0
fail_count = 0
total_output_size = 0
for i, input_path in enumerate(pcap_files, 1):
# Extract info from path
filename = Path(input_path).name
protocol = parse_filename(filename)
resolver = extract_resolver_from_path(input_path)
if not protocol:
print(f"[{i}/{len(pcap_files)}] {filename}")
print(f" ⚠ Could not parse protocol, skipping")
skip_count += 1
continue
# Create output path
rel_path = os.path.relpath(input_path, args.input_dir)
output_path = os.path.join(args.output, rel_path)
input_size = os.path.getsize(input_path)
print(f"[{i}/{len(pcap_files)}] {rel_path}")
print(f" Protocol: {protocol.upper()}")
print(f" Resolver: {resolver or 'unknown'}")
print(f" Size: {format_bytes(input_size)}")
# Check if already filtered
if os.path.exists(output_path) and not args.overwrite:
output_size = os.path.getsize(output_path)
reduction = ((input_size - output_size) / input_size * 100) if input_size > 0 else 0
print(f" ⊙ Already filtered: {format_bytes(output_size)} "
f"({reduction:.1f}% reduction)")
skip_count += 1
total_output_size += output_size
continue
# Build filter
filter_expr = build_filter_expression(protocol, resolver)
if args.dry_run:
print(f" → Would filter")
if args.verbose:
print(f" Filter: {filter_expr}")
continue
# Create output directory
os.makedirs(os.path.dirname(output_path), exist_ok=True)
# Filter
success = filter_pcap(input_path, output_path, filter_expr, args.verbose)
if success:
output_size = os.path.getsize(output_path)
reduction = ((input_size - output_size) / input_size * 100) if input_size > 0 else 0
print(f" ✓ Filtered: {format_bytes(output_size)} "
f"({reduction:.1f}% reduction)")
success_count += 1
total_output_size += output_size
else:
fail_count += 1
# Summary
print("\n" + "=" * 80)
print("SUMMARY")
print("=" * 80)
if args.dry_run:
print(f"Would process: {len(pcap_files)} files")
else:
print(f"Successful: {success_count}")
print(f"Skipped: {skip_count} (already filtered or unparseable)")
print(f"Failed: {fail_count}")
print(f"Total: {len(pcap_files)}")
if success_count > 0 or skip_count > 0:
print(f"\nInput size: {format_bytes(total_input_size)}")
print(f"Output size: {format_bytes(total_output_size)}")
if total_input_size > 0:
reduction = ((total_input_size - total_output_size) /
total_input_size * 100)
print(f"Reduction: {reduction:.1f}%")
print(f"\nOutput directory: {args.output}")
return 0 if fail_count == 0 else 1
if __name__ == "__main__":
exit(main())
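For concreteness, the display filter that build_filter_expression produces for a DoT ('tls') capture against a non-AdGuard resolver comes out as follows (derived by hand from the function above; line breaks and indentation added only for readability):

((udp.port == 53 or tcp.port == 53) or (udp.port == 853 or tcp.port == 853))
  and ((ip.addr == 10.0.0.50) or (ipv6.addr == 2001:818:e73e:ba00:5506:dfd4:ed8b:96e)
       or (ipv6.addr == fe80::fe98:c62e:4463:9a2d))
  and (!(ip.addr == 94.140.14.14) and !(ip.addr == 94.140.15.15)
       and !(ipv6.addr == 2a10:50c0::ad1:ff) and !(ipv6.addr == 2a10:50c0::ad2:ff))

tshark evaluates this with -Y while reading the original capture and writes the surviving packets with -w, so the filtered file contains only test-machine DNS traffic on the DoT ports.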
-426
View File
@@ -1,426 +0,0 @@
#!/usr/bin/env python3
"""
Convert DNS CSV files to SQLite database.
Creates a single normalized table with unified DNSSEC handling.
"""
import sqlite3
import csv
from pathlib import Path
from dateutil import parser as date_parser
def create_database_schema(conn: sqlite3.Connection):
"""Create the database schema with indexes."""
cursor = conn.cursor()
# Main queries table
cursor.execute("""
CREATE TABLE IF NOT EXISTS dns_queries (
id INTEGER PRIMARY KEY AUTOINCREMENT,
-- Metadata
provider TEXT NOT NULL,
protocol TEXT NOT NULL,
dnssec_mode TEXT NOT NULL CHECK(dnssec_mode IN ('off', 'auth', 'trust')),
-- Query details
domain TEXT NOT NULL,
query_type TEXT NOT NULL,
keep_alive BOOLEAN NOT NULL,
dns_server TEXT NOT NULL,
-- Timing
timestamp TEXT NOT NULL,
timestamp_unix REAL NOT NULL,
duration_ns INTEGER NOT NULL,
duration_ms REAL NOT NULL,
-- Size metrics
request_size_bytes INTEGER,
response_size_bytes INTEGER,
-- Network metrics (from PCAP)
bytes_sent INTEGER DEFAULT 0,
bytes_received INTEGER DEFAULT 0,
packets_sent INTEGER DEFAULT 0,
packets_received INTEGER DEFAULT 0,
total_bytes INTEGER DEFAULT 0,
-- Response
response_code TEXT,
error TEXT
)
""")
# Create indexes for common queries
cursor.execute("""
CREATE INDEX IF NOT EXISTS idx_provider
ON dns_queries(provider)
""")
cursor.execute("""
CREATE INDEX IF NOT EXISTS idx_protocol
ON dns_queries(protocol)
""")
cursor.execute("""
CREATE INDEX IF NOT EXISTS idx_dnssec_mode
ON dns_queries(dnssec_mode)
""")
cursor.execute("""
CREATE INDEX IF NOT EXISTS idx_keep_alive
ON dns_queries(keep_alive)
""")
cursor.execute("""
CREATE INDEX IF NOT EXISTS idx_provider_protocol_dnssec
ON dns_queries(provider, protocol, dnssec_mode)
""")
cursor.execute("""
CREATE INDEX IF NOT EXISTS idx_timestamp
ON dns_queries(timestamp_unix)
""")
cursor.execute("""
CREATE INDEX IF NOT EXISTS idx_domain
ON dns_queries(domain)
""")
conn.commit()
def parse_protocol_and_dnssec(filename: str) -> tuple[str, str, bool]:
"""
Extract base protocol, DNSSEC mode, and keep_alive from filename.
Returns (base_protocol, dnssec_mode, keep_alive)
Examples:
'udp.csv' -> ('udp', 'off', False)
'udp-auth.csv' -> ('udp', 'auth', False)
'tls.csv' -> ('tls', 'off', False)
'tls-persist.csv' -> ('tls', 'off', True)
'https-persist.csv' -> ('https', 'off', True)
'https-auth-persist.csv' -> ('https', 'auth', True)
'https-trust-persist.csv' -> ('https', 'trust', True)
'doh3-auth.csv' -> ('doh3', 'auth', False)
'doq.csv' -> ('doq', 'off', False)
"""
name = filename.replace('.csv', '')
# Check for persist suffix (keep_alive)
keep_alive = False
if name.endswith('-persist'):
keep_alive = True
name = name.replace('-persist', '')
# Check for DNSSEC suffix
dnssec_mode = 'off'
if name.endswith('-auth'):
dnssec_mode = 'auth'
name = name.replace('-auth', '')
elif name.endswith('-trust'):
dnssec_mode = 'trust'
name = name.replace('-trust', '')
# keep_alive is not applicable to UDP, DoH3, or DoQ in this setup, so normalise it to False
if name in ['udp', 'doh3', 'doq']:
keep_alive = False
return (name, dnssec_mode, keep_alive)
def str_to_bool(value: str) -> bool:
"""Convert string boolean to Python bool."""
return value.lower() in ('true', '1', 'yes')
def import_csv_to_db(
csv_path: Path,
provider: str,
conn: sqlite3.Connection
) -> int:
"""Import a CSV file into the database."""
protocol, dnssec_mode, keep_alive_from_filename = parse_protocol_and_dnssec(csv_path.name)
cursor = conn.cursor()
rows_imported = 0
with open(csv_path, 'r', encoding='utf-8') as f:
reader = csv.DictReader(f)
for row in reader:
try:
# Parse timestamp to Unix epoch
dt = date_parser.isoparse(row['timestamp'])
timestamp_unix = dt.timestamp()
# Use keep_alive from filename (more reliable than CSV)
keep_alive = keep_alive_from_filename
# Handle optional fields (may not exist in older CSVs)
bytes_sent = int(row.get('bytes_sent', 0) or 0)
bytes_received = int(row.get('bytes_received', 0) or 0)
packets_sent = int(row.get('packets_sent', 0) or 0)
packets_received = int(row.get('packets_received', 0) or 0)
total_bytes = int(row.get('total_bytes', 0) or 0)
cursor.execute("""
INSERT INTO dns_queries (
provider, protocol, dnssec_mode,
domain, query_type, keep_alive,
dns_server, timestamp, timestamp_unix,
duration_ns, duration_ms,
request_size_bytes, response_size_bytes,
bytes_sent, bytes_received, packets_sent, packets_received, total_bytes,
response_code, error
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
""", (
provider,
protocol,
dnssec_mode,
row['domain'],
row['query_type'],
keep_alive,
row['dns_server'],
row['timestamp'],
timestamp_unix,
int(row['duration_ns']),
float(row['duration_ms']),
int(row.get('request_size_bytes') or 0),
int(row.get('response_size_bytes') or 0),
bytes_sent,
bytes_received,
packets_sent,
packets_received,
total_bytes,
row.get('response_code', ''),
row.get('error', '')
))
rows_imported += 1
except Exception as e:
print(f" Warning: Skipping row - {e}")
continue
conn.commit()
return rows_imported
def main():
"""Main import pipeline."""
print("\n" + "="*60)
print("CSV to SQLite Database Converter")
print("="*60)
results_dir = Path('results')
db_path = Path('dns.db')
if not results_dir.exists():
print(f"\n❌ Error: '{results_dir}' directory not found")
return
# Remove existing database
if db_path.exists():
print(f"\n⚠ Removing existing database: {db_path}")
db_path.unlink()
# Create database and schema
print(f"\n📊 Creating database: {db_path}")
conn = sqlite3.connect(db_path)
create_database_schema(conn)
print("✓ Schema created")
# Import CSVs
providers = ['adguard', 'cloudflare', 'google', 'quad9']
total_rows = 0
total_files = 0
for provider in providers:
provider_path = results_dir / provider
if not provider_path.exists():
print(f"\n⚠ Skipping {provider} - directory not found")
continue
print(f"\n{'='*60}")
print(f"Importing: {provider.upper()}")
print(f"{'='*60}")
csv_files = sorted(provider_path.glob('*.csv'))
provider_rows = 0
provider_files = 0
for csv_path in csv_files:
# Skip backup files
if '.bak' in csv_path.name:
continue
protocol, dnssec, keep_alive = parse_protocol_and_dnssec(csv_path.name)
ka_str = "persistent" if keep_alive else "non-persist"
print(f" 📄 {csv_path.name:30}{protocol:8} (DNSSEC: {dnssec:5}, {ka_str})")
rows = import_csv_to_db(csv_path, provider, conn)
print(f" ✓ Imported {rows:,} rows")
provider_rows += rows
provider_files += 1
print(f"\n Total: {provider_files} files, {provider_rows:,} rows")
total_rows += provider_rows
total_files += provider_files
# Create summary
print(f"\n{'='*60}")
print("Database Summary")
print(f"{'='*60}")
cursor = conn.cursor()
# Total counts
cursor.execute("SELECT COUNT(*) FROM dns_queries")
total_queries = cursor.fetchone()[0]
cursor.execute("SELECT COUNT(DISTINCT provider) FROM dns_queries")
unique_providers = cursor.fetchone()[0]
cursor.execute("SELECT COUNT(DISTINCT protocol) FROM dns_queries")
unique_protocols = cursor.fetchone()[0]
cursor.execute("SELECT COUNT(DISTINCT domain) FROM dns_queries")
unique_domains = cursor.fetchone()[0]
print(f"\nTotal queries: {total_queries:,}")
print(f"Providers: {unique_providers}")
print(f"Protocols: {unique_protocols}")
print(f"Unique domains: {unique_domains}")
# Show breakdown by provider, protocol, DNSSEC, and keep_alive
print(f"\nBreakdown by Provider, Protocol, DNSSEC & Keep-Alive:")
print(f"{'-'*80}")
cursor.execute("""
SELECT provider, protocol, dnssec_mode, keep_alive, COUNT(*) as count
FROM dns_queries
GROUP BY provider, protocol, dnssec_mode, keep_alive
ORDER BY provider, protocol, dnssec_mode, keep_alive
""")
current_provider = None
for provider, protocol, dnssec, keep_alive, count in cursor.fetchall():
if current_provider != provider:
if current_provider is not None:
print()
current_provider = provider
ka_str = "" if keep_alive else ""
print(f" {provider:12} | {protocol:8} | {dnssec:5} | KA:{ka_str} | {count:6,} queries")
# Protocol distribution
print(f"\n{'-'*80}")
print("Protocol Distribution:")
print(f"{'-'*80}")
cursor.execute("""
SELECT protocol, COUNT(*) as count
FROM dns_queries
GROUP BY protocol
ORDER BY protocol
""")
for protocol, count in cursor.fetchall():
pct = (count / total_queries) * 100
print(f" {protocol:8} | {count:8,} queries ({pct:5.1f}%)")
# DNSSEC mode distribution
print(f"\n{'-'*80}")
print("DNSSEC Mode Distribution:")
print(f"{'-'*80}")
cursor.execute("""
SELECT dnssec_mode, COUNT(*) as count
FROM dns_queries
GROUP BY dnssec_mode
ORDER BY dnssec_mode
""")
for dnssec_mode, count in cursor.fetchall():
pct = (count / total_queries) * 100
print(f" {dnssec_mode:5} | {count:8,} queries ({pct:5.1f}%)")
# Keep-Alive distribution
print(f"\n{'-'*80}")
print("Keep-Alive Distribution:")
print(f"{'-'*80}")
cursor.execute("""
SELECT keep_alive, COUNT(*) as count
FROM dns_queries
GROUP BY keep_alive
""")
for keep_alive, count in cursor.fetchall():
ka_label = "Persistent" if keep_alive else "Non-persistent"
pct = (count / total_queries) * 100
print(f" {ka_label:15} | {count:8,} queries ({pct:5.1f}%)")
conn.close()
print(f"\n{'='*60}")
print(f"✓ Database created successfully: {db_path}")
print(f" Total: {total_files} files, {total_rows:,} rows")
print(f"{'='*60}\n")
# Print usage examples
print("\n📖 Usage Examples for Metabase:")
print(f"{'-'*60}")
print("\n1. Compare protocols (DNSSEC off, persistent only):")
print(""" SELECT provider, protocol,
AVG(duration_ms) as avg_latency,
AVG(total_bytes) as avg_bytes
FROM dns_queries
WHERE dnssec_mode = 'off' AND keep_alive = 1
GROUP BY provider, protocol;""")
print("\n2. DNSSEC impact on UDP:")
print(""" SELECT provider, dnssec_mode,
AVG(duration_ms) as avg_latency
FROM dns_queries
WHERE protocol = 'udp'
GROUP BY provider, dnssec_mode;""")
print("\n3. Keep-alive impact on TLS:")
print(""" SELECT provider, keep_alive,
AVG(duration_ms) as avg_latency,
AVG(total_bytes) as avg_bytes
FROM dns_queries
WHERE protocol = 'tls' AND dnssec_mode = 'off'
GROUP BY provider, keep_alive;""")
print("\n4. Time series for line graphs:")
print(""" SELECT timestamp_unix, duration_ms, total_bytes
FROM dns_queries
WHERE provider = 'cloudflare'
AND protocol = 'https'
AND dnssec_mode = 'off'
AND keep_alive = 1
ORDER BY timestamp_unix;""")
print("\n5. Overall comparison table:")
print(""" SELECT protocol, dnssec_mode, keep_alive,
COUNT(*) as queries,
AVG(duration_ms) as avg_latency,
AVG(total_bytes) as avg_bytes
FROM dns_queries
GROUP BY protocol, dnssec_mode, keep_alive
ORDER BY protocol, dnssec_mode, keep_alive;""")
print(f"\n{'-'*60}\n")
if __name__ == '__main__':
main()
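The example queries printed above can also be run straight from Python against the generated database. A short sketch, assuming the converter has already produced dns.db in the working directory:

import sqlite3

conn = sqlite3.connect("dns.db")
rows = conn.execute("""
    SELECT provider, protocol,
           AVG(duration_ms) AS avg_latency_ms,
           AVG(total_bytes) AS avg_bytes
    FROM dns_queries
    WHERE dnssec_mode = 'off' AND keep_alive = 1
    GROUP BY provider, protocol
    ORDER BY provider, protocol
""").fetchall()

for provider, protocol, avg_ms, avg_bytes in rows:
    print(f"{provider:12} {protocol:8} {avg_ms:8.2f} ms {avg_bytes:10.0f} B")
conn.close()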
-274
View File
@@ -1,274 +0,0 @@
#!/usr/bin/env python3
"""
Merge DNS test files by configuration.
- Merges CSVs of same config (adds 'run_id' column for traceability)
- Optionally merges PCAPs using mergecap
- Flattens date structure
"""
import os
import csv
import subprocess
import shutil
from pathlib import Path
import argparse
from collections import defaultdict
def parse_filename(filename):
"""
Extract config key from filename.
Format: protocol[-flags]-timestamp.{csv,pcap}
Config key: protocol[-flags] (ignores timestamp)
"""
base = filename.replace('.csv', '').replace('.pcap', '')
parts = base.split('-')
if len(parts) < 2:
return None
# Config is everything except timestamp
config = '-'.join(parts[:-1])
timestamp = parts[-1]
return config, timestamp
def extract_resolver_from_path(file_path):
"""Extract resolver name from path"""
parts = Path(file_path).parts
for part in parts:
if part.lower() in ['cloudflare', 'google', 'quad9', 'adguard']:
return part.lower()
return None
def find_files(root_dir, extension):
"""Find all files with given extension"""
files = []
for root, dirs, filenames in os.walk(root_dir):
for filename in filenames:
if filename.endswith(extension):
full_path = os.path.join(root, filename)
files.append(full_path)
return sorted(files)
def merge_csvs(csv_files, output_path, fieldnames):
"""Merge multiple CSVs into one, adding 'run_id' column"""
with open(output_path, 'w', newline='') as outfile:
writer = csv.DictWriter(outfile, fieldnames=fieldnames + ['run_id'])
writer.writeheader()
for csv_path in csv_files:
# Use timestamp as run_id
filename = Path(csv_path).name
_, timestamp = parse_filename(filename)
run_id = timestamp # Or add date if needed
with open(csv_path, 'r', newline='') as infile:
reader = csv.DictReader(infile)
for row in reader:
row['run_id'] = run_id
writer.writerow(row)
def merge_pcaps(pcap_files, output_path):
"""Merge PCAP files using mergecap"""
cmd = ['mergecap', '-w', output_path] + pcap_files
try:
subprocess.run(cmd, capture_output=True, check=True)
return True
except subprocess.CalledProcessError as e:
print(f" ✗ mergecap error: {e.stderr.decode()}")
return False
except FileNotFoundError:
print("Error: mergecap not found. Install Wireshark:")
print(" Ubuntu: sudo apt install wireshark-common")
print(" macOS: brew install wireshark")
return False
def format_bytes(bytes_val):
"""Format bytes as human readable"""
for unit in ['B', 'KB', 'MB', 'GB']:
if bytes_val < 1024.0:
return f"{bytes_val:.1f} {unit}"
bytes_val /= 1024.0
return f"{bytes_val:.1f} TB"
def main():
parser = argparse.ArgumentParser(
description='Merge DNS test files by configuration',
formatter_class=argparse.RawDescriptionHelpFormatter,
epilog='''
Merges files of same config across dates/timestamps.
Output: ./results_merged/[resolver]/[config].csv (merged)
./results_merged/[resolver]/[config].pcap (merged, if --merge-pcaps)
Examples:
# Dry run to preview
%(prog)s ./results --dry-run
# Merge CSVs only (recommended)
%(prog)s ./results
# Merge CSVs and PCAPs
%(prog)s ./results --merge-pcaps
# Custom output directory
%(prog)s ./results --output ./merged_data
'''
)
parser.add_argument(
'input_dir',
help='Input directory (e.g., ./results)'
)
parser.add_argument(
'--output',
default='./results_merged',
help='Output directory (default: ./results_merged)'
)
parser.add_argument(
'--merge-pcaps',
action='store_true',
help='Merge PCAP files (requires mergecap from Wireshark)'
)
parser.add_argument(
'--dry-run',
action='store_true',
help='Show what would be done without merging'
)
parser.add_argument(
'-y', '--yes',
action='store_true',
help='Skip confirmation prompt'
)
args = parser.parse_args()
if not os.path.isdir(args.input_dir):
print(f"Error: Input directory not found: {args.input_dir}")
return 1
# Find all files
print("=" * 80)
print("MERGE DNS TEST FILES")
print("=" * 80)
print(f"Input: {args.input_dir}")
print(f"Output: {args.output}")
print(f"Merge PCAPs: {'Yes' if args.merge_pcaps else 'No'}")
csv_files = find_files(args.input_dir, '.csv')
pcap_files = find_files(args.input_dir, '.pcap') if args.merge_pcaps else []
if not csv_files and not pcap_files:
print("\nNo CSV/PCAP files found")
return 1
print(f"\nFound {len(csv_files)} CSV files")
if args.merge_pcaps:
print(f"Found {len(pcap_files)} PCAP files")
# Group files by resolver and config
csv_groups = defaultdict(list)
pcap_groups = defaultdict(list)
for csv_path in csv_files:
config, _ = parse_filename(Path(csv_path).name)
resolver = extract_resolver_from_path(csv_path)
if config and resolver:
key = (resolver, config)
csv_groups[key].append(csv_path)
for pcap_path in pcap_files:
config, _ = parse_filename(Path(pcap_path).name)
resolver = extract_resolver_from_path(pcap_path)
if config and resolver:
key = (resolver, config)
pcap_groups[key].append(pcap_path)
# Summary
print("\nConfigs to merge:")
print("-" * 80)
for (resolver, config), files in sorted(csv_groups.items()):
print(f" {resolver}/{config}: {len(files)} runs")
total_runs = sum(len(files) for files in csv_groups.values())
print(f"\nTotal configs: {len(csv_groups)}")
print(f"Total runs: {total_runs}")
if args.dry_run:
print("\n*** DRY RUN MODE ***\n")
for (resolver, config) in sorted(csv_groups.keys()):
print(f"Would merge: {resolver}/{config} ({len(csv_groups[(resolver, config)])} CSVs)")
if args.merge_pcaps and (resolver, config) in pcap_groups:
print(f"Would merge: {resolver}/{config} ({len(pcap_groups[(resolver, config)])} PCAPs)")
return 0
# Confirmation
if not args.yes:
response = input(f"\nMerge all into {args.output}? [y/N] ")
if response.lower() not in ['y', 'yes']:
print("Cancelled")
return 0
# Merge
print("\n" + "=" * 80)
print("MERGING FILES")
print("=" * 80)
success_count = 0
fail_count = 0
total_queries = 0
total_size = 0
# Get standard CSV fieldnames (from first file)
first_csv = next(iter(csv_files))
with open(first_csv, 'r') as f:
reader = csv.DictReader(f)
fieldnames = reader.fieldnames
for (resolver, config), files in sorted(csv_groups.items()):
print(f"\n{resolver}/{config} ({len(files)} runs)")
# Merge CSVs
output_csv = os.path.join(args.output, resolver, f"{config}.csv")
os.makedirs(os.path.dirname(output_csv), exist_ok=True)
merge_csvs(files, output_csv, fieldnames)
# Count queries in merged file
with open(output_csv, 'r') as f:
query_count = sum(1 for _ in csv.reader(f)) - 1 # Minus header
print(f" ✓ Merged CSV: {query_count:,} queries")
total_queries += query_count
success_count += 1
# Merge PCAPs if requested
if args.merge_pcaps and (resolver, config) in pcap_groups:
output_pcap = os.path.join(args.output, resolver, f"{config}.pcap")
pcap_list = pcap_groups[(resolver, config)]
if merge_pcaps(pcap_list, output_pcap):
merged_size = os.path.getsize(output_pcap)
orig_size = sum(os.path.getsize(p) for p in pcap_list)
print(f" ✓ Merged PCAP: {format_bytes(merged_size)} "
f"(from {format_bytes(orig_size)})")
total_size += merged_size
else:
print(f" ✗ PCAP merge failed")
fail_count += 1
# Final summary
print("\n" + "=" * 80)
print("COMPLETE")
print("=" * 80)
print(f"Successful configs: {success_count}")
print(f"Failed: {fail_count}")
print(f"Total queries: {total_queries:,}")
if args.merge_pcaps:
print(f"Total PCAP size: {format_bytes(total_size)}")
print(f"\nMerged files in: {args.output}")
return 0 if fail_count == 0 else 1
if __name__ == "__main__":
exit(main())
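After merging, the run_id column is the only thing that distinguishes the original runs, so a per-run breakdown is a quick check that every run made it into the merged file. A small sketch, assuming a merged file such as results_merged/cloudflare/udp.csv (the path is illustrative):

import csv
from collections import Counter

runs = Counter()
# Assumed example path; any merged CSV produced by the script above works.
with open("results_merged/cloudflare/udp.csv", newline="") as f:
    for row in csv.DictReader(f):
        runs[row["run_id"]] += 1

for run_id, count in sorted(runs.items()):
    print(f"run {run_id}: {count:,} queries")
print(f"total: {sum(runs.values()):,} queries across {len(runs)} runs")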