537 lines
23 KiB
Python
537 lines
23 KiB
Python
import bisect
import datetime
import traceback
from pathlib import Path

import dpkt
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from dateutil import parser as date_parser
|
|
|
|
# Global plotting defaults: seaborn "whitegrid" theme, 300-dpi figures
# (on screen and when saved) and a 10-point base font.
sns.set_style("whitegrid")
plt.rcParams.update({
    'figure.dpi': 300,
    'savefig.dpi': 300,
    'font.size': 10,
})
|
|
|
|
class FastDNSAnalyzer:
|
|
def __init__(self, results_dir='results'):
|
|
self.results_dir = Path(results_dir)
|
|
self.all_data = []
|
|
|
|
def should_include_file(self, filename):
|
|
"""Filter out DNSSEC and non-persist files"""
|
|
name = filename.stem
|
|
if 'auth' in name or 'trust' in name:
|
|
return False
|
|
if name in ['tls', 'https']:
|
|
return False
|
|
return True
|
|
|
|
def parse_rfc3339_nano(self, timestamp_str):
|
|
"""Parse RFC3339Nano timestamp with timezone"""
|
|
try:
|
|
dt = date_parser.parse(timestamp_str)
|
|
return dt.astimezone(datetime.timezone.utc).timestamp()
|
|
except Exception as e:
|
|
print(f" Error parsing timestamp {timestamp_str}: {e}")
|
|
return None
|
|
|
|
def extract_bandwidth_from_pcap_fast(self, pcap_file, csv_data):
|
|
"""Fast bandwidth extraction using dpkt"""
|
|
print(f" Analyzing pcap: {pcap_file.name}")
|
|
|
|
try:
|
|
with open(pcap_file, 'rb') as f:
|
|
pcap = dpkt.pcap.Reader(f)
|
|
|
|
# Build query time windows
|
|
query_windows = []
|
|
for idx, row in csv_data.iterrows():
|
|
start_time = self.parse_rfc3339_nano(row['timestamp'])
|
|
if start_time is None:
|
|
continue
|
|
|
|
duration_seconds = row['duration_ns'] / 1_000_000_000
|
|
end_time = start_time + duration_seconds
|
|
|
|
query_windows.append({
|
|
'index': idx,
|
|
'start': start_time,
|
|
'end': end_time,
|
|
'bytes_sent': 0,
|
|
'bytes_received': 0,
|
|
'packets_sent': 0,
|
|
'packets_received': 0
|
|
})
|
|
|
|
if not query_windows:
|
|
print(" ✗ No valid query windows")
|
|
return None
|
|
|
|
# Sort windows for faster matching
|
|
query_windows.sort(key=lambda x: x['start'])
|
|
|
|
# Process packets
|
|
packet_count = 0
|
|
matched_count = 0
|
|
|
|
for timestamp, buf in pcap:
|
|
packet_count += 1
|
|
packet_size = len(buf)
|
|
|
|
# Quick parse to determine direction
|
|
try:
|
|
eth = dpkt.ethernet.Ethernet(buf)
|
|
|
|
# Get IP layer
|
|
if isinstance(eth.data, dpkt.ip.IP):
|
|
ip = eth.data
|
|
elif isinstance(eth.data, dpkt.ip6.IP6):
|
|
ip = eth.data
|
|
else:
|
|
continue
|
|
|
|
# Get transport layer
|
|
if isinstance(ip.data, dpkt.udp.UDP):
|
|
transport = ip.data
|
|
src_port = transport.sport
|
|
dst_port = transport.dport
|
|
elif isinstance(ip.data, dpkt.tcp.TCP):
|
|
transport = ip.data
|
|
src_port = transport.sport
|
|
dst_port = transport.dport
|
|
else:
|
|
continue
|
|
|
|
# Determine direction (client port usually higher)
|
|
is_outbound = src_port > dst_port
|
|
|
|
# Binary search for matching window
|
|
for window in query_windows:
|
|
if window['start'] <= timestamp <= window['end']:
|
|
if is_outbound:
|
|
window['bytes_sent'] += packet_size
|
|
window['packets_sent'] += 1
|
|
else:
|
|
window['bytes_received'] += packet_size
|
|
window['packets_received'] += 1
|
|
matched_count += 1
|
|
break
|
|
elif timestamp < window['start']:
|
|
break # No more windows to check
|
|
|
|
except Exception:
|
|
continue
|
|
|
|
print(f" ✓ Processed {packet_count} packets, matched {matched_count}")
|
|
|
|
# Convert to DataFrame
|
|
bandwidth_df = pd.DataFrame(query_windows)
|
|
return bandwidth_df[['index', 'bytes_sent', 'bytes_received',
|
|
'packets_sent', 'packets_received']]
|
|
|
|
except Exception as e:
|
|
print(f" ✗ Error reading pcap: {e}")
|
|
return None
|
|
|
|
def load_data(self):
|
|
"""Load all relevant CSV files and extract bandwidth from pcaps"""
|
|
print("Loading data and analyzing bandwidth...")
|
|
|
|
for provider_dir in self.results_dir.iterdir():
|
|
if not provider_dir.is_dir():
|
|
continue
|
|
|
|
provider = provider_dir.name
|
|
|
|
for csv_file in provider_dir.glob('*.csv'):
|
|
if not self.should_include_file(csv_file):
|
|
continue
|
|
|
|
try:
|
|
df = pd.read_csv(csv_file)
|
|
df['provider'] = provider
|
|
df['test_file'] = csv_file.stem
|
|
df['csv_path'] = str(csv_file)
|
|
|
|
# Find corresponding pcap file
|
|
pcap_file = csv_file.with_suffix('.pcap')
|
|
if pcap_file.exists():
|
|
print(f" Processing: {provider}/{csv_file.name}")
|
|
bandwidth_data = self.extract_bandwidth_from_pcap_fast(pcap_file, df)
|
|
|
|
if bandwidth_data is not None and len(bandwidth_data) > 0:
|
|
# Merge bandwidth data
|
|
df = df.reset_index(drop=True)
|
|
for col in ['bytes_sent', 'bytes_received', 'packets_sent', 'packets_received']:
|
|
df[col] = 0
|
|
|
|
for _, row in bandwidth_data.iterrows():
|
|
idx = int(row['index'])
|
|
if idx < len(df):
|
|
df.at[idx, 'bytes_sent'] = row['bytes_sent']
|
|
df.at[idx, 'bytes_received'] = row['bytes_received']
|
|
df.at[idx, 'packets_sent'] = row['packets_sent']
|
|
df.at[idx, 'packets_received'] = row['packets_received']
|
|
|
|
df['total_bytes'] = df['bytes_sent'] + df['bytes_received']
|
|
|
|
print(f" ✓ Extracted bandwidth for {len(df)} queries")
|
|
else:
|
|
print(f" ⚠ Could not extract bandwidth data")
|
|
else:
|
|
print(f" ⚠ No pcap found for {csv_file.name}")
|
|
|
|
self.all_data.append(df)
|
|
|
|
except Exception as e:
|
|
print(f" ✗ Error loading {csv_file}: {e}")
|
|
import traceback
|
|
traceback.print_exc()
|
|
|
|
print(f"\nTotal files loaded: {len(self.all_data)}")
|
|
|
|
def create_line_graphs(self, output_dir='output/line_graphs'):
|
|
"""Create line graphs for latency and bandwidth"""
|
|
Path(output_dir).mkdir(parents=True, exist_ok=True)
|
|
|
|
print("\nGenerating line graphs...")
|
|
|
|
for df in self.all_data:
|
|
provider = df['provider'].iloc[0]
|
|
test_name = df['test_file'].iloc[0]
|
|
|
|
df['query_index'] = range(1, len(df) + 1)
|
|
|
|
# Create figure with 2 subplots
|
|
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(14, 10))
|
|
|
|
# Plot 1: Latency
|
|
ax1.plot(df['query_index'], df['duration_ms'], marker='o',
|
|
markersize=4, linewidth=1, alpha=0.7, color='steelblue')
|
|
mean_latency = df['duration_ms'].mean()
|
|
ax1.axhline(y=mean_latency, color='r', linestyle='--',
|
|
label=f'Mean: {mean_latency:.2f} ms', linewidth=2)
|
|
ax1.set_xlabel('Query Number', fontsize=12)
|
|
ax1.set_ylabel('Latency (ms)', fontsize=12)
|
|
ax1.set_title('Latency Over Time', fontsize=12, fontweight='bold')
|
|
ax1.legend()
|
|
ax1.grid(True, alpha=0.3)
|
|
|
|
# Plot 2: Bandwidth
|
|
if 'total_bytes' in df.columns and df['total_bytes'].sum() > 0:
|
|
ax2.plot(df['query_index'], df['bytes_sent'], marker='s',
|
|
markersize=4, linewidth=1, alpha=0.7,
|
|
color='orange', label='Sent')
|
|
ax2.plot(df['query_index'], df['bytes_received'], marker='^',
|
|
markersize=4, linewidth=1, alpha=0.7,
|
|
color='green', label='Received')
|
|
|
|
mean_sent = df['bytes_sent'].mean()
|
|
mean_received = df['bytes_received'].mean()
|
|
ax2.axhline(y=mean_sent, color='orange', linestyle='--',
|
|
linewidth=1.5, alpha=0.5)
|
|
ax2.axhline(y=mean_received, color='green', linestyle='--',
|
|
linewidth=1.5, alpha=0.5)
|
|
|
|
ax2.set_xlabel('Query Number', fontsize=12)
|
|
ax2.set_ylabel('Bytes', fontsize=12)
|
|
ax2.set_title(f'Bandwidth Over Time (Mean: ↑{mean_sent:.0f}B ↓{mean_received:.0f}B)',
|
|
fontsize=12, fontweight='bold')
|
|
ax2.legend()
|
|
ax2.grid(True, alpha=0.3)
|
|
|
|
fig.suptitle(f'{provider.upper()} - {test_name}',
|
|
fontsize=14, fontweight='bold')
|
|
plt.tight_layout()
|
|
|
|
filename = f"{provider}_{test_name}.png"
|
|
plt.savefig(f'{output_dir}/{filename}', bbox_inches='tight')
|
|
plt.close()
|
|
|
|
print(f" ✓ Created: {filename}")
|
|
|
|
def get_protocol_name(self, test_file):
|
|
"""Extract clean protocol name"""
|
|
name = test_file.replace('-persist', '')
|
|
|
|
protocol_map = {
|
|
'udp': 'Plain DNS (UDP)',
|
|
'tls': 'DoT (DNS over TLS)',
|
|
'https': 'DoH (DNS over HTTPS)',
|
|
'doh3': 'DoH/3 (DNS over HTTP/3)',
|
|
'doq': 'DoQ (DNS over QUIC)'
|
|
}
|
|
|
|
return protocol_map.get(name, name.upper())
|
|
|
|
def create_resolver_comparison_bars(self, output_dir='output/comparisons'):
|
|
"""Create bar graphs comparing resolvers for latency and bandwidth"""
|
|
Path(output_dir).mkdir(parents=True, exist_ok=True)
|
|
|
|
print("\nGenerating resolver comparison graphs...")
|
|
|
|
combined_df = pd.concat(self.all_data, ignore_index=True)
|
|
protocols = combined_df['test_file'].unique()
|
|
|
|
for protocol in protocols:
|
|
protocol_data = combined_df[combined_df['test_file'] == protocol]
|
|
protocol_name = self.get_protocol_name(protocol)
|
|
|
|
# Latency stats
|
|
latency_stats = protocol_data.groupby('provider')['duration_ms'].agg([
|
|
('mean', 'mean'),
|
|
('median', 'median'),
|
|
('std', 'std')
|
|
]).reset_index()
|
|
|
|
# Create latency comparison
|
|
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 6))
|
|
fig.suptitle(f'{protocol_name} - Latency Comparison',
|
|
fontsize=16, fontweight='bold')
|
|
|
|
# Mean latency
|
|
bars1 = ax1.bar(latency_stats['provider'], latency_stats['mean'],
|
|
color='steelblue', alpha=0.8, edgecolor='black')
|
|
ax1.errorbar(latency_stats['provider'], latency_stats['mean'],
|
|
yerr=latency_stats['std'], fmt='none', color='black',
|
|
capsize=5, alpha=0.6)
|
|
|
|
for bar in bars1:
|
|
height = bar.get_height()
|
|
ax1.text(bar.get_x() + bar.get_width()/2., height,
|
|
f'{height:.2f}',
|
|
ha='center', va='bottom', fontweight='bold')
|
|
|
|
ax1.set_xlabel('Resolver', fontsize=12)
|
|
ax1.set_ylabel('Mean Latency (ms)', fontsize=12)
|
|
ax1.set_title('Mean Latency', fontsize=12)
|
|
ax1.grid(axis='y', alpha=0.3)
|
|
|
|
# Median latency
|
|
bars2 = ax2.bar(latency_stats['provider'], latency_stats['median'],
|
|
color='coral', alpha=0.8, edgecolor='black')
|
|
|
|
for bar in bars2:
|
|
height = bar.get_height()
|
|
ax2.text(bar.get_x() + bar.get_width()/2., height,
|
|
f'{height:.2f}',
|
|
ha='center', va='bottom', fontweight='bold')
|
|
|
|
ax2.set_xlabel('Resolver', fontsize=12)
|
|
ax2.set_ylabel('Median Latency (ms)', fontsize=12)
|
|
ax2.set_title('Median Latency', fontsize=12)
|
|
ax2.grid(axis='y', alpha=0.3)
|
|
|
|
plt.tight_layout()
|
|
plt.savefig(f'{output_dir}/latency_{protocol}.png', bbox_inches='tight')
|
|
plt.close()
|
|
print(f" ✓ Created: latency_{protocol}.png")
|
|
|
|
# Bandwidth comparison
|
|
if 'total_bytes' in protocol_data.columns and protocol_data['total_bytes'].sum() > 0:
|
|
bandwidth_stats = protocol_data.groupby('provider').agg({
|
|
'bytes_sent': 'mean',
|
|
'bytes_received': 'mean',
|
|
'total_bytes': 'mean'
|
|
}).reset_index()
|
|
|
|
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 6))
|
|
fig.suptitle(f'{protocol_name} - Bandwidth Comparison',
|
|
fontsize=16, fontweight='bold')
|
|
|
|
# Sent vs Received
|
|
x = np.arange(len(bandwidth_stats))
|
|
width = 0.35
|
|
|
|
bars1 = ax1.bar(x - width/2, bandwidth_stats['bytes_sent'], width,
|
|
label='Sent', color='orange', alpha=0.8, edgecolor='black')
|
|
bars2 = ax1.bar(x + width/2, bandwidth_stats['bytes_received'], width,
|
|
label='Received', color='green', alpha=0.8, edgecolor='black')
|
|
|
|
ax1.set_xlabel('Resolver', fontsize=12)
|
|
ax1.set_ylabel('Bytes per Query', fontsize=12)
|
|
ax1.set_title('Average Bandwidth per Query', fontsize=12)
|
|
ax1.set_xticks(x)
|
|
ax1.set_xticklabels(bandwidth_stats['provider'])
|
|
ax1.legend()
|
|
ax1.grid(axis='y', alpha=0.3)
|
|
|
|
# Total bandwidth
|
|
bars3 = ax2.bar(bandwidth_stats['provider'], bandwidth_stats['total_bytes'],
|
|
color='purple', alpha=0.8, edgecolor='black')
|
|
|
|
for bar in bars3:
|
|
height = bar.get_height()
|
|
ax2.text(bar.get_x() + bar.get_width()/2., height,
|
|
f'{height:.0f}',
|
|
ha='center', va='bottom', fontweight='bold')
|
|
|
|
ax2.set_xlabel('Resolver', fontsize=12)
|
|
ax2.set_ylabel('Total Bytes per Query', fontsize=12)
|
|
ax2.set_title('Total Bandwidth per Query', fontsize=12)
|
|
ax2.grid(axis='y', alpha=0.3)
|
|
|
|
plt.tight_layout()
|
|
plt.savefig(f'{output_dir}/bandwidth_{protocol}.png', bbox_inches='tight')
|
|
plt.close()
|
|
print(f" ✓ Created: bandwidth_{protocol}.png")
|
|
|
|
def generate_latex_tables(self, output_dir='output/tables'):
|
|
"""Generate LaTeX tables with latency and bandwidth statistics"""
|
|
Path(output_dir).mkdir(parents=True, exist_ok=True)
|
|
|
|
print("\nGenerating LaTeX tables...")
|
|
|
|
combined_df = pd.concat(self.all_data, ignore_index=True)
|
|
|
|
# Generate latency table for each resolver
|
|
for provider in combined_df['provider'].unique():
|
|
provider_data = combined_df[combined_df['provider'] == provider]
|
|
|
|
stats = provider_data.groupby('test_file')['duration_ms'].agg([
|
|
('Mean', 'mean'),
|
|
('Median', 'median'),
|
|
('Std Dev', 'std'),
|
|
('P95', lambda x: x.quantile(0.95)),
|
|
('P99', lambda x: x.quantile(0.99))
|
|
]).round(2)
|
|
|
|
stats.index = stats.index.map(self.get_protocol_name)
|
|
stats.index.name = 'Protocol'
|
|
|
|
latex_code = stats.to_latex(
|
|
caption=f'{provider.upper()} - Latency Statistics (ms)',
|
|
label=f'tab:{provider}_latency',
|
|
float_format="%.2f"
|
|
)
|
|
|
|
with open(f'{output_dir}/{provider}_latency.tex', 'w') as f:
|
|
f.write(latex_code)
|
|
|
|
print(f" ✓ Created: {provider}_latency.tex")
|
|
|
|
# Generate bandwidth table for each resolver
|
|
for provider in combined_df['provider'].unique():
|
|
provider_data = combined_df[combined_df['provider'] == provider]
|
|
|
|
if 'total_bytes' not in provider_data.columns or provider_data['total_bytes'].sum() == 0:
|
|
continue
|
|
|
|
bandwidth_stats = provider_data.groupby('test_file').agg({
|
|
'bytes_sent': 'mean',
|
|
'bytes_received': 'mean',
|
|
'total_bytes': 'mean'
|
|
}).round(2)
|
|
|
|
bandwidth_stats.columns = ['Avg Sent (B)', 'Avg Received (B)', 'Avg Total (B)']
|
|
bandwidth_stats.index = bandwidth_stats.index.map(self.get_protocol_name)
|
|
bandwidth_stats.index.name = 'Protocol'
|
|
|
|
latex_code = bandwidth_stats.to_latex(
|
|
caption=f'{provider.upper()} - Bandwidth Statistics',
|
|
label=f'tab:{provider}_bandwidth',
|
|
float_format="%.2f"
|
|
)
|
|
|
|
with open(f'{output_dir}/{provider}_bandwidth.tex', 'w') as f:
|
|
f.write(latex_code)
|
|
|
|
print(f" ✓ Created: {provider}_bandwidth.tex")
|
|
|
|
# Generate protocol efficiency table
|
|
print("\nGenerating protocol efficiency table...")
|
|
|
|
if 'total_bytes' in combined_df.columns and combined_df['total_bytes'].sum() > 0:
|
|
protocol_bandwidth = combined_df.groupby('test_file').agg({
|
|
'bytes_sent': 'mean',
|
|
'bytes_received': 'mean',
|
|
'total_bytes': 'mean'
|
|
}).round(2)
|
|
|
|
# Find UDP baseline
|
|
udp_baseline = None
|
|
for protocol in protocol_bandwidth.index:
|
|
if 'udp' in protocol:
|
|
udp_baseline = protocol_bandwidth.loc[protocol, 'total_bytes']
|
|
break
|
|
|
|
if udp_baseline and udp_baseline > 0:
|
|
protocol_bandwidth['Overhead vs UDP (%)'] = (
|
|
(protocol_bandwidth['total_bytes'] - udp_baseline) / udp_baseline * 100
|
|
).round(1)
|
|
protocol_bandwidth['Efficiency (%)'] = (
|
|
100 / (1 + protocol_bandwidth['Overhead vs UDP (%)'] / 100)
|
|
).round(1)
|
|
|
|
protocol_bandwidth.columns = ['Avg Sent (B)', 'Avg Received (B)',
|
|
'Avg Total (B)', 'Overhead (%)', 'Efficiency (%)']
|
|
protocol_bandwidth.index = protocol_bandwidth.index.map(self.get_protocol_name)
|
|
protocol_bandwidth.index.name = 'Protocol'
|
|
|
|
latex_code = protocol_bandwidth.to_latex(
|
|
caption='Protocol Bandwidth Efficiency Comparison',
|
|
label='tab:protocol_efficiency',
|
|
float_format="%.2f"
|
|
)
|
|
|
|
with open(f'{output_dir}/protocol_efficiency.tex', 'w') as f:
|
|
f.write(latex_code)
|
|
|
|
print(f" ✓ Created: protocol_efficiency.tex")
|
|
print("\n--- Protocol Efficiency ---")
|
|
print(protocol_bandwidth.to_string())
|
|
|
|
# Generate combined comparison tables
|
|
for metric in ['Mean', 'Median', 'P95']:
|
|
comparison_stats = combined_df.groupby(['provider', 'test_file'])['duration_ms'].agg([
|
|
('Mean', 'mean'),
|
|
('Median', 'median'),
|
|
('P95', lambda x: x.quantile(0.95))
|
|
]).round(2)
|
|
|
|
pivot_table = comparison_stats[metric].unstack(level=0)
|
|
pivot_table.index = pivot_table.index.map(self.get_protocol_name)
|
|
pivot_table.index.name = 'Protocol'
|
|
|
|
latex_code = pivot_table.to_latex(
|
|
caption=f'Resolver Latency Comparison - {metric} (ms)',
|
|
label=f'tab:comparison_{metric.lower()}',
|
|
float_format="%.2f"
|
|
)
|
|
|
|
with open(f'{output_dir}/comparison_{metric.lower()}.tex', 'w') as f:
|
|
f.write(latex_code)
|
|
|
|
print(f" ✓ Created: comparison_{metric.lower()}.tex")
|
|
|
|
def run_analysis(self):
|
|
"""Run the complete analysis"""
|
|
print("="*80)
|
|
print("Fast DNS QoS Analysis with Bandwidth")
|
|
print("="*80)
|
|
|
|
self.load_data()
|
|
|
|
if not self.all_data:
|
|
print("\n⚠ No data loaded.")
|
|
return
|
|
|
|
print("\n" + "="*80)
|
|
self.create_line_graphs()
|
|
|
|
print("\n" + "="*80)
|
|
self.create_resolver_comparison_bars()
|
|
|
|
print("\n" + "="*80)
|
|
self.generate_latex_tables()
|
|
|
|
print("\n" + "="*80)
|
|
print("✓ Analysis Complete!")
|
|
print("="*80)
|
|
|
|
|
|
# Script entry point: analyze the ./results directory end to end.
if __name__ == "__main__":
    FastDNSAnalyzer(results_dir='results').run_analysis()
|