feat(profiling): Add CPU and MEM profiling
@@ -1,27 +0,0 @@
{
  "nodes": {
    "nixpkgs": {
      "locked": {
        "lastModified": 1760103332,
        "narHash": "sha256-BMsGVfKl4Q80Pr9T1AkCRljO1bpwCmY8rTBVj8XGuhA=",
        "owner": "NixOS",
        "repo": "nixpkgs",
        "rev": "870493f9a8cb0b074ae5b411b2f232015db19a65",
        "type": "github"
      },
      "original": {
        "owner": "NixOS",
        "ref": "nixpkgs-unstable",
        "repo": "nixpkgs",
        "type": "github"
      }
    },
    "root": {
      "inputs": {
        "nixpkgs": "nixpkgs"
      }
    }
  },
  "root": "root",
  "version": 7
}
@@ -1,38 +0,0 @@
{
  description = "A Nix-flake-based Go 1.22 development environment";

  inputs.nixpkgs.url = "github:NixOS/nixpkgs/nixpkgs-unstable";

  outputs = inputs:
    let
      goVersion = 25; # Change this to update the whole stack

      supportedSystems = [ "aarch64-darwin" ];
      forEachSupportedSystem = f: inputs.nixpkgs.lib.genAttrs supportedSystems (system: f {
        pkgs = import inputs.nixpkgs {
          inherit system;
          overlays = [ inputs.self.overlays.default ];
        };
      });
    in
    {
      overlays.default = final: prev: {
        go = final."go_1_${toString goVersion}";
      };

      devShells = forEachSupportedSystem ({ pkgs }: {
        default = pkgs.mkShell {
          packages = with pkgs; [
            go
            gotools
            golangci-lint
            (pkgs.python3.withPackages (python-pkgs: with python-pkgs; [
              pandas
              matplotlib
              seaborn
            ]))
          ];
        };
      });
    };
}
@@ -12,6 +12,7 @@ import (
	"github.com/afonsofrancof/sdns-proxy/client"
	"github.com/afonsofrancof/sdns-proxy/internal/qol/capture"
	"github.com/afonsofrancof/sdns-proxy/internal/qol/results"
	"github.com/afonsofrancof/sdns-proxy/internal/qol/stats"
	"github.com/google/gopacket/pcap"
	"github.com/miekg/dns"
)
@@ -86,13 +87,16 @@ func (r *MeasurementRunner) runPerUpstream(upstream string, domains []string, qT
	defer dnsClient.Close()

	// Setup output files
	csvPath, pcapPath := GenerateOutputPaths(r.config.OutputDir, upstream, r.config.DNSSEC, r.config.AuthoritativeDNSSEC, r.config.KeepAlive)
	csvPath, pcapPath, memPath := GenerateOutputPaths(r.config.OutputDir, upstream, r.config.DNSSEC, r.config.AuthoritativeDNSSEC, r.config.KeepAlive)

	// Create directory if it doesn't exist
	if err := os.MkdirAll(filepath.Dir(csvPath), 0755); err != nil {
		return fmt.Errorf("failed to create output directory: %w", err)
	}

	// Initialize runtime collector with memPath
	runtimeCollector := stats.NewRuntimeCollector(memPath)

	keepAliveStr := ""
	if r.config.KeepAlive {
		keepAliveStr = " (keep-alive)"
@@ -118,7 +122,17 @@ func (r *MeasurementRunner) runPerUpstream(upstream string, domains []string, qT

	time.Sleep(time.Second)
	// Run measurements
	return r.runQueries(dnsClient, upstream, domains, qType, writer, packetCapture)
	err = r.runQueries(dnsClient, upstream, domains, qType, writer, packetCapture)
	if err != nil {
		return err
	}

	// Write summed mem stats for the entire run
	if err := runtimeCollector.WriteStats(); err != nil {
		fmt.Fprintf(os.Stderr, "Warning: failed to write mem stats to %s: %v\n", memPath, err)
	}

	return nil
}

func (r *MeasurementRunner) runQueries(dnsClient client.DNSClient, upstream string,
@@ -0,0 +1,94 @@
package stats

import (
	"encoding/csv"
	"fmt"
	"os"
	"runtime"
	"time"
)

type RuntimeStats struct {
	TotalAlloc   uint64
	Mallocs      uint64
	NumGC        uint32
	AllocDelta   uint64
	MallocsDelta uint64
	GCDelta      uint32
}

type RuntimeCollector struct {
	startStats runtime.MemStats
	memPath    string
}

func NewRuntimeCollector(memPath string) *RuntimeCollector {
	var stats runtime.MemStats
	runtime.ReadMemStats(&stats)

	return &RuntimeCollector{
		startStats: stats,
		memPath:    memPath,
	}
}

func (rc *RuntimeCollector) Collect() RuntimeStats {
	var current runtime.MemStats
	runtime.ReadMemStats(&current)

	return RuntimeStats{
		TotalAlloc:   current.TotalAlloc,
		Mallocs:      current.Mallocs,
		NumGC:        current.NumGC,
		AllocDelta:   current.TotalAlloc - rc.startStats.TotalAlloc,
		MallocsDelta: current.Mallocs - rc.startStats.Mallocs,
		GCDelta:      current.NumGC - rc.startStats.NumGC,
	}
}

func (rc *RuntimeCollector) WriteStats() error {
	stats := rc.Collect()
	timestamp := time.Now().Format(time.RFC3339Nano)

	// Check if file exists
	fileExists := false
	if _, err := os.Stat(rc.memPath); err == nil {
		fileExists = true
	}

	// Open in append mode
	file, err := os.OpenFile(rc.memPath, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644)
	if err != nil {
		return fmt.Errorf("failed to open mem.csv: %w", err)
	}
	defer file.Close()

	writer := csv.NewWriter(file)

	// Write header if new file
	if !fileExists {
		header := []string{
			"timestamp", "total_alloc_bytes", "mallocs", "gc_cycles",
			"alloc_delta", "mallocs_delta", "gc_delta",
		}
		if err := writer.Write(header); err != nil {
			return fmt.Errorf("failed to write mem.csv header: %w", err)
		}
	}

	row := []string{
		timestamp,
		fmt.Sprintf("%d", stats.TotalAlloc),
		fmt.Sprintf("%d", stats.Mallocs),
		fmt.Sprintf("%d", stats.NumGC),
		fmt.Sprintf("%d", stats.AllocDelta),
		fmt.Sprintf("%d", stats.MallocsDelta),
		fmt.Sprintf("%d", stats.GCDelta),
	}
	if err := writer.Write(row); err != nil {
		return fmt.Errorf("failed to write mem.csv row: %w", err)
	}

	writer.Flush()
	return writer.Error()
}
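For reference, a minimal sketch (not part of the diff above) of how the collector is intended to be wired into a run, together with one way the "CPU" half of this commit's title could be covered using the standard runtime/pprof package. The file paths and the runMeasurements placeholder are illustrative assumptions, not code from this commit.

// Illustrative sketch only: assumed paths and a placeholder query loop.
package main

import (
	"log"
	"os"
	"runtime/pprof"

	"github.com/afonsofrancof/sdns-proxy/internal/qol/stats"
)

func main() {
	// CPU profile via runtime/pprof (assumption: not shown in the diff above).
	cpuFile, err := os.Create("results/example/run.cpu.pprof")
	if err != nil {
		log.Fatal(err)
	}
	defer cpuFile.Close()
	if err := pprof.StartCPUProfile(cpuFile); err != nil {
		log.Fatal(err)
	}
	defer pprof.StopCPUProfile()

	// Memory stats: snapshot the runtime counters at start, then append one
	// summary row (header plus deltas since start) to the .mem.csv at the end.
	collector := stats.NewRuntimeCollector("results/example/run.mem.csv")

	runMeasurements() // placeholder for the actual query loop

	if err := collector.WriteStats(); err != nil {
		log.Printf("warning: failed to write mem stats: %v", err)
	}
}

func runMeasurements() {}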
@@ -1,4 +1,3 @@
// ./internal/qol/utils.go
package qol

import (
@@ -8,7 +7,7 @@ import (
	"strings"
)

func GenerateOutputPaths(outputDir, upstream string, dnssec, authDNSSEC, keepAlive bool) (csvPath, pcapPath string) {
func GenerateOutputPaths(outputDir, upstream string, dnssec, authDNSSEC, keepAlive bool) (csvPath, pcapPath, memPath string) {
	proto := DetectProtocol(upstream)
	cleanServer := cleanServerName(upstream)

@@ -32,8 +31,10 @@ func GenerateOutputPaths(outputDir, upstream string, dnssec, authDNSSEC, keepAli
		base = fmt.Sprintf("%s-%s", base, strings.Join(flags, "-"))
	}

	return filepath.Join(subDir, base+".csv"),
		filepath.Join(subDir, base+".pcap")
	csvPath = filepath.Join(subDir, base+".csv")
	pcapPath = filepath.Join(subDir, base+".pcap")
	memPath = filepath.Join(subDir, base+".mem.csv")
	return
}

func cleanServerName(server string) string {
@@ -84,7 +85,6 @@ func DetectProtocol(upstream string) string {
	u, err := url.Parse(upstream)
	if err == nil && u.Scheme != "" {
		scheme := strings.ToLower(u.Scheme)
		// Normalize scheme names
		switch scheme {
		case "udp", "doudp":
			return "doudp"
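For context, a short sketch of what the new three-value GenerateOutputPaths returns. The upstream and flag values below are made-up examples; only the .csv/.pcap/.mem.csv naming pattern comes from the code above.

	// Hypothetical call; subDir and base are derived inside GenerateOutputPaths.
	csvPath, pcapPath, memPath := qol.GenerateOutputPaths(
		"./results",          // outputDir
		"dotcp://8.8.8.8:53", // upstream (example)
		true,                 // dnssec
		false,                // authoritativeDNSSEC
		true,                 // keepAlive
	)
	// csvPath  -> <subDir>/<base>.csv
	// pcapPath -> <subDir>/<base>.pcap
	// memPath  -> <subDir>/<base>.mem.csv   (new: one memory-stats CSV per run)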
@@ -1,187 +0,0 @@
|
||||
#!/bin/bash
|
||||
|
||||
# Exit on error
|
||||
set -e
|
||||
|
||||
# Default values
|
||||
TOOL_PATH="./qol"
|
||||
DOMAINS_FILE="./domains.txt"
|
||||
OUTPUT_DIR="./results"
|
||||
INTERFACE="veth1"
|
||||
TIMEOUT="5s"
|
||||
SLEEP_TIME="1"
|
||||
|
||||
# Parse arguments
|
||||
while [[ $# -gt 0 ]]; do
|
||||
case $1 in
|
||||
-t|--tool-path)
|
||||
TOOL_PATH="$2"
|
||||
shift 2
|
||||
;;
|
||||
-d|--domains-file)
|
||||
DOMAINS_FILE="$2"
|
||||
shift 2
|
||||
;;
|
||||
-o|--output-dir)
|
||||
OUTPUT_DIR="$2"
|
||||
shift 2
|
||||
;;
|
||||
-I|--interface)
|
||||
INTERFACE="$2"
|
||||
shift 2
|
||||
;;
|
||||
-T|--timeout)
|
||||
TIMEOUT="$2"
|
||||
shift 2
|
||||
;;
|
||||
-s|--sleep)
|
||||
SLEEP_TIME="$2"
|
||||
shift 2
|
||||
;;
|
||||
--help)
|
||||
echo "Usage: $0 [OPTIONS]"
|
||||
echo ""
|
||||
echo "Options:"
|
||||
echo " -t, --tool-path PATH Path to qol tool (default: ./qol)"
|
||||
echo " -d, --domains-file PATH Path to domains file (default: ./domains.txt)"
|
||||
echo " -o, --output-dir PATH Output directory (default: ./results)"
|
||||
echo " -I, --interface NAME Network interface (default: veth1)"
|
||||
echo " -T, --timeout DURATION Timeout duration (default: 5s)"
|
||||
echo " -s, --sleep SECONDS Sleep between runs (default: 1)"
|
||||
echo " --help Show this help"
|
||||
exit 0
|
||||
;;
|
||||
*)
|
||||
echo "Unknown option: $1"
|
||||
echo "Use --help for usage information"
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
done
|
||||
|
||||
echo "Configuration:"
|
||||
echo " Tool path: $TOOL_PATH"
|
||||
echo " Domains file: $DOMAINS_FILE"
|
||||
echo " Output dir: $OUTPUT_DIR"
|
||||
echo " Interface: $INTERFACE"
|
||||
echo " Timeout: $TIMEOUT"
|
||||
echo " Sleep time: ${SLEEP_TIME}s"
|
||||
echo ""
|
||||
|
||||
# Connection-based protocols that benefit from keep-alive (TCP-based)
|
||||
CONN_SERVERS=(
|
||||
-s "dotcp://8.8.8.8:53"
|
||||
-s "dotcp://1.1.1.1:53"
|
||||
-s "dotcp://9.9.9.9:53"
|
||||
-s "dotcp://dns.adguard-dns.com:53"
|
||||
#-s "tls://8.8.8.8:853"
|
||||
#-s "tls://1.1.1.1:853"
|
||||
#-s "tls://9.9.9.9:853"
|
||||
#-s "tls://dns.adguard-dns.com:853"
|
||||
#-s "https://dns.google/dns-query"
|
||||
#-s "https://cloudflare-dns.com/dns-query"
|
||||
#-s "https://dns10.quad9.net/dns-query"
|
||||
#-s "https://dns.adguard-dns.com/dns-query"
|
||||
)
|
||||
|
||||
# QUIC-based protocols (have built-in 0-RTT, keep-alive doesn't add value)
|
||||
QUIC_SERVERS=(
|
||||
#-s "doh3://dns.google/dns-query"
|
||||
#-s "doh3://cloudflare-dns.com/dns-query"
|
||||
#-s "doh3://dns.adguard-dns.com/dns-query"
|
||||
#-s "doq://dns.adguard-dns.com:853"
|
||||
)
|
||||
|
||||
# Connectionless protocols (no keep-alive)
|
||||
CONNLESS_SERVERS=(
|
||||
# -s "udp://8.8.8.8:53"
|
||||
# -s "udp://1.1.1.1:53"
|
||||
# -s "udp://9.9.9.9:53"
|
||||
# -s "udp://dns.adguard-dns.com:53"
|
||||
-s "sdns://AQMAAAAAAAAAETk0LjE0MC4xNC4xNDo1NDQzINErR_JS3PLCu_iZEIbq95zkSV2LFsigxDIuUso_OQhzIjIuZG5zY3J5cHQuZGVmYXVsdC5uczEuYWRndWFyZC5jb20"
|
||||
-s "sdns://AQMAAAAAAAAAFDE0OS4xMTIuMTEyLjExMjo4NDQzIGfIR7jIdYzRICRVQ751Z0bfNN8dhMALjEcDaN-CHYY-GTIuZG5zY3J5cHQtY2VydC5xdWFkOS5uZXQ"
|
||||
)
|
||||
|
||||
# Common args
|
||||
COMMON_ARGS=(
|
||||
"$DOMAINS_FILE"
|
||||
--interface "$INTERFACE"
|
||||
--timeout "$TIMEOUT"
|
||||
)
|
||||
|
||||
# Combinations for TCP-based connection protocols
|
||||
CONN_COMBINATIONS=(
|
||||
# DNSSEC off, Keep off
|
||||
""
|
||||
|
||||
# DNSSEC off, Keep on
|
||||
"--keep-alive"
|
||||
|
||||
# DNSSEC on (trust), Keep on
|
||||
"--dnssec --keep-alive"
|
||||
|
||||
# DNSSEC on (auth), Keep on
|
||||
"--dnssec --authoritative-dnssec --keep-alive"
|
||||
)
|
||||
|
||||
# Combinations for QUIC and connectionless protocols (no keep-alive)
|
||||
NO_KEEPALIVE_COMBINATIONS=(
|
||||
# DNSSEC off
|
||||
""
|
||||
|
||||
# DNSSEC on (trust)
|
||||
"--dnssec"
|
||||
|
||||
# DNSSEC on (auth)
|
||||
"--dnssec --authoritative-dnssec"
|
||||
)
|
||||
|
||||
echo "=== Running TCP-based protocols (TLS/HTTPS) ==="
|
||||
for FLAGS in "${CONN_COMBINATIONS[@]}"; do
|
||||
echo "Running: $FLAGS"
|
||||
|
||||
FLAGS_ARRAY=($FLAGS)
|
||||
|
||||
sudo "$TOOL_PATH" run \
|
||||
--output-dir "$OUTPUT_DIR" \
|
||||
"${COMMON_ARGS[@]}" \
|
||||
"${CONN_SERVERS[@]}" \
|
||||
"${FLAGS_ARRAY[@]}" || true
|
||||
|
||||
sleep "$SLEEP_TIME"
|
||||
done
|
||||
|
||||
echo ""
|
||||
echo "=== Running QUIC-based protocols (DoH3/DoQ) ==="
|
||||
for FLAGS in "${NO_KEEPALIVE_COMBINATIONS[@]}"; do
|
||||
echo "Running: $FLAGS"
|
||||
|
||||
FLAGS_ARRAY=($FLAGS)
|
||||
|
||||
sudo "$TOOL_PATH" run \
|
||||
--output-dir "$OUTPUT_DIR" \
|
||||
"${COMMON_ARGS[@]}" \
|
||||
"${QUIC_SERVERS[@]}" \
|
||||
"${FLAGS_ARRAY[@]}" || true
|
||||
|
||||
sleep "$SLEEP_TIME"
|
||||
done
|
||||
|
||||
echo ""
|
||||
echo "=== Running connectionless protocols (UDP) ==="
|
||||
for FLAGS in "${NO_KEEPALIVE_COMBINATIONS[@]}"; do
|
||||
echo "Running: $FLAGS"
|
||||
|
||||
FLAGS_ARRAY=($FLAGS)
|
||||
|
||||
sudo "$TOOL_PATH" run \
|
||||
--output-dir "$OUTPUT_DIR" \
|
||||
"${COMMON_ARGS[@]}" \
|
||||
"${CONNLESS_SERVERS[@]}" \
|
||||
"${FLAGS_ARRAY[@]}" || true
|
||||
|
||||
sleep "$SLEEP_TIME"
|
||||
done
|
||||
|
||||
echo ""
|
||||
echo "All combinations completed!"
|
||||
@@ -1,498 +0,0 @@
|
||||
import pandas as pd
|
||||
import matplotlib.pyplot as plt
|
||||
import seaborn as sns
|
||||
import numpy as np
|
||||
from pathlib import Path
|
||||
from scipy import stats
|
||||
import warnings
|
||||
|
||||
warnings.filterwarnings('ignore')
|
||||
|
||||
# Set style for publication-quality plots
|
||||
sns.set_style("whitegrid")
|
||||
plt.rcParams['figure.dpi'] = 300
|
||||
plt.rcParams['savefig.dpi'] = 300
|
||||
plt.rcParams['font.size'] = 10
|
||||
plt.rcParams['figure.figsize'] = (12, 6)
|
||||
|
||||
class DNSAnalyzer:
|
||||
def __init__(self, results_dir='results'):
|
||||
self.results_dir = Path(results_dir)
|
||||
self.df = None
|
||||
|
||||
def load_all_data(self):
|
||||
"""Load all CSV files from the results directory"""
|
||||
data_frames = []
|
||||
|
||||
providers = ['adguard', 'cloudflare', 'google', 'quad9']
|
||||
|
||||
for provider in providers:
|
||||
provider_path = self.results_dir / provider
|
||||
if not provider_path.exists():
|
||||
continue
|
||||
|
||||
for csv_file in provider_path.glob('*.csv'):
|
||||
try:
|
||||
df = pd.read_csv(csv_file)
|
||||
df['provider'] = provider
|
||||
df['test_config'] = csv_file.stem
|
||||
data_frames.append(df)
|
||||
except Exception as e:
|
||||
print(f"Error loading {csv_file}: {e}")
|
||||
|
||||
self.df = pd.concat(data_frames, ignore_index=True)
|
||||
self._clean_and_enrich_data()
|
||||
print(f"Loaded {len(self.df)} DNS queries across {len(data_frames)} test configurations")
|
||||
|
||||
def _clean_and_enrich_data(self):
|
||||
"""Clean data and add useful columns"""
|
||||
# Remove failed queries
|
||||
self.df = self.df[self.df['error'].isna()]
|
||||
|
||||
# Extract protocol base (remove -auth, -trust suffixes)
|
||||
self.df['protocol_base'] = self.df['protocol'].str.replace('-auth|-trust', '', regex=True)
|
||||
|
||||
# DNSSEC configuration
|
||||
self.df['dnssec_mode'] = 'none'
|
||||
self.df.loc[self.df['auth_dnssec'] == True, 'dnssec_mode'] = 'auth'
|
||||
self.df.loc[(self.df['dnssec'] == True) & (self.df['auth_dnssec'] == False), 'dnssec_mode'] = 'trust'
|
||||
|
||||
# Protocol categories
|
||||
self.df['protocol_category'] = self.df['protocol_base'].map({
|
||||
'udp': 'Plain DNS',
|
||||
'tls': 'DoT',
|
||||
'https': 'DoH',
|
||||
'doh3': 'DoH/3',
|
||||
'doq': 'DoQ'
|
||||
})
|
||||
|
||||
# Connection persistence
|
||||
self.df['persistence'] = self.df['keep_alive'].fillna(False)
|
||||
|
||||
def generate_summary_statistics(self):
|
||||
"""Generate comprehensive summary statistics"""
|
||||
print("\n" + "="*80)
|
||||
print("SUMMARY STATISTICS")
|
||||
print("="*80)
|
||||
|
||||
# Overall statistics
|
||||
print("\n--- Overall Performance ---")
|
||||
print(f"Total queries: {len(self.df)}")
|
||||
print(f"Mean latency: {self.df['duration_ms'].mean():.2f} ms")
|
||||
print(f"Median latency: {self.df['duration_ms'].median():.2f} ms")
|
||||
print(f"95th percentile: {self.df['duration_ms'].quantile(0.95):.2f} ms")
|
||||
print(f"99th percentile: {self.df['duration_ms'].quantile(0.99):.2f} ms")
|
||||
|
||||
# By protocol
|
||||
print("\n--- Performance by Protocol ---")
|
||||
protocol_stats = self.df.groupby('protocol_category')['duration_ms'].agg([
|
||||
('count', 'count'),
|
||||
('mean', 'mean'),
|
||||
('median', 'median'),
|
||||
('std', 'std'),
|
||||
('p95', lambda x: x.quantile(0.95)),
|
||||
('p99', lambda x: x.quantile(0.99))
|
||||
]).round(2)
|
||||
print(protocol_stats)
|
||||
|
||||
# By provider
|
||||
print("\n--- Performance by Provider ---")
|
||||
provider_stats = self.df.groupby('provider')['duration_ms'].agg([
|
||||
('count', 'count'),
|
||||
('mean', 'mean'),
|
||||
('median', 'median'),
|
||||
('std', 'std'),
|
||||
('p95', lambda x: x.quantile(0.95))
|
||||
]).round(2)
|
||||
print(provider_stats)
|
||||
|
||||
# DNSSEC impact
|
||||
print("\n--- DNSSEC Validation Impact ---")
|
||||
dnssec_stats = self.df.groupby('dnssec_mode')['duration_ms'].agg([
|
||||
('count', 'count'),
|
||||
('mean', 'mean'),
|
||||
('median', 'median'),
|
||||
('overhead_vs_none', lambda x: x.mean())
|
||||
]).round(2)
|
||||
|
||||
# Calculate overhead percentage
|
||||
baseline = dnssec_stats.loc['none', 'mean'] if 'none' in dnssec_stats.index else 0
|
||||
if baseline > 0:
|
||||
dnssec_stats['overhead_pct'] = ((dnssec_stats['overhead_vs_none'] - baseline) / baseline * 100).round(1)
|
||||
print(dnssec_stats)
|
||||
|
||||
# Bandwidth analysis
|
||||
print("\n--- Bandwidth Usage ---")
|
||||
bandwidth_stats = self.df.groupby('protocol_category').agg({
|
||||
'request_size_bytes': ['mean', 'median'],
|
||||
'response_size_bytes': ['mean', 'median']
|
||||
}).round(2)
|
||||
print(bandwidth_stats)
|
||||
|
||||
# Persistence impact (where applicable)
|
||||
print("\n--- Connection Persistence Impact ---")
|
||||
persist_protocols = self.df[self.df['protocol_base'].isin(['tls', 'https'])]
|
||||
if len(persist_protocols) > 0:
|
||||
persist_stats = persist_protocols.groupby(['protocol_base', 'persistence'])['duration_ms'].agg([
|
||||
('mean', 'mean'),
|
||||
('median', 'median')
|
||||
]).round(2)
|
||||
print(persist_stats)
|
||||
|
||||
return {
|
||||
'protocol': protocol_stats,
|
||||
'provider': provider_stats,
|
||||
'dnssec': dnssec_stats,
|
||||
'bandwidth': bandwidth_stats
|
||||
}
|
||||
|
||||
def plot_latency_by_protocol(self, output_dir='plots'):
|
||||
"""Violin plot of latency distribution by protocol"""
|
||||
Path(output_dir).mkdir(exist_ok=True)
|
||||
|
||||
plt.figure(figsize=(14, 7))
|
||||
|
||||
# Order protocols logically
|
||||
protocol_order = ['Plain DNS', 'DoT', 'DoH', 'DoH/3', 'DoQ']
|
||||
available_protocols = [p for p in protocol_order if p in self.df['protocol_category'].values]
|
||||
|
||||
sns.violinplot(data=self.df, x='protocol_category', y='duration_ms',
|
||||
order=available_protocols, inner='box', cut=0)
|
||||
|
||||
plt.title('DNS Query Latency Distribution by Protocol', fontsize=14, fontweight='bold')
|
||||
plt.xlabel('Protocol', fontsize=12)
|
||||
plt.ylabel('Response Time (ms)', fontsize=12)
|
||||
plt.xticks(rotation=0)
|
||||
|
||||
# Add mean values as annotations
|
||||
for i, protocol in enumerate(available_protocols):
|
||||
mean_val = self.df[self.df['protocol_category'] == protocol]['duration_ms'].mean()
|
||||
plt.text(i, mean_val, f'{mean_val:.1f}', ha='center', va='bottom', fontweight='bold')
|
||||
|
||||
plt.tight_layout()
|
||||
plt.savefig(f'{output_dir}/latency_by_protocol.png', bbox_inches='tight')
|
||||
plt.close()
|
||||
print(f"✓ Saved: latency_by_protocol.png")
|
||||
|
||||
def plot_provider_comparison(self, output_dir='plots'):
|
||||
"""Box plot comparing providers across protocols"""
|
||||
Path(output_dir).mkdir(exist_ok=True)
|
||||
|
||||
fig, axes = plt.subplots(2, 2, figsize=(16, 12))
|
||||
fig.suptitle('Provider Performance Comparison by Protocol', fontsize=16, fontweight='bold')
|
||||
|
||||
protocols = self.df['protocol_category'].unique()
|
||||
protocols = [p for p in ['Plain DNS', 'DoT', 'DoH', 'DoH/3'] if p in protocols]
|
||||
|
||||
for idx, protocol in enumerate(protocols[:4]):
|
||||
ax = axes[idx // 2, idx % 2]
|
||||
data = self.df[self.df['protocol_category'] == protocol]
|
||||
|
||||
if len(data) > 0:
|
||||
sns.boxplot(data=data, x='provider', y='duration_ms', ax=ax)
|
||||
ax.set_title(f'{protocol}', fontsize=12, fontweight='bold')
|
||||
ax.set_xlabel('Provider', fontsize=10)
|
||||
ax.set_ylabel('Response Time (ms)', fontsize=10)
|
||||
ax.tick_params(axis='x', rotation=45)
|
||||
|
||||
plt.tight_layout()
|
||||
plt.savefig(f'{output_dir}/provider_comparison.png', bbox_inches='tight')
|
||||
plt.close()
|
||||
print(f"✓ Saved: provider_comparison.png")
|
||||
|
||||
def plot_dnssec_impact(self, output_dir='plots'):
|
||||
"""Compare DNSSEC validation methods (trust vs auth)"""
|
||||
Path(output_dir).mkdir(exist_ok=True)
|
||||
|
||||
# Filter for protocols that have DNSSEC variations
|
||||
dnssec_data = self.df[self.df['dnssec_mode'] != 'none'].copy()
|
||||
|
||||
if len(dnssec_data) == 0:
|
||||
print("⚠ No DNSSEC data available")
|
||||
return
|
||||
|
||||
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 6))
|
||||
|
||||
# Plot 1: Overall DNSSEC impact
|
||||
protocol_order = ['Plain DNS', 'DoT', 'DoH', 'DoH/3', 'DoQ']
|
||||
available = [p for p in protocol_order if p in self.df['protocol_category'].values]
|
||||
|
||||
sns.barplot(data=self.df, x='protocol_category', y='duration_ms',
|
||||
hue='dnssec_mode', order=available, ax=ax1, ci=95)
|
||||
ax1.set_title('DNSSEC Validation Overhead by Protocol', fontsize=12, fontweight='bold')
|
||||
ax1.set_xlabel('Protocol', fontsize=10)
|
||||
ax1.set_ylabel('Mean Response Time (ms)', fontsize=10)
|
||||
ax1.legend(title='DNSSEC Mode', labels=['No DNSSEC', 'Auth (Full)', 'Trust (Resolver)'])
|
||||
ax1.tick_params(axis='x', rotation=0)
|
||||
|
||||
# Plot 2: Trust vs Auth comparison
|
||||
comparison_data = dnssec_data.groupby(['protocol_category', 'dnssec_mode'])['duration_ms'].mean().reset_index()
|
||||
pivot_data = comparison_data.pivot(index='protocol_category', columns='dnssec_mode', values='duration_ms')
|
||||
|
||||
if 'auth' in pivot_data.columns and 'trust' in pivot_data.columns:
|
||||
pivot_data['overhead_pct'] = ((pivot_data['auth'] - pivot_data['trust']) / pivot_data['trust'] * 100)
|
||||
pivot_data['overhead_pct'].plot(kind='bar', ax=ax2, color='coral')
|
||||
ax2.set_title('Auth vs Trust: Additional Overhead (%)', fontsize=12, fontweight='bold')
|
||||
ax2.set_xlabel('Protocol', fontsize=10)
|
||||
ax2.set_ylabel('Additional Overhead (%)', fontsize=10)
|
||||
ax2.axhline(y=0, color='black', linestyle='--', linewidth=0.8)
|
||||
ax2.tick_params(axis='x', rotation=45)
|
||||
ax2.grid(axis='y', alpha=0.3)
|
||||
|
||||
plt.tight_layout()
|
||||
plt.savefig(f'{output_dir}/dnssec_impact.png', bbox_inches='tight')
|
||||
plt.close()
|
||||
print(f"✓ Saved: dnssec_impact.png")
|
||||
|
||||
def plot_persistence_impact(self, output_dir='plots'):
|
||||
"""Analyze impact of connection persistence"""
|
||||
Path(output_dir).mkdir(exist_ok=True)
|
||||
|
||||
persist_data = self.df[self.df['protocol_base'].isin(['tls', 'https'])].copy()
|
||||
|
||||
if len(persist_data) == 0:
|
||||
print("⚠ No persistence data available")
|
||||
return
|
||||
|
||||
plt.figure(figsize=(12, 6))
|
||||
|
||||
sns.barplot(data=persist_data, x='protocol_base', y='duration_ms',
|
||||
hue='persistence', ci=95)
|
||||
|
||||
plt.title('Impact of Connection Persistence on Latency', fontsize=14, fontweight='bold')
|
||||
plt.xlabel('Protocol', fontsize=12)
|
||||
plt.ylabel('Mean Response Time (ms)', fontsize=12)
|
||||
plt.legend(title='Keep-Alive', labels=['Disabled', 'Enabled'])
|
||||
|
||||
# Calculate and annotate overhead reduction
|
||||
for protocol in persist_data['protocol_base'].unique():
|
||||
protocol_data = persist_data[persist_data['protocol_base'] == protocol]
|
||||
|
||||
no_persist = protocol_data[protocol_data['persistence'] == False]['duration_ms'].mean()
|
||||
with_persist = protocol_data[protocol_data['persistence'] == True]['duration_ms'].mean()
|
||||
|
||||
if not np.isnan(no_persist) and not np.isnan(with_persist):
|
||||
reduction = ((no_persist - with_persist) / no_persist * 100)
|
||||
print(f"{protocol}: {reduction:.1f}% reduction with persistence")
|
||||
|
||||
plt.tight_layout()
|
||||
plt.savefig(f'{output_dir}/persistence_impact.png', bbox_inches='tight')
|
||||
plt.close()
|
||||
print(f"✓ Saved: persistence_impact.png")
|
||||
|
||||
def plot_bandwidth_overhead(self, output_dir='plots'):
|
||||
"""Visualize bandwidth usage by protocol"""
|
||||
Path(output_dir).mkdir(exist_ok=True)
|
||||
|
||||
bandwidth_data = self.df.groupby('protocol_category').agg({
|
||||
'request_size_bytes': 'mean',
|
||||
'response_size_bytes': 'mean'
|
||||
}).reset_index()
|
||||
|
||||
bandwidth_data['total_bytes'] = (bandwidth_data['request_size_bytes'] +
|
||||
bandwidth_data['response_size_bytes'])
|
||||
|
||||
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 6))
|
||||
|
||||
# Plot 1: Request vs Response sizes
|
||||
x = np.arange(len(bandwidth_data))
|
||||
width = 0.35
|
||||
|
||||
ax1.bar(x - width/2, bandwidth_data['request_size_bytes'], width,
|
||||
label='Request', alpha=0.8)
|
||||
ax1.bar(x + width/2, bandwidth_data['response_size_bytes'], width,
|
||||
label='Response', alpha=0.8)
|
||||
|
||||
ax1.set_xlabel('Protocol', fontsize=12)
|
||||
ax1.set_ylabel('Bytes', fontsize=12)
|
||||
ax1.set_title('Average Request/Response Sizes', fontsize=12, fontweight='bold')
|
||||
ax1.set_xticks(x)
|
||||
ax1.set_xticklabels(bandwidth_data['protocol_category'])
|
||||
ax1.legend()
|
||||
ax1.grid(axis='y', alpha=0.3)
|
||||
|
||||
# Plot 2: Total bandwidth overhead vs UDP baseline
|
||||
udp_total = bandwidth_data[bandwidth_data['protocol_category'] == 'Plain DNS']['total_bytes'].values
|
||||
if len(udp_total) > 0:
|
||||
bandwidth_data['overhead_vs_udp'] = ((bandwidth_data['total_bytes'] - udp_total[0]) / udp_total[0] * 100)
|
||||
|
||||
colors = ['green' if x < 0 else 'red' for x in bandwidth_data['overhead_vs_udp']]
|
||||
ax2.bar(bandwidth_data['protocol_category'], bandwidth_data['overhead_vs_udp'],
|
||||
color=colors, alpha=0.7)
|
||||
ax2.axhline(y=0, color='black', linestyle='--', linewidth=0.8)
|
||||
ax2.set_xlabel('Protocol', fontsize=12)
|
||||
ax2.set_ylabel('Overhead vs Plain DNS (%)', fontsize=12)
|
||||
ax2.set_title('Bandwidth Overhead', fontsize=12, fontweight='bold')
|
||||
ax2.grid(axis='y', alpha=0.3)
|
||||
|
||||
plt.tight_layout()
|
||||
plt.savefig(f'{output_dir}/bandwidth_overhead.png', bbox_inches='tight')
|
||||
plt.close()
|
||||
print(f"✓ Saved: bandwidth_overhead.png")
|
||||
|
||||
def plot_heatmap(self, output_dir='plots'):
|
||||
"""Heatmap of provider-protocol performance"""
|
||||
Path(output_dir).mkdir(exist_ok=True)
|
||||
|
||||
# Create pivot table
|
||||
heatmap_data = self.df.groupby(['provider', 'protocol_category'])['duration_ms'].median().unstack()
|
||||
|
||||
plt.figure(figsize=(12, 8))
|
||||
sns.heatmap(heatmap_data, annot=True, fmt='.1f', cmap='RdYlGn_r',
|
||||
cbar_kws={'label': 'Median Latency (ms)'})
|
||||
|
||||
plt.title('DNS Provider-Protocol Performance Matrix', fontsize=14, fontweight='bold')
|
||||
plt.xlabel('Protocol', fontsize=12)
|
||||
plt.ylabel('Provider', fontsize=12)
|
||||
|
||||
plt.tight_layout()
|
||||
plt.savefig(f'{output_dir}/provider_protocol_heatmap.png', bbox_inches='tight')
|
||||
plt.close()
|
||||
print(f"✓ Saved: provider_protocol_heatmap.png")
|
||||
|
||||
def plot_percentile_comparison(self, output_dir='plots'):
|
||||
"""Plot percentile comparison across protocols"""
|
||||
Path(output_dir).mkdir(exist_ok=True)
|
||||
|
||||
percentiles = [50, 75, 90, 95, 99]
|
||||
protocol_order = ['Plain DNS', 'DoT', 'DoH', 'DoH/3', 'DoQ']
|
||||
available = [p for p in protocol_order if p in self.df['protocol_category'].values]
|
||||
|
||||
percentile_data = []
|
||||
for protocol in available:
|
||||
data = self.df[self.df['protocol_category'] == protocol]['duration_ms']
|
||||
for p in percentiles:
|
||||
percentile_data.append({
|
||||
'protocol': protocol,
|
||||
'percentile': f'P{p}',
|
||||
'latency': np.percentile(data, p)
|
||||
})
|
||||
|
||||
percentile_df = pd.DataFrame(percentile_data)
|
||||
|
||||
plt.figure(figsize=(14, 7))
|
||||
sns.barplot(data=percentile_df, x='protocol', y='latency', hue='percentile', order=available)
|
||||
|
||||
plt.title('Latency Percentiles by Protocol', fontsize=14, fontweight='bold')
|
||||
plt.xlabel('Protocol', fontsize=12)
|
||||
plt.ylabel('Response Time (ms)', fontsize=12)
|
||||
plt.legend(title='Percentile', bbox_to_anchor=(1.05, 1), loc='upper left')
|
||||
|
||||
plt.tight_layout()
|
||||
plt.savefig(f'{output_dir}/percentile_comparison.png', bbox_inches='tight')
|
||||
plt.close()
|
||||
print(f"✓ Saved: percentile_comparison.png")
|
||||
|
||||
def statistical_tests(self):
|
||||
"""Perform statistical significance tests"""
|
||||
print("\n" + "="*80)
|
||||
print("STATISTICAL TESTS")
|
||||
print("="*80)
|
||||
|
||||
# Test 1: Protocol differences (Kruskal-Wallis)
|
||||
protocols = self.df['protocol_category'].unique()
|
||||
if len(protocols) > 2:
|
||||
groups = [self.df[self.df['protocol_category'] == p]['duration_ms'].values
|
||||
for p in protocols]
|
||||
h_stat, p_value = stats.kruskal(*groups)
|
||||
print(f"\n--- Kruskal-Wallis Test (Protocol Differences) ---")
|
||||
print(f"H-statistic: {h_stat:.4f}")
|
||||
print(f"p-value: {p_value:.4e}")
|
||||
print(f"Result: {'Significant' if p_value < 0.05 else 'Not significant'} differences between protocols")
|
||||
|
||||
# Test 2: DNSSEC impact (Mann-Whitney U)
|
||||
if 'none' in self.df['dnssec_mode'].values and 'auth' in self.df['dnssec_mode'].values:
|
||||
none_data = self.df[self.df['dnssec_mode'] == 'none']['duration_ms']
|
||||
auth_data = self.df[self.df['dnssec_mode'] == 'auth']['duration_ms']
|
||||
|
||||
u_stat, p_value = stats.mannwhitneyu(none_data, auth_data, alternative='two-sided')
|
||||
print(f"\n--- Mann-Whitney U Test (No DNSSEC vs Auth) ---")
|
||||
print(f"U-statistic: {u_stat:.4f}")
|
||||
print(f"p-value: {p_value:.4e}")
|
||||
print(f"Result: {'Significant' if p_value < 0.05 else 'Not significant'} difference")
|
||||
|
||||
# Test 3: Trust vs Auth comparison
|
||||
if 'trust' in self.df['dnssec_mode'].values and 'auth' in self.df['dnssec_mode'].values:
|
||||
trust_data = self.df[self.df['dnssec_mode'] == 'trust']['duration_ms']
|
||||
auth_data = self.df[self.df['dnssec_mode'] == 'auth']['duration_ms']
|
||||
|
||||
u_stat, p_value = stats.mannwhitneyu(trust_data, auth_data, alternative='two-sided')
|
||||
print(f"\n--- Mann-Whitney U Test (Trust vs Auth) ---")
|
||||
print(f"U-statistic: {u_stat:.4f}")
|
||||
print(f"p-value: {p_value:.4e}")
|
||||
print(f"Result: Auth is {'significantly' if p_value < 0.05 else 'not significantly'} slower than Trust")
|
||||
|
||||
def generate_latex_table(self, output_dir='plots'):
|
||||
"""Generate LaTeX table for thesis"""
|
||||
Path(output_dir).mkdir(exist_ok=True)
|
||||
|
||||
# Summary table by protocol
|
||||
summary = self.df.groupby('protocol_category')['duration_ms'].agg([
|
||||
('Mean', 'mean'),
|
||||
('Median', 'median'),
|
||||
('Std Dev', 'std'),
|
||||
('P95', lambda x: x.quantile(0.95)),
|
||||
('P99', lambda x: x.quantile(0.99))
|
||||
]).round(2)
|
||||
|
||||
latex_code = summary.to_latex(float_format="%.2f")
|
||||
|
||||
with open(f'{output_dir}/summary_table.tex', 'w') as f:
|
||||
f.write(latex_code)
|
||||
|
||||
print(f"✓ Saved: summary_table.tex")
|
||||
print("\nLaTeX Table Preview:")
|
||||
print(latex_code)
|
||||
|
||||
def run_full_analysis(self):
|
||||
"""Run complete analysis pipeline"""
|
||||
print("="*80)
|
||||
print("DNS QoS Analysis - Starting Full Analysis")
|
||||
print("="*80)
|
||||
|
||||
# Load data
|
||||
print("\n[1/10] Loading data...")
|
||||
self.load_all_data()
|
||||
|
||||
# Generate statistics
|
||||
print("\n[2/10] Generating summary statistics...")
|
||||
self.generate_summary_statistics()
|
||||
|
||||
# Statistical tests
|
||||
print("\n[3/10] Running statistical tests...")
|
||||
self.statistical_tests()
|
||||
|
||||
# Generate plots
|
||||
print("\n[4/10] Creating latency by protocol plot...")
|
||||
self.plot_latency_by_protocol()
|
||||
|
||||
print("\n[5/10] Creating provider comparison plot...")
|
||||
self.plot_provider_comparison()
|
||||
|
||||
print("\n[6/10] Creating DNSSEC impact plot...")
|
||||
self.plot_dnssec_impact()
|
||||
|
||||
print("\n[7/10] Creating persistence impact plot...")
|
||||
self.plot_persistence_impact()
|
||||
|
||||
print("\n[8/10] Creating bandwidth overhead plot...")
|
||||
self.plot_bandwidth_overhead()
|
||||
|
||||
print("\n[9/10] Creating heatmap...")
|
||||
self.plot_heatmap()
|
||||
|
||||
print("\n[10/10] Creating percentile comparison...")
|
||||
self.plot_percentile_comparison()
|
||||
|
||||
# Generate LaTeX table
|
||||
print("\n[Bonus] Generating LaTeX table...")
|
||||
self.generate_latex_table()
|
||||
|
||||
print("\n" + "="*80)
|
||||
print("✓ Analysis Complete! Check the 'plots' directory for all visualizations.")
|
||||
print("="*80)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
analyzer = DNSAnalyzer(results_dir='results')
|
||||
analyzer.run_full_analysis()
|
||||
@@ -1,536 +0,0 @@
|
||||
import pandas as pd
|
||||
import matplotlib.pyplot as plt
|
||||
import seaborn as sns
|
||||
import numpy as np
|
||||
from pathlib import Path
|
||||
import datetime
|
||||
from dateutil import parser as date_parser
|
||||
import dpkt
|
||||
|
||||
# Set style
|
||||
sns.set_style("whitegrid")
|
||||
plt.rcParams['figure.dpi'] = 300
|
||||
plt.rcParams['savefig.dpi'] = 300
|
||||
plt.rcParams['font.size'] = 10
|
||||
|
||||
class FastDNSAnalyzer:
|
||||
def __init__(self, results_dir='results'):
|
||||
self.results_dir = Path(results_dir)
|
||||
self.all_data = []
|
||||
|
||||
def should_include_file(self, filename):
|
||||
"""Filter out DNSSEC and non-persist files"""
|
||||
name = filename.stem
|
||||
if 'auth' in name or 'trust' in name:
|
||||
return False
|
||||
if name in ['tls', 'https']:
|
||||
return False
|
||||
return True
|
||||
|
||||
def parse_rfc3339_nano(self, timestamp_str):
|
||||
"""Parse RFC3339Nano timestamp with timezone"""
|
||||
try:
|
||||
dt = date_parser.parse(timestamp_str)
|
||||
return dt.astimezone(datetime.timezone.utc).timestamp()
|
||||
except Exception as e:
|
||||
print(f" Error parsing timestamp {timestamp_str}: {e}")
|
||||
return None
|
||||
|
||||
def extract_bandwidth_from_pcap_fast(self, pcap_file, csv_data):
|
||||
"""Fast bandwidth extraction using dpkt"""
|
||||
print(f" Analyzing pcap: {pcap_file.name}")
|
||||
|
||||
try:
|
||||
with open(pcap_file, 'rb') as f:
|
||||
pcap = dpkt.pcap.Reader(f)
|
||||
|
||||
# Build query time windows
|
||||
query_windows = []
|
||||
for idx, row in csv_data.iterrows():
|
||||
start_time = self.parse_rfc3339_nano(row['timestamp'])
|
||||
if start_time is None:
|
||||
continue
|
||||
|
||||
duration_seconds = row['duration_ns'] / 1_000_000_000
|
||||
end_time = start_time + duration_seconds
|
||||
|
||||
query_windows.append({
|
||||
'index': idx,
|
||||
'start': start_time,
|
||||
'end': end_time,
|
||||
'bytes_sent': 0,
|
||||
'bytes_received': 0,
|
||||
'packets_sent': 0,
|
||||
'packets_received': 0
|
||||
})
|
||||
|
||||
if not query_windows:
|
||||
print(" ✗ No valid query windows")
|
||||
return None
|
||||
|
||||
# Sort windows for faster matching
|
||||
query_windows.sort(key=lambda x: x['start'])
|
||||
|
||||
# Process packets
|
||||
packet_count = 0
|
||||
matched_count = 0
|
||||
|
||||
for timestamp, buf in pcap:
|
||||
packet_count += 1
|
||||
packet_size = len(buf)
|
||||
|
||||
# Quick parse to determine direction
|
||||
try:
|
||||
eth = dpkt.ethernet.Ethernet(buf)
|
||||
|
||||
# Get IP layer
|
||||
if isinstance(eth.data, dpkt.ip.IP):
|
||||
ip = eth.data
|
||||
elif isinstance(eth.data, dpkt.ip6.IP6):
|
||||
ip = eth.data
|
||||
else:
|
||||
continue
|
||||
|
||||
# Get transport layer
|
||||
if isinstance(ip.data, dpkt.udp.UDP):
|
||||
transport = ip.data
|
||||
src_port = transport.sport
|
||||
dst_port = transport.dport
|
||||
elif isinstance(ip.data, dpkt.tcp.TCP):
|
||||
transport = ip.data
|
||||
src_port = transport.sport
|
||||
dst_port = transport.dport
|
||||
else:
|
||||
continue
|
||||
|
||||
# Determine direction (client port usually higher)
|
||||
is_outbound = src_port > dst_port
|
||||
|
||||
# Binary search for matching window
|
||||
for window in query_windows:
|
||||
if window['start'] <= timestamp <= window['end']:
|
||||
if is_outbound:
|
||||
window['bytes_sent'] += packet_size
|
||||
window['packets_sent'] += 1
|
||||
else:
|
||||
window['bytes_received'] += packet_size
|
||||
window['packets_received'] += 1
|
||||
matched_count += 1
|
||||
break
|
||||
elif timestamp < window['start']:
|
||||
break # No more windows to check
|
||||
|
||||
except Exception:
|
||||
continue
|
||||
|
||||
print(f" ✓ Processed {packet_count} packets, matched {matched_count}")
|
||||
|
||||
# Convert to DataFrame
|
||||
bandwidth_df = pd.DataFrame(query_windows)
|
||||
return bandwidth_df[['index', 'bytes_sent', 'bytes_received',
|
||||
'packets_sent', 'packets_received']]
|
||||
|
||||
except Exception as e:
|
||||
print(f" ✗ Error reading pcap: {e}")
|
||||
return None
|
||||
|
||||
def load_data(self):
|
||||
"""Load all relevant CSV files and extract bandwidth from pcaps"""
|
||||
print("Loading data and analyzing bandwidth...")
|
||||
|
||||
for provider_dir in self.results_dir.iterdir():
|
||||
if not provider_dir.is_dir():
|
||||
continue
|
||||
|
||||
provider = provider_dir.name
|
||||
|
||||
for csv_file in provider_dir.glob('*.csv'):
|
||||
if not self.should_include_file(csv_file):
|
||||
continue
|
||||
|
||||
try:
|
||||
df = pd.read_csv(csv_file)
|
||||
df['provider'] = provider
|
||||
df['test_file'] = csv_file.stem
|
||||
df['csv_path'] = str(csv_file)
|
||||
|
||||
# Find corresponding pcap file
|
||||
pcap_file = csv_file.with_suffix('.pcap')
|
||||
if pcap_file.exists():
|
||||
print(f" Processing: {provider}/{csv_file.name}")
|
||||
bandwidth_data = self.extract_bandwidth_from_pcap_fast(pcap_file, df)
|
||||
|
||||
if bandwidth_data is not None and len(bandwidth_data) > 0:
|
||||
# Merge bandwidth data
|
||||
df = df.reset_index(drop=True)
|
||||
for col in ['bytes_sent', 'bytes_received', 'packets_sent', 'packets_received']:
|
||||
df[col] = 0
|
||||
|
||||
for _, row in bandwidth_data.iterrows():
|
||||
idx = int(row['index'])
|
||||
if idx < len(df):
|
||||
df.at[idx, 'bytes_sent'] = row['bytes_sent']
|
||||
df.at[idx, 'bytes_received'] = row['bytes_received']
|
||||
df.at[idx, 'packets_sent'] = row['packets_sent']
|
||||
df.at[idx, 'packets_received'] = row['packets_received']
|
||||
|
||||
df['total_bytes'] = df['bytes_sent'] + df['bytes_received']
|
||||
|
||||
print(f" ✓ Extracted bandwidth for {len(df)} queries")
|
||||
else:
|
||||
print(f" ⚠ Could not extract bandwidth data")
|
||||
else:
|
||||
print(f" ⚠ No pcap found for {csv_file.name}")
|
||||
|
||||
self.all_data.append(df)
|
||||
|
||||
except Exception as e:
|
||||
print(f" ✗ Error loading {csv_file}: {e}")
|
||||
import traceback
|
||||
traceback.print_exc()
|
||||
|
||||
print(f"\nTotal files loaded: {len(self.all_data)}")
|
||||
|
||||
def create_line_graphs(self, output_dir='output/line_graphs'):
|
||||
"""Create line graphs for latency and bandwidth"""
|
||||
Path(output_dir).mkdir(parents=True, exist_ok=True)
|
||||
|
||||
print("\nGenerating line graphs...")
|
||||
|
||||
for df in self.all_data:
|
||||
provider = df['provider'].iloc[0]
|
||||
test_name = df['test_file'].iloc[0]
|
||||
|
||||
df['query_index'] = range(1, len(df) + 1)
|
||||
|
||||
# Create figure with 2 subplots
|
||||
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(14, 10))
|
||||
|
||||
# Plot 1: Latency
|
||||
ax1.plot(df['query_index'], df['duration_ms'], marker='o',
|
||||
markersize=4, linewidth=1, alpha=0.7, color='steelblue')
|
||||
mean_latency = df['duration_ms'].mean()
|
||||
ax1.axhline(y=mean_latency, color='r', linestyle='--',
|
||||
label=f'Mean: {mean_latency:.2f} ms', linewidth=2)
|
||||
ax1.set_xlabel('Query Number', fontsize=12)
|
||||
ax1.set_ylabel('Latency (ms)', fontsize=12)
|
||||
ax1.set_title('Latency Over Time', fontsize=12, fontweight='bold')
|
||||
ax1.legend()
|
||||
ax1.grid(True, alpha=0.3)
|
||||
|
||||
# Plot 2: Bandwidth
|
||||
if 'total_bytes' in df.columns and df['total_bytes'].sum() > 0:
|
||||
ax2.plot(df['query_index'], df['bytes_sent'], marker='s',
|
||||
markersize=4, linewidth=1, alpha=0.7,
|
||||
color='orange', label='Sent')
|
||||
ax2.plot(df['query_index'], df['bytes_received'], marker='^',
|
||||
markersize=4, linewidth=1, alpha=0.7,
|
||||
color='green', label='Received')
|
||||
|
||||
mean_sent = df['bytes_sent'].mean()
|
||||
mean_received = df['bytes_received'].mean()
|
||||
ax2.axhline(y=mean_sent, color='orange', linestyle='--',
|
||||
linewidth=1.5, alpha=0.5)
|
||||
ax2.axhline(y=mean_received, color='green', linestyle='--',
|
||||
linewidth=1.5, alpha=0.5)
|
||||
|
||||
ax2.set_xlabel('Query Number', fontsize=12)
|
||||
ax2.set_ylabel('Bytes', fontsize=12)
|
||||
ax2.set_title(f'Bandwidth Over Time (Mean: ↑{mean_sent:.0f}B ↓{mean_received:.0f}B)',
|
||||
fontsize=12, fontweight='bold')
|
||||
ax2.legend()
|
||||
ax2.grid(True, alpha=0.3)
|
||||
|
||||
fig.suptitle(f'{provider.upper()} - {test_name}',
|
||||
fontsize=14, fontweight='bold')
|
||||
plt.tight_layout()
|
||||
|
||||
filename = f"{provider}_{test_name}.png"
|
||||
plt.savefig(f'{output_dir}/{filename}', bbox_inches='tight')
|
||||
plt.close()
|
||||
|
||||
print(f" ✓ Created: {filename}")
|
||||
|
||||
def get_protocol_name(self, test_file):
|
||||
"""Extract clean protocol name"""
|
||||
name = test_file.replace('-persist', '')
|
||||
|
||||
protocol_map = {
|
||||
'udp': 'Plain DNS (UDP)',
|
||||
'tls': 'DoT (DNS over TLS)',
|
||||
'https': 'DoH (DNS over HTTPS)',
|
||||
'doh3': 'DoH/3 (DNS over HTTP/3)',
|
||||
'doq': 'DoQ (DNS over QUIC)'
|
||||
}
|
||||
|
||||
return protocol_map.get(name, name.upper())
|
||||
|
||||
def create_resolver_comparison_bars(self, output_dir='output/comparisons'):
|
||||
"""Create bar graphs comparing resolvers for latency and bandwidth"""
|
||||
Path(output_dir).mkdir(parents=True, exist_ok=True)
|
||||
|
||||
print("\nGenerating resolver comparison graphs...")
|
||||
|
||||
combined_df = pd.concat(self.all_data, ignore_index=True)
|
||||
protocols = combined_df['test_file'].unique()
|
||||
|
||||
for protocol in protocols:
|
||||
protocol_data = combined_df[combined_df['test_file'] == protocol]
|
||||
protocol_name = self.get_protocol_name(protocol)
|
||||
|
||||
# Latency stats
|
||||
latency_stats = protocol_data.groupby('provider')['duration_ms'].agg([
|
||||
('mean', 'mean'),
|
||||
('median', 'median'),
|
||||
('std', 'std')
|
||||
]).reset_index()
|
||||
|
||||
# Create latency comparison
|
||||
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 6))
|
||||
fig.suptitle(f'{protocol_name} - Latency Comparison',
|
||||
fontsize=16, fontweight='bold')
|
||||
|
||||
# Mean latency
|
||||
bars1 = ax1.bar(latency_stats['provider'], latency_stats['mean'],
|
||||
color='steelblue', alpha=0.8, edgecolor='black')
|
||||
ax1.errorbar(latency_stats['provider'], latency_stats['mean'],
|
||||
yerr=latency_stats['std'], fmt='none', color='black',
|
||||
capsize=5, alpha=0.6)
|
||||
|
||||
for bar in bars1:
|
||||
height = bar.get_height()
|
||||
ax1.text(bar.get_x() + bar.get_width()/2., height,
|
||||
f'{height:.2f}',
|
||||
ha='center', va='bottom', fontweight='bold')
|
||||
|
||||
ax1.set_xlabel('Resolver', fontsize=12)
|
||||
ax1.set_ylabel('Mean Latency (ms)', fontsize=12)
|
||||
ax1.set_title('Mean Latency', fontsize=12)
|
||||
ax1.grid(axis='y', alpha=0.3)
|
||||
|
||||
# Median latency
|
||||
bars2 = ax2.bar(latency_stats['provider'], latency_stats['median'],
|
||||
color='coral', alpha=0.8, edgecolor='black')
|
||||
|
||||
for bar in bars2:
|
||||
height = bar.get_height()
|
||||
ax2.text(bar.get_x() + bar.get_width()/2., height,
|
||||
f'{height:.2f}',
|
||||
ha='center', va='bottom', fontweight='bold')
|
||||
|
||||
ax2.set_xlabel('Resolver', fontsize=12)
|
||||
ax2.set_ylabel('Median Latency (ms)', fontsize=12)
|
||||
ax2.set_title('Median Latency', fontsize=12)
|
||||
ax2.grid(axis='y', alpha=0.3)
|
||||
|
||||
plt.tight_layout()
|
||||
plt.savefig(f'{output_dir}/latency_{protocol}.png', bbox_inches='tight')
|
||||
plt.close()
|
||||
print(f" ✓ Created: latency_{protocol}.png")
|
||||
|
||||
# Bandwidth comparison
|
||||
if 'total_bytes' in protocol_data.columns and protocol_data['total_bytes'].sum() > 0:
|
||||
bandwidth_stats = protocol_data.groupby('provider').agg({
|
||||
'bytes_sent': 'mean',
|
||||
'bytes_received': 'mean',
|
||||
'total_bytes': 'mean'
|
||||
}).reset_index()
|
||||
|
||||
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 6))
|
||||
fig.suptitle(f'{protocol_name} - Bandwidth Comparison',
|
||||
fontsize=16, fontweight='bold')
|
||||
|
||||
# Sent vs Received
|
||||
x = np.arange(len(bandwidth_stats))
|
||||
width = 0.35
|
||||
|
||||
bars1 = ax1.bar(x - width/2, bandwidth_stats['bytes_sent'], width,
|
||||
label='Sent', color='orange', alpha=0.8, edgecolor='black')
|
||||
bars2 = ax1.bar(x + width/2, bandwidth_stats['bytes_received'], width,
|
||||
label='Received', color='green', alpha=0.8, edgecolor='black')
|
||||
|
||||
ax1.set_xlabel('Resolver', fontsize=12)
|
||||
ax1.set_ylabel('Bytes per Query', fontsize=12)
|
||||
ax1.set_title('Average Bandwidth per Query', fontsize=12)
|
||||
ax1.set_xticks(x)
|
||||
ax1.set_xticklabels(bandwidth_stats['provider'])
|
||||
ax1.legend()
|
||||
ax1.grid(axis='y', alpha=0.3)
|
||||
|
||||
# Total bandwidth
|
||||
bars3 = ax2.bar(bandwidth_stats['provider'], bandwidth_stats['total_bytes'],
|
||||
color='purple', alpha=0.8, edgecolor='black')
|
||||
|
||||
for bar in bars3:
|
||||
height = bar.get_height()
|
||||
ax2.text(bar.get_x() + bar.get_width()/2., height,
|
||||
f'{height:.0f}',
|
||||
ha='center', va='bottom', fontweight='bold')
|
||||
|
||||
ax2.set_xlabel('Resolver', fontsize=12)
|
||||
ax2.set_ylabel('Total Bytes per Query', fontsize=12)
|
||||
ax2.set_title('Total Bandwidth per Query', fontsize=12)
|
||||
ax2.grid(axis='y', alpha=0.3)
|
||||
|
||||
plt.tight_layout()
|
||||
plt.savefig(f'{output_dir}/bandwidth_{protocol}.png', bbox_inches='tight')
|
||||
plt.close()
|
||||
print(f" ✓ Created: bandwidth_{protocol}.png")
|
||||
|
||||
def generate_latex_tables(self, output_dir='output/tables'):
|
||||
"""Generate LaTeX tables with latency and bandwidth statistics"""
|
||||
Path(output_dir).mkdir(parents=True, exist_ok=True)
|
||||
|
||||
print("\nGenerating LaTeX tables...")
|
||||
|
||||
combined_df = pd.concat(self.all_data, ignore_index=True)
|
||||
|
||||
# Generate latency table for each resolver
|
||||
for provider in combined_df['provider'].unique():
|
||||
provider_data = combined_df[combined_df['provider'] == provider]
|
||||
|
||||
stats = provider_data.groupby('test_file')['duration_ms'].agg([
|
||||
('Mean', 'mean'),
|
||||
('Median', 'median'),
|
||||
('Std Dev', 'std'),
|
||||
('P95', lambda x: x.quantile(0.95)),
|
||||
('P99', lambda x: x.quantile(0.99))
|
||||
]).round(2)
|
||||
|
||||
stats.index = stats.index.map(self.get_protocol_name)
|
||||
stats.index.name = 'Protocol'
|
||||
|
||||
latex_code = stats.to_latex(
|
||||
caption=f'{provider.upper()} - Latency Statistics (ms)',
|
||||
label=f'tab:{provider}_latency',
|
||||
float_format="%.2f"
|
||||
)
|
||||
|
||||
with open(f'{output_dir}/{provider}_latency.tex', 'w') as f:
|
||||
f.write(latex_code)
|
||||
|
||||
print(f" ✓ Created: {provider}_latency.tex")
|
||||
|
||||
# Generate bandwidth table for each resolver
|
||||
for provider in combined_df['provider'].unique():
|
||||
provider_data = combined_df[combined_df['provider'] == provider]
|
||||
|
||||
if 'total_bytes' not in provider_data.columns or provider_data['total_bytes'].sum() == 0:
|
||||
continue
|
||||
|
||||
bandwidth_stats = provider_data.groupby('test_file').agg({
|
||||
'bytes_sent': 'mean',
|
||||
'bytes_received': 'mean',
|
||||
'total_bytes': 'mean'
|
||||
}).round(2)
|
||||
|
||||
bandwidth_stats.columns = ['Avg Sent (B)', 'Avg Received (B)', 'Avg Total (B)']
|
||||
bandwidth_stats.index = bandwidth_stats.index.map(self.get_protocol_name)
|
||||
bandwidth_stats.index.name = 'Protocol'
|
||||
|
||||
latex_code = bandwidth_stats.to_latex(
|
||||
caption=f'{provider.upper()} - Bandwidth Statistics',
|
||||
label=f'tab:{provider}_bandwidth',
|
||||
float_format="%.2f"
|
||||
)
|
||||
|
||||
with open(f'{output_dir}/{provider}_bandwidth.tex', 'w') as f:
|
||||
f.write(latex_code)
|
||||
|
||||
print(f" ✓ Created: {provider}_bandwidth.tex")
|
||||
|
||||
# Generate protocol efficiency table
|
||||
print("\nGenerating protocol efficiency table...")
|
||||
|
||||
if 'total_bytes' in combined_df.columns and combined_df['total_bytes'].sum() > 0:
|
||||
protocol_bandwidth = combined_df.groupby('test_file').agg({
|
||||
'bytes_sent': 'mean',
|
||||
'bytes_received': 'mean',
|
||||
'total_bytes': 'mean'
|
||||
}).round(2)
|
||||
|
||||
# Find UDP baseline
|
||||
udp_baseline = None
|
||||
for protocol in protocol_bandwidth.index:
|
||||
if 'udp' in protocol:
|
||||
udp_baseline = protocol_bandwidth.loc[protocol, 'total_bytes']
|
||||
break
|
||||
|
||||
if udp_baseline and udp_baseline > 0:
|
||||
protocol_bandwidth['Overhead vs UDP (%)'] = (
|
||||
(protocol_bandwidth['total_bytes'] - udp_baseline) / udp_baseline * 100
|
||||
).round(1)
|
||||
protocol_bandwidth['Efficiency (%)'] = (
|
||||
100 / (1 + protocol_bandwidth['Overhead vs UDP (%)'] / 100)
|
||||
).round(1)
|
||||
|
||||
protocol_bandwidth.columns = ['Avg Sent (B)', 'Avg Received (B)',
|
||||
'Avg Total (B)', 'Overhead (%)', 'Efficiency (%)']
|
||||
protocol_bandwidth.index = protocol_bandwidth.index.map(self.get_protocol_name)
|
||||
protocol_bandwidth.index.name = 'Protocol'
|
||||
|
||||
latex_code = protocol_bandwidth.to_latex(
|
||||
caption='Protocol Bandwidth Efficiency Comparison',
|
||||
label='tab:protocol_efficiency',
|
||||
float_format="%.2f"
|
||||
)
|
||||
|
||||
with open(f'{output_dir}/protocol_efficiency.tex', 'w') as f:
|
||||
f.write(latex_code)
|
||||
|
||||
print(f" ✓ Created: protocol_efficiency.tex")
|
||||
print("\n--- Protocol Efficiency ---")
|
||||
print(protocol_bandwidth.to_string())
|
||||
|
||||
# Generate combined comparison tables
|
||||
for metric in ['Mean', 'Median', 'P95']:
|
||||
comparison_stats = combined_df.groupby(['provider', 'test_file'])['duration_ms'].agg([
|
||||
('Mean', 'mean'),
|
||||
('Median', 'median'),
|
||||
('P95', lambda x: x.quantile(0.95))
|
||||
]).round(2)
|
||||
|
||||
pivot_table = comparison_stats[metric].unstack(level=0)
|
||||
pivot_table.index = pivot_table.index.map(self.get_protocol_name)
|
||||
pivot_table.index.name = 'Protocol'
|
||||
|
||||
latex_code = pivot_table.to_latex(
|
||||
caption=f'Resolver Latency Comparison - {metric} (ms)',
|
||||
label=f'tab:comparison_{metric.lower()}',
|
||||
float_format="%.2f"
|
||||
)
|
||||
|
||||
with open(f'{output_dir}/comparison_{metric.lower()}.tex', 'w') as f:
|
||||
f.write(latex_code)
|
||||
|
||||
print(f" ✓ Created: comparison_{metric.lower()}.tex")
|
||||
|
||||
def run_analysis(self):
|
||||
"""Run the complete analysis"""
|
||||
print("="*80)
|
||||
print("Fast DNS QoS Analysis with Bandwidth")
|
||||
print("="*80)
|
||||
|
||||
self.load_data()
|
||||
|
||||
if not self.all_data:
|
||||
print("\n⚠ No data loaded.")
|
||||
return
|
||||
|
||||
print("\n" + "="*80)
|
||||
self.create_line_graphs()
|
||||
|
||||
print("\n" + "="*80)
|
||||
self.create_resolver_comparison_bars()
|
||||
|
||||
print("\n" + "="*80)
|
||||
self.generate_latex_tables()
|
||||
|
||||
print("\n" + "="*80)
|
||||
print("✓ Analysis Complete!")
|
||||
print("="*80)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
analyzer = FastDNSAnalyzer(results_dir='results')
|
||||
analyzer.run_analysis()
|
||||
@@ -15,6 +15,15 @@ import dpkt
|
||||
from dateutil import parser as date_parser
|
||||
|
||||
|
||||
BANDWIDTH_COLUMNS = [
|
||||
'bytes_sent',
|
||||
'bytes_received',
|
||||
'packets_sent',
|
||||
'packets_received',
|
||||
'total_bytes',
|
||||
]
|
||||
|
||||
|
||||
class Packet(NamedTuple):
|
||||
"""Lightweight packet representation."""
|
||||
timestamp: float
|
||||
@@ -36,6 +45,36 @@ class QueryWindow:
|
||||
self.pkts_received = 0
|
||||
|
||||
|
||||
def is_already_processed(csv_path: Path) -> bool:
|
||||
"""
|
||||
Check if CSV has already been processed.
|
||||
Returns True if bandwidth columns exist AND at least one row has non-zero data.
|
||||
"""
|
||||
try:
|
||||
with open(csv_path, 'r', encoding='utf-8') as f:
|
||||
reader = csv.DictReader(f)
|
||||
|
||||
# Check if columns exist
|
||||
if not reader.fieldnames:
|
||||
return False
|
||||
|
||||
if not all(col in reader.fieldnames for col in BANDWIDTH_COLUMNS):
|
||||
return False
|
||||
|
||||
# Check if any row has non-zero bandwidth data
|
||||
for row in reader:
|
||||
for col in BANDWIDTH_COLUMNS:
|
||||
val = row.get(col, '').strip()
|
||||
if val and val != '0':
|
||||
return True
|
||||
|
||||
# All rows have zero/empty values - not truly processed
|
||||
return False
|
||||
|
||||
except Exception:
|
||||
return False
|
||||
|
||||
|
||||
def parse_csv_timestamp(ts_str: str) -> float:
|
||||
"""Convert RFC3339Nano timestamp to Unix epoch (seconds)."""
|
||||
dt = date_parser.isoparse(ts_str)
|
||||
@@ -249,24 +288,20 @@ def write_enriched_csv(
|
||||
shutil.copy2(csv_path, backup_path)
|
||||
print(f" Backup: {backup_path.name}")
|
||||
|
||||
# Get fieldnames
|
||||
original_fields = list(queries[0]['data'].keys())
|
||||
new_fields = [
|
||||
'bytes_sent',
|
||||
'bytes_received',
|
||||
'packets_sent',
|
||||
'packets_received',
|
||||
'total_bytes',
|
||||
# Get fieldnames - filter out any existing bandwidth columns to avoid dupes
|
||||
original_fields = [
|
||||
f for f in queries[0]['data'].keys()
|
||||
if f not in BANDWIDTH_COLUMNS
|
||||
]
|
||||
fieldnames = original_fields + new_fields
|
||||
fieldnames = original_fields + BANDWIDTH_COLUMNS
|
||||
|
||||
with open(csv_path, 'w', encoding='utf-8', newline='') as f:
|
||||
writer = csv.DictWriter(f, fieldnames=fieldnames)
|
||||
writer.writeheader()
|
||||
|
||||
for q in queries:
|
||||
row = q['data'].copy()
|
||||
for field in new_fields:
|
||||
row = {k: v for k, v in q['data'].items() if k not in BANDWIDTH_COLUMNS}
|
||||
for field in BANDWIDTH_COLUMNS:
|
||||
row[field] = q[field]
|
||||
writer.writerow(row)
|
||||
|
||||
@@ -281,6 +316,7 @@ def process_provider_directory(provider_path: Path):
|
||||
|
||||
csv_files = sorted(provider_path.glob('*.csv'))
|
||||
processed = 0
|
||||
skipped = 0
|
||||
total_time = 0
|
||||
|
||||
for csv_path in csv_files:
|
||||
@@ -294,6 +330,12 @@ def process_provider_directory(provider_path: Path):
|
||||
print(f"\n ⚠ Skipping {csv_path.name} - no matching PCAP")
|
||||
continue
|
||||
|
||||
# Check if already processed
|
||||
if is_already_processed(csv_path):
|
||||
print(f"\n ⏭ Skipping {csv_path.name} - already processed")
|
||||
skipped += 1
|
||||
continue
|
||||
|
||||
print(f"\n 📁 {csv_path.name}")
|
||||
file_start = time.time()
|
||||
|
||||
@@ -323,7 +365,8 @@ def process_provider_directory(provider_path: Path):
|
||||
print(f" ✓ Completed in {file_time:.2f}s")
|
||||
|
||||
print(f"\n {'='*58}")
|
||||
print(f" {provider_path.name}: {processed} files in {total_time:.2f}s")
|
||||
print(f" {provider_path.name}: {processed} processed, {skipped} skipped")
|
||||
print(f" Time: {total_time:.2f}s")
|
||||
print(f" {'='*58}")
|
||||
|
||||
|
||||
@@ -0,0 +1,207 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Merge all DNS test CSVs into a single unified CSV.
|
||||
Extracts metadata from filenames and directory structure.
|
||||
"""
|
||||
|
||||
import csv
|
||||
import os
|
||||
from pathlib import Path
|
||||
from dateutil import parser as date_parser
|
||||
import argparse
|
||||
|
||||
|
||||
def parse_config(filename: str) -> dict:
|
||||
"""
|
||||
Parse protocol, dnssec_mode, and keep_alive from filename.
|
||||
|
||||
Examples:
|
||||
doh3-auth.csv → protocol=doh3, dnssec=auth, persist=0
|
||||
tls-trust-persist.csv → protocol=tls, dnssec=trust, persist=1
|
||||
https.csv → protocol=https, dnssec=off, persist=0
|
||||
doudp-auth.csv → protocol=doudp, dnssec=auth, persist=0
|
||||
dnscrypt-trust.csv → protocol=dnscrypt, dnssec=trust, persist=0
|
||||
"""
|
||||
base = filename.replace('.csv', '')
|
||||
parts = base.split('-')
|
||||
|
||||
protocol = parts[0]
|
||||
dnssec_mode = 'off'
|
||||
keep_alive = 0
|
||||
|
||||
for part in parts[1:]:
|
||||
if part in ('auth', 'trust'):
|
||||
dnssec_mode = part
|
||||
elif part == 'persist':
|
||||
keep_alive = 1
|
||||
|
||||
return {
|
||||
'protocol': protocol,
|
||||
'dnssec_mode': dnssec_mode,
|
||||
'keep_alive': keep_alive,
|
||||
}
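# Quick self-check of the parser against the filenames listed in the
# docstring above (illustrative only; these names are examples, not real runs):
assert parse_config('doh3-auth.csv') == {'protocol': 'doh3', 'dnssec_mode': 'auth', 'keep_alive': 0}
assert parse_config('tls-trust-persist.csv') == {'protocol': 'tls', 'dnssec_mode': 'trust', 'keep_alive': 1}
assert parse_config('https.csv') == {'protocol': 'https', 'dnssec_mode': 'off', 'keep_alive': 0}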
|
||||
|
||||
|
||||
def parse_timestamp_unix(ts_str: str) -> float:
|
||||
"""Convert RFC3339 timestamp to Unix epoch."""
|
||||
try:
|
||||
dt = date_parser.isoparse(ts_str)
|
||||
return dt.timestamp()
|
||||
except Exception:
|
||||
return 0.0
|
||||
|
||||
|
||||
def ns_to_ms(duration_ns: str) -> float:
|
||||
"""Convert nanoseconds to milliseconds."""
|
||||
try:
|
||||
return float(duration_ns) / 1_000_000
|
||||
except (ValueError, TypeError):
|
||||
return 0.0
|
||||
|
||||
|
||||
def find_csv_files(input_dir: Path) -> list:
|
||||
"""Find all non-backup CSV files."""
|
||||
files = []
|
||||
for csv_path in input_dir.rglob('*.csv'):
|
||||
if '.bak' in csv_path.name:
|
||||
continue
|
||||
files.append(csv_path)
|
||||
return sorted(files)
|
||||
|
||||
|
||||
def merge_all_csvs(input_dir: Path, output_path: Path):
|
||||
"""Merge all CSVs into a single file."""
|
||||
|
||||
csv_files = find_csv_files(input_dir)
|
||||
|
||||
if not csv_files:
|
||||
print("No CSV files found")
|
||||
return
|
||||
|
||||
print(f"Found {len(csv_files)} CSV files")
|
||||
|
||||
# Output columns in desired order
|
||||
output_columns = [
|
||||
'id',
|
||||
'provider',
|
||||
'protocol',
|
||||
'dnssec_mode',
|
||||
'domain',
|
||||
'query_type',
|
||||
'keep_alive',
|
||||
'dns_server',
|
||||
'timestamp',
|
||||
'timestamp_unix',
|
||||
'duration_ns',
|
||||
'duration_ms',
|
||||
'request_size_bytes',
|
||||
'response_size_bytes',
|
||||
'bytes_sent',
|
||||
'bytes_received',
|
||||
'packets_sent',
|
||||
'packets_received',
|
||||
'total_bytes',
|
||||
'response_code',
|
||||
'error',
|
||||
]
|
||||
|
||||
global_id = 0
|
||||
total_rows = 0
|
||||
|
||||
with open(output_path, 'w', newline='', encoding='utf-8') as outfile:
|
||||
writer = csv.DictWriter(outfile, fieldnames=output_columns)
|
||||
writer.writeheader()
|
||||
|
||||
for csv_path in csv_files:
|
||||
# Extract provider from path
|
||||
provider = csv_path.parent.name.lower()
|
||||
|
||||
# Parse config from filename
|
||||
config = parse_config(csv_path.name)
|
||||
|
||||
print(f" {provider}/{csv_path.name} ({config['protocol']}, {config['dnssec_mode']}, persist={config['keep_alive']})")
|
||||
|
||||
file_rows = 0
|
||||
|
||||
with open(csv_path, 'r', newline='', encoding='utf-8') as infile:
|
||||
reader = csv.DictReader(infile)
|
||||
|
||||
for row in reader:
|
||||
global_id += 1
|
||||
file_rows += 1
|
||||
|
||||
# Build output row
|
||||
out_row = {
|
||||
'id': global_id,
|
||||
'provider': provider,
|
||||
'protocol': config['protocol'],
|
||||
'dnssec_mode': config['dnssec_mode'],
|
||||
'keep_alive': config['keep_alive'],
|
||||
'domain': row.get('domain', ''),
|
||||
'query_type': row.get('query_type', ''),
|
||||
'dns_server': row.get('dns_server', ''),
|
||||
'timestamp': row.get('timestamp', ''),
|
||||
'timestamp_unix': parse_timestamp_unix(row.get('timestamp', '')),
|
||||
'duration_ns': row.get('duration_ns', ''),
|
||||
'duration_ms': ns_to_ms(row.get('duration_ns', '')),
|
||||
'request_size_bytes': row.get('request_size_bytes', ''),
|
||||
'response_size_bytes': row.get('response_size_bytes', ''),
|
||||
'bytes_sent': row.get('bytes_sent', ''),
|
||||
'bytes_received': row.get('bytes_received', ''),
|
||||
'packets_sent': row.get('packets_sent', ''),
|
||||
'packets_received': row.get('packets_received', ''),
|
||||
'total_bytes': row.get('total_bytes', ''),
|
||||
'response_code': row.get('response_code', ''),
|
||||
'error': row.get('error', ''),
|
||||
}
|
||||
|
||||
writer.writerow(out_row)
|
||||
|
||||
total_rows += file_rows
|
||||
print(f" → {file_rows:,} rows")
|
||||
|
||||
print(f"\n{'='*60}")
|
||||
print(f"Output: {output_path}")
|
||||
print(f"Total rows: {total_rows:,}")
|
||||
print(f"{'='*60}")
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(
|
||||
description='Merge all DNS test CSVs into a single file'
|
||||
)
|
||||
parser.add_argument(
|
||||
'input_dir',
|
||||
nargs='?',
|
||||
default='.',
|
||||
help='Input directory containing provider folders (default: .)'
|
||||
)
|
||||
parser.add_argument(
|
||||
'-o', '--output',
|
||||
default='dns_results.csv',
|
||||
help='Output CSV path (default: dns_results.csv)'
|
||||
)
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
input_dir = Path(args.input_dir)
|
||||
output_path = Path(args.output)
|
||||
|
||||
if not input_dir.exists():
|
||||
print(f"Error: Input directory not found: {input_dir}")
|
||||
return 1
|
||||
|
||||
print("="*60)
|
||||
print("MERGE ALL DNS CSVs")
|
||||
print("="*60)
|
||||
print(f"Input: {input_dir}")
|
||||
print(f"Output: {output_path}")
|
||||
print()
|
||||
|
||||
merge_all_csvs(input_dir, output_path)
|
||||
|
||||
return 0
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
exit(main())
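As a quick sanity check of the merged output, the unified CSV can be inspected with a few lines of Python. A minimal sketch, assuming the merge was run with the default output name dns_results.csv in the working directory:

import csv
from collections import Counter

with open('dns_results.csv', newline='', encoding='utf-8') as f:
    reader = csv.DictReader(f)
    rows = list(reader)

print(f"columns: {reader.fieldnames}")
print(f"rows: {len(rows):,}")
# rows per provider, to confirm every provider folder was picked up
print(Counter(row['provider'] for row in rows))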
|
||||
@@ -0,0 +1,95 @@
|
||||
package stats
|
||||
|
||||
import (
|
||||
"encoding/csv"
|
||||
"fmt"
|
||||
"os"
|
||||
"runtime"
|
||||
"time"
|
||||
)
|
||||
|
||||
type RuntimeStats struct {
|
||||
TotalAlloc uint64
|
||||
Mallocs uint64
|
||||
NumGC uint32
|
||||
AllocDelta uint64
|
||||
MallocsDelta uint64
|
||||
GCDelta uint32
|
||||
}
|
||||
|
||||
type RuntimeCollector struct {
|
||||
startStats runtime.MemStats
|
||||
memPath string
|
||||
}
|
||||
|
||||
func NewRuntimeCollector(memPath string) *RuntimeCollector {
|
||||
var stats runtime.MemStats
|
||||
runtime.ReadMemStats(&stats)
|
||||
|
||||
return &RuntimeCollector{
|
||||
startStats: stats,
|
||||
memPath: memPath,
|
||||
}
|
||||
}
|
||||
|
||||
func (rc *RuntimeCollector) Collect() RuntimeStats {
|
||||
var current runtime.MemStats
|
||||
runtime.ReadMemStats(¤t)
|
||||
|
||||
return RuntimeStats{
|
||||
TotalAlloc: current.TotalAlloc,
|
||||
Mallocs: current.Mallocs,
|
||||
NumGC: current.NumGC,
|
||||
AllocDelta: current.TotalAlloc - rc.startStats.TotalAlloc,
|
||||
MallocsDelta: current.Mallocs - rc.startStats.Mallocs,
|
||||
GCDelta: current.NumGC - rc.startStats.NumGC,
|
||||
}
|
||||
}
|
||||
|
||||
func (rc *RuntimeCollector) WriteStats() error {
|
||||
stats := rc.Collect()
|
||||
timestamp := time.Now().Format(time.RFC3339Nano)
|
||||
|
||||
// Check if file exists
|
||||
fileExists := false
|
||||
if _, err := os.Stat(rc.memPath); err == nil {
|
||||
fileExists = true
|
||||
}
|
||||
|
||||
// Open in append mode
|
||||
file, err := os.OpenFile(rc.memPath, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to open mem.csv: %w", err)
|
||||
}
|
||||
defer file.Close()
|
||||
|
||||
writer := csv.NewWriter(file)
|
||||
|
||||
// Write header if new file
|
||||
if !fileExists {
|
||||
header := []string{
|
||||
"timestamp", "total_alloc_bytes", "mallocs", "gc_cycles",
|
||||
"alloc_delta", "mallocs_delta", "gc_delta",
|
||||
}
|
||||
if err := writer.Write(header); err != nil {
|
||||
return fmt.Errorf("failed to write mem.csv header: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
// Write data row
|
||||
row := []string{
|
||||
timestamp,
|
||||
fmt.Sprintf("%d", stats.TotalAlloc),
|
||||
fmt.Sprintf("%d", stats.Mallocs),
|
||||
fmt.Sprintf("%d", stats.NumGC),
|
||||
fmt.Sprintf("%d", stats.AllocDelta),
|
||||
fmt.Sprintf("%d", stats.MallocsDelta),
|
||||
fmt.Sprintf("%d", stats.GCDelta),
|
||||
}
|
||||
if err := writer.Write(row); err != nil {
|
||||
return fmt.Errorf("failed to write mem.csv row: %w", err)
|
||||
}
|
||||
|
||||
writer.Flush()
|
||||
return writer.Error()
|
||||
}
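The rows appended by WriteStats can be summarised with a short Python snippet. This is only a sketch: the path below is a placeholder, use whichever memPath was handed to NewRuntimeCollector.

import csv

mem_path = 'mem.csv'  # placeholder: the memPath passed to NewRuntimeCollector
with open(mem_path, newline='') as f:
    for row in csv.DictReader(f):
        # column names match the header written by WriteStats
        mb = int(row['alloc_delta']) / (1024 * 1024)
        print(f"{row['timestamp']}: {mb:.1f} MiB allocated (delta), {row['gc_delta']} GC cycles")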
|
||||
Executable → Regular
@@ -1,369 +0,0 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"encoding/csv"
|
||||
"fmt"
|
||||
"log"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/google/gopacket"
|
||||
"github.com/google/gopacket/layers"
|
||||
"github.com/google/gopacket/pcapgo"
|
||||
)
|
||||
|
||||
type QueryRecord struct {
|
||||
Domain string
|
||||
QueryType string
|
||||
Protocol string
|
||||
DNSSec string
|
||||
AuthDNSSec string
|
||||
KeepAlive string
|
||||
DNSServer string
|
||||
Timestamp string
|
||||
DurationNs int64
|
||||
DurationMs float64
|
||||
RequestSizeBytes int
|
||||
ResponseSizeBytes int
|
||||
ResponseCode string
|
||||
Error string
|
||||
BytesSent int64
|
||||
BytesReceived int64
|
||||
PacketsSent int64
|
||||
PacketsReceived int64
|
||||
TotalBytes int64
|
||||
}
|
||||
|
||||
func parseRFC3339Nano(ts string) (time.Time, error) {
|
||||
return time.Parse(time.RFC3339Nano, ts)
|
||||
}
|
||||
|
||||
func processProviderFolder(providerPath string) error {
|
||||
providerName := filepath.Base(providerPath)
|
||||
fmt.Printf("\n=== Processing provider: %s ===\n", providerName)
|
||||
|
||||
files, err := os.ReadDir(providerPath)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
processed := 0
|
||||
skipped := 0
|
||||
errors := 0
|
||||
|
||||
for _, file := range files {
|
||||
if !strings.HasSuffix(file.Name(), ".csv") {
|
||||
continue
|
||||
}
|
||||
|
||||
csvPath := filepath.Join(providerPath, file.Name())
|
||||
pcapPath := strings.Replace(csvPath, ".csv", ".pcap", 1)
|
||||
|
||||
// Check if PCAP exists
|
||||
if _, err := os.Stat(pcapPath); os.IsNotExist(err) {
|
||||
fmt.Printf(" ⊗ Skipping: %s (no matching PCAP)\n", file.Name())
|
||||
skipped++
|
||||
continue
|
||||
}
|
||||
|
||||
// Check if already processed (has backup)
|
||||
backupPath := csvPath + ".bak"
|
||||
if _, err := os.Stat(backupPath); err == nil {
|
||||
fmt.Printf(" ⊙ Skipping: %s (already processed, backup exists)\n", file.Name())
|
||||
skipped++
|
||||
continue
|
||||
}
|
||||
|
||||
fmt.Printf(" ↻ Processing: %s ... ", file.Name())
|
||||
if err := processPair(csvPath, pcapPath); err != nil {
|
||||
fmt.Printf("ERROR\n")
|
||||
log.Printf(" Error: %v\n", err)
|
||||
errors++
|
||||
} else {
|
||||
fmt.Printf("✓\n")
|
||||
processed++
|
||||
}
|
||||
}
|
||||
|
||||
fmt.Printf(" Summary: %d processed, %d skipped, %d errors\n", processed, skipped, errors)
|
||||
return nil
|
||||
}
|
||||
|
||||
func processPair(csvPath, pcapPath string) error {
|
||||
// Create backup
|
||||
backupPath := csvPath + ".bak"
|
||||
input, err := os.ReadFile(csvPath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("backup read failed: %w", err)
|
||||
}
|
||||
if err := os.WriteFile(backupPath, input, 0644); err != nil {
|
||||
return fmt.Errorf("backup write failed: %w", err)
|
||||
}
|
||||
|
||||
// Read CSV records
|
||||
records, err := readCSV(csvPath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("CSV read failed: %w", err)
|
||||
}
|
||||
|
||||
if len(records) == 0 {
|
||||
return fmt.Errorf("no records in CSV")
|
||||
}
|
||||
|
||||
// Read and parse PCAP
|
||||
packets, err := readPCAPGo(pcapPath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("PCAP read failed: %w", err)
|
||||
}
|
||||
|
||||
// Enrich records with bandwidth data
|
||||
enrichRecords(records, packets)
|
||||
|
||||
// Write enriched CSV
|
||||
if err := writeCSV(csvPath, records); err != nil {
|
||||
return fmt.Errorf("CSV write failed: %w", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func readCSV(path string) ([]*QueryRecord, error) {
|
||||
f, err := os.Open(path)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
r := csv.NewReader(f)
|
||||
rows, err := r.ReadAll()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if len(rows) < 2 {
|
||||
return nil, fmt.Errorf("CSV has no data rows")
|
||||
}
|
||||
|
||||
records := make([]*QueryRecord, 0, len(rows)-1)
|
||||
for i := 1; i < len(rows); i++ {
|
||||
row := rows[i]
|
||||
if len(row) < 14 {
|
||||
log.Printf(" Warning: Skipping malformed row %d", i+1)
|
||||
continue
|
||||
}
|
||||
|
||||
durationNs, _ := strconv.ParseInt(row[8], 10, 64)
|
||||
durationMs, _ := strconv.ParseFloat(row[9], 64)
|
||||
reqSize, _ := strconv.Atoi(row[10])
|
||||
respSize, _ := strconv.Atoi(row[11])
|
||||
|
||||
records = append(records, &QueryRecord{
|
||||
Domain: row[0],
|
||||
QueryType: row[1],
|
||||
Protocol: row[2],
|
||||
DNSSec: row[3],
|
||||
AuthDNSSec: row[4],
|
||||
KeepAlive: row[5],
|
||||
DNSServer: row[6],
|
||||
Timestamp: row[7],
|
||||
DurationNs: durationNs,
|
||||
DurationMs: durationMs,
|
||||
RequestSizeBytes: reqSize,
|
||||
ResponseSizeBytes: respSize,
|
||||
ResponseCode: row[12],
|
||||
Error: row[13],
|
||||
})
|
||||
}
|
||||
|
||||
return records, nil
|
||||
}
|
||||
|
||||
type PacketInfo struct {
|
||||
Timestamp time.Time
|
||||
Size int
|
||||
IsSent bool
|
||||
}
|
||||
|
||||
func readPCAPGo(path string) ([]PacketInfo, error) {
|
||||
f, err := os.Open(path)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
reader, err := pcapgo.NewReader(f)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var packets []PacketInfo
|
||||
packetSource := gopacket.NewPacketSource(reader, reader.LinkType())
|
||||
|
||||
for packet := range packetSource.Packets() {
|
||||
if packet.NetworkLayer() == nil {
|
||||
continue
|
||||
}
|
||||
|
||||
isDNS := false
|
||||
isSent := false
|
||||
|
||||
// Check UDP layer (DNS, DoQ, DoH3)
|
||||
if udpLayer := packet.Layer(layers.LayerTypeUDP); udpLayer != nil {
|
||||
udp := udpLayer.(*layers.UDP)
|
||||
isDNS = udp.SrcPort == 53 || udp.DstPort == 53 ||
|
||||
udp.SrcPort == 853 || udp.DstPort == 853 ||
|
||||
udp.SrcPort == 443 || udp.DstPort == 443
|
||||
isSent = udp.DstPort == 53 || udp.DstPort == 853 || udp.DstPort == 443
|
||||
}
|
||||
|
||||
// Check TCP layer (DoT, DoH)
|
||||
if tcpLayer := packet.Layer(layers.LayerTypeTCP); tcpLayer != nil {
|
||||
tcp := tcpLayer.(*layers.TCP)
|
||||
isDNS = tcp.SrcPort == 53 || tcp.DstPort == 53 ||
|
||||
tcp.SrcPort == 853 || tcp.DstPort == 853 ||
|
||||
tcp.SrcPort == 443 || tcp.DstPort == 443
|
||||
isSent = tcp.DstPort == 53 || tcp.DstPort == 853 || tcp.DstPort == 443
|
||||
}
|
||||
|
||||
if isDNS {
|
||||
packets = append(packets, PacketInfo{
|
||||
Timestamp: packet.Metadata().Timestamp,
|
||||
Size: len(packet.Data()),
|
||||
IsSent: isSent,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
return packets, nil
|
||||
}
|
||||
|
||||
func enrichRecords(records []*QueryRecord, packets []PacketInfo) {
|
||||
for _, rec := range records {
|
||||
ts, err := parseRFC3339Nano(rec.Timestamp)
|
||||
if err != nil {
|
||||
log.Printf(" Warning: Failed to parse timestamp: %s", rec.Timestamp)
|
||||
continue
|
||||
}
|
||||
|
||||
// Define time window for this query
|
||||
windowStart := ts
|
||||
windowEnd := ts.Add(time.Duration(rec.DurationNs))
|
||||
|
||||
var sent, recv, pktSent, pktRecv int64
|
||||
|
||||
// Match packets within the time window
|
||||
for _, pkt := range packets {
|
||||
if (pkt.Timestamp.Equal(windowStart) || pkt.Timestamp.After(windowStart)) &&
|
||||
pkt.Timestamp.Before(windowEnd) {
|
||||
if pkt.IsSent {
|
||||
sent += int64(pkt.Size)
|
||||
pktSent++
|
||||
} else {
|
||||
recv += int64(pkt.Size)
|
||||
pktRecv++
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
rec.BytesSent = sent
|
||||
rec.BytesReceived = recv
|
||||
rec.PacketsSent = pktSent
|
||||
rec.PacketsReceived = pktRecv
|
||||
rec.TotalBytes = sent + recv
|
||||
}
|
||||
}
|
||||
|
||||
func writeCSV(path string, records []*QueryRecord) error {
|
||||
f, err := os.Create(path)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
w := csv.NewWriter(f)
|
||||
defer w.Flush()
|
||||
|
||||
// Write header
|
||||
header := []string{
|
||||
"domain", "query_type", "protocol", "dnssec", "auth_dnssec",
|
||||
"keep_alive", "dns_server", "timestamp", "duration_ns", "duration_ms",
|
||||
"request_size_bytes", "response_size_bytes", "response_code", "error",
|
||||
"bytes_sent", "bytes_received", "packets_sent", "packets_received", "total_bytes",
|
||||
}
|
||||
if err := w.Write(header); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Write data rows
|
||||
for _, rec := range records {
|
||||
row := []string{
|
||||
rec.Domain,
|
||||
rec.QueryType,
|
||||
rec.Protocol,
|
||||
rec.DNSSec,
|
||||
rec.AuthDNSSec,
|
||||
rec.KeepAlive,
|
||||
rec.DNSServer,
|
||||
rec.Timestamp,
|
||||
strconv.FormatInt(rec.DurationNs, 10),
|
||||
strconv.FormatFloat(rec.DurationMs, 'f', -1, 64),
|
||||
strconv.Itoa(rec.RequestSizeBytes),
|
||||
strconv.Itoa(rec.ResponseSizeBytes),
|
||||
rec.ResponseCode,
|
||||
rec.Error,
|
||||
strconv.FormatInt(rec.BytesSent, 10),
|
||||
strconv.FormatInt(rec.BytesReceived, 10),
|
||||
strconv.FormatInt(rec.PacketsSent, 10),
|
||||
strconv.FormatInt(rec.PacketsReceived, 10),
|
||||
strconv.FormatInt(rec.TotalBytes, 10),
|
||||
}
|
||||
if err := w.Write(row); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func main() {
|
||||
resultsDir := "results"
|
||||
providers := []string{"adguard", "cloudflare", "google", "quad9"}
|
||||
|
||||
fmt.Println("╔═══════════════════════════════════════════════╗")
|
||||
fmt.Println("║ DNS PCAP Preprocessor v1.0 ║")
|
||||
fmt.Println("║ Enriching ALL CSVs with bandwidth metrics ║")
|
||||
fmt.Println("╚═══════════════════════════════════════════════╝")
|
||||
|
||||
totalProcessed := 0
|
||||
totalSkipped := 0
|
||||
totalErrors := 0
|
||||
|
||||
for _, provider := range providers {
|
||||
providerPath := filepath.Join(resultsDir, provider)
|
||||
if _, err := os.Stat(providerPath); os.IsNotExist(err) {
|
||||
fmt.Printf("\n⚠ Provider folder not found: %s\n", provider)
|
||||
continue
|
||||
}
|
||||
|
||||
if err := processProviderFolder(providerPath); err != nil {
|
||||
log.Printf("Error processing %s: %v\n", provider, err)
|
||||
totalErrors++
|
||||
}
|
||||
}
|
||||
|
||||
fmt.Println("\n╔═══════════════════════════════════════════════╗")
|
||||
fmt.Println("║ Preprocessing Complete! ║")
|
||||
fmt.Println("╚═══════════════════════════════════════════════╝")
|
||||
fmt.Printf("\nAll CSV files now have 5 additional columns:\n")
|
||||
fmt.Printf(" • bytes_sent - Total bytes sent to DNS server\n")
|
||||
fmt.Printf(" • bytes_received - Total bytes received from DNS server\n")
|
||||
fmt.Printf(" • packets_sent - Number of packets sent\n")
|
||||
fmt.Printf(" • packets_received - Number of packets received\n")
|
||||
fmt.Printf(" • total_bytes - Sum of sent + received bytes\n")
|
||||
fmt.Printf("\n📁 Backups saved as: *.csv.bak\n")
|
||||
fmt.Printf("\n💡 Tip: The analysis script will filter which files to visualize,\n")
|
||||
fmt.Printf(" but all files now have complete bandwidth metrics!\n")
|
||||
}
|
||||
@@ -1,367 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Advanced PCAP filter for DNS traffic (with IPv6 support).
|
||||
|
||||
Filters out:
|
||||
- Local network traffic except test machine (IPv4: 10.0.0.50; IPv6: specific addresses)
|
||||
- AdGuard DNS servers (for non-AdGuard captures)
|
||||
- Non-DNS traffic based on protocol-specific ports
|
||||
"""
|
||||
|
||||
import os
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
import argparse
|
||||
|
||||
# Test machine IPs (IPv4 and IPv6 addresses of the capture host)
|
||||
TEST_IPV4 = '10.0.0.50'
|
||||
TEST_IPV6_GLOBAL = '2001:818:e73e:ba00:5506:dfd4:ed8b:96e'
|
||||
TEST_IPV6_LINKLOCAL = 'fe80::fe98:c62e:4463:9a2d'
|
||||
|
||||
# Port mappings
|
||||
PORT_MAP = {
|
||||
'udp': [53], # DNS-over-UDP
|
||||
'tls': [53, 853], # DNS-over-TLS
|
||||
'https': [53, 443], # DNS-over-HTTPS (DoH)
|
||||
'doq': [53, 784, 8853], # DNS-over-QUIC
|
||||
'doh3': [53, 443] # DNS-over-HTTP/3
|
||||
}
|
||||
|
||||
# AdGuard DNS IPs to filter out (for non-AdGuard captures)
|
||||
ADGUARD_IPS = [
|
||||
'94.140.14.14',
|
||||
'94.140.15.15',
|
||||
'2a10:50c0::ad1:ff',
|
||||
'2a10:50c0::ad2:ff'
|
||||
]
|
||||
|
||||
def parse_filename(filename):
|
||||
"""Extract protocol from filename"""
|
||||
base = filename.replace('.pcap', '').replace('.csv', '')
|
||||
parts = base.split('-')
|
||||
|
||||
if len(parts) < 1: # Minimum: protocol
|
||||
return None
|
||||
|
||||
protocol = parts[0].lower()
|
||||
return protocol
|
||||
|
||||
def extract_resolver_from_path(pcap_path):
|
||||
"""Extract resolver name from directory structure"""
|
||||
parts = Path(pcap_path).parts
|
||||
|
||||
for part in parts:
|
||||
if part.lower() in ['cloudflare', 'google', 'quad9', 'adguard']:
|
||||
return part.lower()
|
||||
|
||||
return None
|
||||
|
||||
def build_filter_expression(protocol, resolver):
|
||||
"""
|
||||
Build tshark filter expression.
|
||||
|
||||
Strategy:
|
||||
1. Only protocol-specific DNS ports
|
||||
2. Keep only traffic involving the test machine (IPv4/IPv6)
|
||||
3. Exclude AdGuard IPs for non-AdGuard captures
|
||||
"""
|
||||
|
||||
# Get ports for this protocol
|
||||
ports = PORT_MAP.get(protocol, [53, 443, 853, 784, 8853])
|
||||
|
||||
# Build port filter (UDP or TCP on these ports)
|
||||
port_conditions = []
|
||||
for port in ports:
|
||||
port_conditions.append(f'(udp.port == {port} or tcp.port == {port})')
|
||||
|
||||
port_filter = ' or '.join(port_conditions)
|
||||
|
||||
# Build test machine filter (keep if src or dst is test machine IP)
|
||||
machine_conditions = [f'(ip.addr == {TEST_IPV4})']
|
||||
if TEST_IPV6_GLOBAL:
|
||||
machine_conditions.append(f'(ipv6.addr == {TEST_IPV6_GLOBAL})')
|
||||
if TEST_IPV6_LINKLOCAL:
|
||||
machine_conditions.append(f'(ipv6.addr == {TEST_IPV6_LINKLOCAL})')
|
||||
|
||||
machine_filter = ' or '.join(machine_conditions)
|
||||
|
||||
# Build AdGuard exclusion filter
|
||||
adguard_exclusions = []
|
||||
if resolver != 'adguard':
|
||||
for ip in ADGUARD_IPS:
|
||||
if ':' in ip: # IPv6
|
||||
adguard_exclusions.append(f'!(ipv6.addr == {ip})')
|
||||
else: # IPv4
|
||||
adguard_exclusions.append(f'!(ip.addr == {ip})')
|
||||
|
||||
# Combine all filters
|
||||
filters = [f'({port_filter})', f'({machine_filter})']
|
||||
|
||||
if adguard_exclusions:
|
||||
adguard_filter = ' and '.join(adguard_exclusions)
|
||||
filters.append(f'({adguard_filter})')
|
||||
|
||||
final_filter = ' and '.join(filters)
|
||||
|
||||
return final_filter
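# Illustrative check (hedged): for a DoUDP capture against a non-AdGuard
# resolver, the generated display filter should keep only port-53 traffic
# involving the test machine and exclude the AdGuard addresses.
_example_filter = build_filter_expression('udp', 'cloudflare')
assert 'udp.port == 53' in _example_filter
assert f'ip.addr == {TEST_IPV4}' in _example_filter
assert '94.140.14.14' in _example_filter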
|
||||
|
||||
def filter_pcap(input_path, output_path, filter_expr, verbose=False):
|
||||
"""Apply filter to PCAP file using tshark"""
|
||||
|
||||
cmd = [
|
||||
'tshark',
|
||||
'-r', input_path,
|
||||
'-Y', filter_expr,
|
||||
'-w', output_path,
|
||||
'-F', 'pcap'
|
||||
]
|
||||
|
||||
try:
|
||||
if verbose:
|
||||
print(f" Filter: {filter_expr}")
|
||||
|
||||
result = subprocess.run(
|
||||
cmd,
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=300
|
||||
)
|
||||
|
||||
if result.returncode != 0:
|
||||
print(f" ✗ Error: {result.stderr.strip()}")
|
||||
return False
|
||||
|
||||
if not os.path.exists(output_path):
|
||||
print(f" ✗ Output file not created")
|
||||
return False
|
||||
|
||||
output_size = os.path.getsize(output_path)
|
||||
if output_size < 24:
|
||||
print(f" ⚠ Warning: Output is empty")
|
||||
|
||||
return True
|
||||
|
||||
except subprocess.TimeoutExpired:
|
||||
print(f" ✗ Timeout (>5 minutes)")
|
||||
return False
|
||||
except Exception as e:
|
||||
print(f" ✗ Exception: {e}")
|
||||
return False
|
||||
|
||||
def find_pcap_files(root_dir):
|
||||
"""Recursively find all PCAP files"""
|
||||
pcap_files = []
|
||||
for root, dirs, files in os.walk(root_dir):
|
||||
for file in files:
|
||||
if file.endswith('.pcap'):
|
||||
full_path = os.path.join(root, file)
|
||||
pcap_files.append(full_path)
|
||||
return sorted(pcap_files)
|
||||
|
||||
def format_bytes(bytes_val):
|
||||
"""Format bytes as human readable"""
|
||||
for unit in ['B', 'KB', 'MB', 'GB']:
|
||||
if bytes_val < 1024.0:
|
||||
return f"{bytes_val:.1f} {unit}"
|
||||
bytes_val /= 1024.0
|
||||
return f"{bytes_val:.1f} TB"
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(
|
||||
description='Advanced PCAP filter for DNS traffic (IPv4/IPv6)',
|
||||
formatter_class=argparse.RawDescriptionHelpFormatter,
|
||||
epilog='''
|
||||
Filtering rules:
|
||||
1. Only include traffic on protocol-specific DNS ports
|
||||
2. Keep only packets involving the test machine (10.0.0.50 or its IPv6 addresses)
|
||||
3. Exclude AdGuard IPs for non-AdGuard captures
|
||||
|
||||
Protocol-specific ports:
|
||||
udp: 53
|
||||
tls: 53, 853
|
||||
https: 53, 443
|
||||
doq: 53, 784, 8853
|
||||
doh3: 53, 443
|
||||
|
||||
Examples:
|
||||
# Dry run
|
||||
%(prog)s ./results --dry-run
|
||||
|
||||
# Filter with verbose output
|
||||
%(prog)s ./results --verbose
|
||||
|
||||
# Custom output directory
|
||||
%(prog)s ./results --output ./cleaned
|
||||
'''
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
'input_dir',
|
||||
help='Input directory containing PCAP files'
|
||||
)
|
||||
parser.add_argument(
|
||||
'-o', '--output',
|
||||
default='./results_filtered',
|
||||
help='Output directory (default: ./results_filtered)'
|
||||
)
|
||||
parser.add_argument(
|
||||
'--dry-run',
|
||||
action='store_true',
|
||||
help='Show what would be done without filtering'
|
||||
)
|
||||
parser.add_argument(
|
||||
'--limit',
|
||||
type=int,
|
||||
help='Only process first N files (for testing)'
|
||||
)
|
||||
parser.add_argument(
|
||||
'-v', '--verbose',
|
||||
action='store_true',
|
||||
help='Verbose output (show filter expressions)'
|
||||
)
|
||||
parser.add_argument(
|
||||
'--overwrite',
|
||||
action='store_true',
|
||||
help='Overwrite existing filtered files'
|
||||
)
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
# Check for tshark
|
||||
try:
|
||||
result = subprocess.run(
|
||||
['tshark', '-v'],
|
||||
capture_output=True,
|
||||
check=True
|
||||
)
|
||||
if args.verbose:
|
||||
version = result.stdout.decode().split('\n')[0]
|
||||
print(f"Using: {version}\n")
|
||||
except (subprocess.CalledProcessError, FileNotFoundError):
|
||||
print("Error: tshark not found. Install Wireshark/tshark:")
|
||||
print(" Ubuntu/Debian: sudo apt-get install tshark")
|
||||
print(" macOS: brew install wireshark")
|
||||
return 1
|
||||
|
||||
print("=" * 80)
|
||||
print("ADVANCED DNS PCAP FILTER (IPv4/IPv6)")
|
||||
print("=" * 80)
|
||||
print("Filters:")
|
||||
print(" 1. Protocol-specific DNS ports only")
|
||||
print(" 2. Keep only traffic involving test machine (10.0.0.50 / IPv6 addresses)")
|
||||
print(" 3. Exclude AdGuard IPs (for non-AdGuard captures)")
|
||||
print(f"\nInput: {args.input_dir}")
|
||||
print(f"Output: {args.output}")
|
||||
|
||||
# Find PCAP files
|
||||
print(f"\nScanning for PCAP files...")
|
||||
pcap_files = find_pcap_files(args.input_dir)
|
||||
|
||||
if not pcap_files:
|
||||
print(f"No PCAP files found in {args.input_dir}")
|
||||
return 1
|
||||
|
||||
print(f"Found {len(pcap_files)} PCAP files")
|
||||
|
||||
total_input_size = sum(os.path.getsize(f) for f in pcap_files)
|
||||
print(f"Total size: {format_bytes(total_input_size)}")
|
||||
|
||||
if args.limit:
|
||||
pcap_files = pcap_files[:args.limit]
|
||||
print(f"Limiting to first {args.limit} files")
|
||||
|
||||
if args.dry_run:
|
||||
print("\n*** DRY RUN MODE ***\n")
|
||||
else:
|
||||
print()
|
||||
|
||||
# Process files
|
||||
success_count = 0
|
||||
skip_count = 0
|
||||
fail_count = 0
|
||||
total_output_size = 0
|
||||
|
||||
for i, input_path in enumerate(pcap_files, 1):
|
||||
# Extract info from path
|
||||
filename = Path(input_path).name
|
||||
protocol = parse_filename(filename)
|
||||
resolver = extract_resolver_from_path(input_path)
|
||||
|
||||
if not protocol:
|
||||
print(f"[{i}/{len(pcap_files)}] {filename}")
|
||||
print(f" ⚠ Could not parse protocol, skipping")
|
||||
skip_count += 1
|
||||
continue
|
||||
|
||||
# Create output path
|
||||
rel_path = os.path.relpath(input_path, args.input_dir)
|
||||
output_path = os.path.join(args.output, rel_path)
|
||||
|
||||
input_size = os.path.getsize(input_path)
|
||||
|
||||
print(f"[{i}/{len(pcap_files)}] {rel_path}")
|
||||
print(f" Protocol: {protocol.upper()}")
|
||||
print(f" Resolver: {resolver or 'unknown'}")
|
||||
print(f" Size: {format_bytes(input_size)}")
|
||||
|
||||
# Check if already filtered
|
||||
if os.path.exists(output_path) and not args.overwrite:
|
||||
output_size = os.path.getsize(output_path)
|
||||
reduction = ((input_size - output_size) / input_size * 100) if input_size > 0 else 0
|
||||
print(f" ⊙ Already filtered: {format_bytes(output_size)} "
|
||||
f"({reduction:.1f}% reduction)")
|
||||
skip_count += 1
|
||||
total_output_size += output_size
|
||||
continue
|
||||
|
||||
# Build filter
|
||||
filter_expr = build_filter_expression(protocol, resolver)
|
||||
|
||||
if args.dry_run:
|
||||
print(f" → Would filter")
|
||||
if args.verbose:
|
||||
print(f" Filter: {filter_expr}")
|
||||
continue
|
||||
|
||||
# Create output directory
|
||||
os.makedirs(os.path.dirname(output_path), exist_ok=True)
|
||||
|
||||
# Filter
|
||||
success = filter_pcap(input_path, output_path, filter_expr, args.verbose)
|
||||
|
||||
if success:
|
||||
output_size = os.path.getsize(output_path)
|
||||
reduction = ((input_size - output_size) / input_size * 100) if input_size > 0 else 0
|
||||
print(f" ✓ Filtered: {format_bytes(output_size)} "
|
||||
f"({reduction:.1f}% reduction)")
|
||||
success_count += 1
|
||||
total_output_size += output_size
|
||||
else:
|
||||
fail_count += 1
|
||||
|
||||
# Summary
|
||||
print("\n" + "=" * 80)
|
||||
print("SUMMARY")
|
||||
print("=" * 80)
|
||||
|
||||
if args.dry_run:
|
||||
print(f"Would process: {len(pcap_files)} files")
|
||||
else:
|
||||
print(f"Successful: {success_count}")
|
||||
print(f"Skipped: {skip_count} (already filtered or unparseable)")
|
||||
print(f"Failed: {fail_count}")
|
||||
print(f"Total: {len(pcap_files)}")
|
||||
|
||||
if success_count > 0 or skip_count > 0:
|
||||
print(f"\nInput size: {format_bytes(total_input_size)}")
|
||||
print(f"Output size: {format_bytes(total_output_size)}")
|
||||
if total_input_size > 0:
|
||||
reduction = ((total_input_size - total_output_size) /
|
||||
total_input_size * 100)
|
||||
print(f"Reduction: {reduction:.1f}%")
|
||||
print(f"\nOutput directory: {args.output}")
|
||||
|
||||
return 0 if fail_count == 0 else 1
|
||||
|
||||
if __name__ == "__main__":
|
||||
exit(main())
|
||||
@@ -1,426 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Convert DNS CSV files to SQLite database.
|
||||
Creates a single normalized table with unified DNSSEC handling.
|
||||
"""
|
||||
|
||||
import sqlite3
|
||||
import csv
|
||||
from pathlib import Path
|
||||
from dateutil import parser as date_parser
|
||||
|
||||
|
||||
def create_database_schema(conn: sqlite3.Connection):
|
||||
"""Create the database schema with indexes."""
|
||||
cursor = conn.cursor()
|
||||
|
||||
# Main queries table
|
||||
cursor.execute("""
|
||||
CREATE TABLE IF NOT EXISTS dns_queries (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
|
||||
-- Metadata
|
||||
provider TEXT NOT NULL,
|
||||
protocol TEXT NOT NULL,
|
||||
dnssec_mode TEXT NOT NULL CHECK(dnssec_mode IN ('off', 'auth', 'trust')),
|
||||
|
||||
-- Query details
|
||||
domain TEXT NOT NULL,
|
||||
query_type TEXT NOT NULL,
|
||||
keep_alive BOOLEAN NOT NULL,
|
||||
dns_server TEXT NOT NULL,
|
||||
|
||||
-- Timing
|
||||
timestamp TEXT NOT NULL,
|
||||
timestamp_unix REAL NOT NULL,
|
||||
duration_ns INTEGER NOT NULL,
|
||||
duration_ms REAL NOT NULL,
|
||||
|
||||
-- Size metrics
|
||||
request_size_bytes INTEGER,
|
||||
response_size_bytes INTEGER,
|
||||
|
||||
-- Network metrics (from PCAP)
|
||||
bytes_sent INTEGER DEFAULT 0,
|
||||
bytes_received INTEGER DEFAULT 0,
|
||||
packets_sent INTEGER DEFAULT 0,
|
||||
packets_received INTEGER DEFAULT 0,
|
||||
total_bytes INTEGER DEFAULT 0,
|
||||
|
||||
-- Response
|
||||
response_code TEXT,
|
||||
error TEXT
|
||||
)
|
||||
""")
|
||||
|
||||
# Create indexes for common queries
|
||||
cursor.execute("""
|
||||
CREATE INDEX IF NOT EXISTS idx_provider
|
||||
ON dns_queries(provider)
|
||||
""")
|
||||
|
||||
cursor.execute("""
|
||||
CREATE INDEX IF NOT EXISTS idx_protocol
|
||||
ON dns_queries(protocol)
|
||||
""")
|
||||
|
||||
cursor.execute("""
|
||||
CREATE INDEX IF NOT EXISTS idx_dnssec_mode
|
||||
ON dns_queries(dnssec_mode)
|
||||
""")
|
||||
|
||||
cursor.execute("""
|
||||
CREATE INDEX IF NOT EXISTS idx_keep_alive
|
||||
ON dns_queries(keep_alive)
|
||||
""")
|
||||
|
||||
cursor.execute("""
|
||||
CREATE INDEX IF NOT EXISTS idx_provider_protocol_dnssec
|
||||
ON dns_queries(provider, protocol, dnssec_mode)
|
||||
""")
|
||||
|
||||
cursor.execute("""
|
||||
CREATE INDEX IF NOT EXISTS idx_timestamp
|
||||
ON dns_queries(timestamp_unix)
|
||||
""")
|
||||
|
||||
cursor.execute("""
|
||||
CREATE INDEX IF NOT EXISTS idx_domain
|
||||
ON dns_queries(domain)
|
||||
""")
|
||||
|
||||
conn.commit()
|
||||
|
||||
|
||||
def parse_protocol_and_dnssec(filename: str) -> tuple[str, str, bool]:
|
||||
"""
|
||||
Extract base protocol, DNSSEC mode, and keep_alive from filename.
|
||||
Returns (base_protocol, dnssec_mode, keep_alive)
|
||||
|
||||
Examples:
|
||||
'udp.csv' -> ('udp', 'off', False)
|
||||
'udp-auth.csv' -> ('udp', 'auth', False)
|
||||
'tls.csv' -> ('tls', 'off', False)
|
||||
'tls-persist.csv' -> ('tls', 'off', True)
|
||||
'https-persist.csv' -> ('https', 'off', True)
|
||||
'https-auth-persist.csv' -> ('https', 'auth', True)
|
||||
'https-trust-persist.csv' -> ('https', 'trust', True)
|
||||
'doh3-auth.csv' -> ('doh3', 'auth', False)
|
||||
'doq.csv' -> ('doq', 'off', False)
|
||||
"""
|
||||
name = filename.replace('.csv', '')
|
||||
|
||||
# Check for persist suffix (keep_alive)
|
||||
keep_alive = False
|
||||
if name.endswith('-persist'):
|
||||
keep_alive = True
|
||||
name = name.replace('-persist', '')
|
||||
|
||||
# Check for DNSSEC suffix
|
||||
dnssec_mode = 'off'
|
||||
if name.endswith('-auth'):
|
||||
dnssec_mode = 'auth'
|
||||
name = name.replace('-auth', '')
|
||||
elif name.endswith('-trust'):
|
||||
dnssec_mode = 'trust'
|
||||
name = name.replace('-trust', '')
|
||||
|
||||
# For UDP, DoH3, and DoQ, keep_alive doesn't apply (connectionless)
|
||||
if name in ['udp', 'doh3', 'doq']:
|
||||
keep_alive = False
|
||||
|
||||
return (name, dnssec_mode, keep_alive)
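# Quick self-check against the examples in the docstring above (illustrative):
assert parse_protocol_and_dnssec('udp-auth.csv') == ('udp', 'auth', False)
assert parse_protocol_and_dnssec('https-trust-persist.csv') == ('https', 'trust', True)
assert parse_protocol_and_dnssec('doq.csv') == ('doq', 'off', False)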
|
||||
|
||||
|
||||
def str_to_bool(value: str) -> bool:
|
||||
"""Convert string boolean to Python bool."""
|
||||
return value.lower() in ('true', '1', 'yes')
|
||||
|
||||
|
||||
def import_csv_to_db(
|
||||
csv_path: Path,
|
||||
provider: str,
|
||||
conn: sqlite3.Connection
|
||||
) -> int:
|
||||
"""Import a CSV file into the database."""
|
||||
protocol, dnssec_mode, keep_alive_from_filename = parse_protocol_and_dnssec(csv_path.name)
|
||||
|
||||
cursor = conn.cursor()
|
||||
rows_imported = 0
|
||||
|
||||
with open(csv_path, 'r', encoding='utf-8') as f:
|
||||
reader = csv.DictReader(f)
|
||||
|
||||
for row in reader:
|
||||
try:
|
||||
# Parse timestamp to Unix epoch
|
||||
dt = date_parser.isoparse(row['timestamp'])
|
||||
timestamp_unix = dt.timestamp()
|
||||
|
||||
# Use keep_alive from filename (more reliable than CSV)
|
||||
keep_alive = keep_alive_from_filename
|
||||
|
||||
# Handle optional fields (may not exist in older CSVs)
|
||||
bytes_sent = int(row.get('bytes_sent', 0) or 0)
|
||||
bytes_received = int(row.get('bytes_received', 0) or 0)
|
||||
packets_sent = int(row.get('packets_sent', 0) or 0)
|
||||
packets_received = int(row.get('packets_received', 0) or 0)
|
||||
total_bytes = int(row.get('total_bytes', 0) or 0)
|
||||
|
||||
cursor.execute("""
|
||||
INSERT INTO dns_queries (
|
||||
provider, protocol, dnssec_mode,
|
||||
domain, query_type, keep_alive,
|
||||
dns_server, timestamp, timestamp_unix,
|
||||
duration_ns, duration_ms,
|
||||
request_size_bytes, response_size_bytes,
|
||||
bytes_sent, bytes_received, packets_sent, packets_received, total_bytes,
|
||||
response_code, error
|
||||
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
||||
""", (
|
||||
provider,
|
||||
protocol,
|
||||
dnssec_mode,
|
||||
row['domain'],
|
||||
row['query_type'],
|
||||
keep_alive,
|
||||
row['dns_server'],
|
||||
row['timestamp'],
|
||||
timestamp_unix,
|
||||
int(row['duration_ns']),
|
||||
float(row['duration_ms']),
|
||||
int(row.get('request_size_bytes') or 0),
|
||||
int(row.get('response_size_bytes') or 0),
|
||||
bytes_sent,
|
||||
bytes_received,
|
||||
packets_sent,
|
||||
packets_received,
|
||||
total_bytes,
|
||||
row.get('response_code', ''),
|
||||
row.get('error', '')
|
||||
))
|
||||
|
||||
rows_imported += 1
|
||||
|
||||
except Exception as e:
|
||||
print(f" Warning: Skipping row - {e}")
|
||||
continue
|
||||
|
||||
conn.commit()
|
||||
return rows_imported
|
||||
|
||||
|
||||
def main():
|
||||
"""Main import pipeline."""
|
||||
print("\n" + "="*60)
|
||||
print("CSV to SQLite Database Converter")
|
||||
print("="*60)
|
||||
|
||||
results_dir = Path('results')
|
||||
db_path = Path('dns.db')
|
||||
|
||||
if not results_dir.exists():
|
||||
print(f"\n❌ Error: '{results_dir}' directory not found")
|
||||
return
|
||||
|
||||
# Remove existing database
|
||||
if db_path.exists():
|
||||
print(f"\n⚠ Removing existing database: {db_path}")
|
||||
db_path.unlink()
|
||||
|
||||
# Create database and schema
|
||||
print(f"\n📊 Creating database: {db_path}")
|
||||
conn = sqlite3.connect(db_path)
|
||||
create_database_schema(conn)
|
||||
print("✓ Schema created")
|
||||
|
||||
# Import CSVs
|
||||
providers = ['adguard', 'cloudflare', 'google', 'quad9']
|
||||
total_rows = 0
|
||||
total_files = 0
|
||||
|
||||
for provider in providers:
|
||||
provider_path = results_dir / provider
|
||||
|
||||
if not provider_path.exists():
|
||||
print(f"\n⚠ Skipping {provider} - directory not found")
|
||||
continue
|
||||
|
||||
print(f"\n{'='*60}")
|
||||
print(f"Importing: {provider.upper()}")
|
||||
print(f"{'='*60}")
|
||||
|
||||
csv_files = sorted(provider_path.glob('*.csv'))
|
||||
provider_rows = 0
|
||||
provider_files = 0
|
||||
|
||||
for csv_path in csv_files:
|
||||
# Skip backup files
|
||||
if '.bak' in csv_path.name:
|
||||
continue
|
||||
|
||||
protocol, dnssec, keep_alive = parse_protocol_and_dnssec(csv_path.name)
|
||||
ka_str = "persistent" if keep_alive else "non-persist"
|
||||
print(f" 📄 {csv_path.name:30} → {protocol:8} (DNSSEC: {dnssec:5}, {ka_str})")
|
||||
|
||||
rows = import_csv_to_db(csv_path, provider, conn)
|
||||
print(f" ✓ Imported {rows:,} rows")
|
||||
|
||||
provider_rows += rows
|
||||
provider_files += 1
|
||||
|
||||
print(f"\n Total: {provider_files} files, {provider_rows:,} rows")
|
||||
total_rows += provider_rows
|
||||
total_files += provider_files
|
||||
|
||||
# Create summary
|
||||
print(f"\n{'='*60}")
|
||||
print("Database Summary")
|
||||
print(f"{'='*60}")
|
||||
|
||||
cursor = conn.cursor()
|
||||
|
||||
# Total counts
|
||||
cursor.execute("SELECT COUNT(*) FROM dns_queries")
|
||||
total_queries = cursor.fetchone()[0]
|
||||
|
||||
cursor.execute("SELECT COUNT(DISTINCT provider) FROM dns_queries")
|
||||
unique_providers = cursor.fetchone()[0]
|
||||
|
||||
cursor.execute("SELECT COUNT(DISTINCT protocol) FROM dns_queries")
|
||||
unique_protocols = cursor.fetchone()[0]
|
||||
|
||||
cursor.execute("SELECT COUNT(DISTINCT domain) FROM dns_queries")
|
||||
unique_domains = cursor.fetchone()[0]
|
||||
|
||||
print(f"\nTotal queries: {total_queries:,}")
|
||||
print(f"Providers: {unique_providers}")
|
||||
print(f"Protocols: {unique_protocols}")
|
||||
print(f"Unique domains: {unique_domains}")
|
||||
|
||||
# Show breakdown by provider, protocol, DNSSEC, and keep_alive
|
||||
print(f"\nBreakdown by Provider, Protocol, DNSSEC & Keep-Alive:")
|
||||
print(f"{'-'*80}")
|
||||
|
||||
cursor.execute("""
|
||||
SELECT provider, protocol, dnssec_mode, keep_alive, COUNT(*) as count
|
||||
FROM dns_queries
|
||||
GROUP BY provider, protocol, dnssec_mode, keep_alive
|
||||
ORDER BY provider, protocol, dnssec_mode, keep_alive
|
||||
""")
|
||||
|
||||
current_provider = None
|
||||
for provider, protocol, dnssec, keep_alive, count in cursor.fetchall():
|
||||
if current_provider != provider:
|
||||
if current_provider is not None:
|
||||
print()
|
||||
current_provider = provider
|
||||
|
||||
ka_str = "✓" if keep_alive else "✗"
|
||||
print(f" {provider:12} | {protocol:8} | {dnssec:5} | KA:{ka_str} | {count:6,} queries")
|
||||
|
||||
# Protocol distribution
|
||||
print(f"\n{'-'*80}")
|
||||
print("Protocol Distribution:")
|
||||
print(f"{'-'*80}")
|
||||
|
||||
cursor.execute("""
|
||||
SELECT protocol, COUNT(*) as count
|
||||
FROM dns_queries
|
||||
GROUP BY protocol
|
||||
ORDER BY protocol
|
||||
""")
|
||||
|
||||
for protocol, count in cursor.fetchall():
|
||||
pct = (count / total_queries) * 100
|
||||
print(f" {protocol:8} | {count:8,} queries ({pct:5.1f}%)")
|
||||
|
||||
# DNSSEC mode distribution
|
||||
print(f"\n{'-'*80}")
|
||||
print("DNSSEC Mode Distribution:")
|
||||
print(f"{'-'*80}")
|
||||
|
||||
cursor.execute("""
|
||||
SELECT dnssec_mode, COUNT(*) as count
|
||||
FROM dns_queries
|
||||
GROUP BY dnssec_mode
|
||||
ORDER BY dnssec_mode
|
||||
""")
|
||||
|
||||
for dnssec_mode, count in cursor.fetchall():
|
||||
pct = (count / total_queries) * 100
|
||||
print(f" {dnssec_mode:5} | {count:8,} queries ({pct:5.1f}%)")
|
||||
|
||||
# Keep-Alive distribution
|
||||
print(f"\n{'-'*80}")
|
||||
print("Keep-Alive Distribution:")
|
||||
print(f"{'-'*80}")
|
||||
|
||||
cursor.execute("""
|
||||
SELECT keep_alive, COUNT(*) as count
|
||||
FROM dns_queries
|
||||
GROUP BY keep_alive
|
||||
""")
|
||||
|
||||
for keep_alive, count in cursor.fetchall():
|
||||
ka_label = "Persistent" if keep_alive else "Non-persistent"
|
||||
pct = (count / total_queries) * 100
|
||||
print(f" {ka_label:15} | {count:8,} queries ({pct:5.1f}%)")
|
||||
|
||||
conn.close()
|
||||
|
||||
print(f"\n{'='*60}")
|
||||
print(f"✓ Database created successfully: {db_path}")
|
||||
print(f" Total: {total_files} files, {total_rows:,} rows")
|
||||
print(f"{'='*60}\n")
|
||||
|
||||
# Print usage examples
|
||||
print("\n📖 Usage Examples for Metabase:")
|
||||
print(f"{'-'*60}")
|
||||
|
||||
print("\n1. Compare protocols (DNSSEC off, persistent only):")
|
||||
print(""" SELECT provider, protocol,
|
||||
AVG(duration_ms) as avg_latency,
|
||||
AVG(total_bytes) as avg_bytes
|
||||
FROM dns_queries
|
||||
WHERE dnssec_mode = 'off' AND keep_alive = 1
|
||||
GROUP BY provider, protocol;""")
|
||||
|
||||
print("\n2. DNSSEC impact on UDP:")
|
||||
print(""" SELECT provider, dnssec_mode,
|
||||
AVG(duration_ms) as avg_latency
|
||||
FROM dns_queries
|
||||
WHERE protocol = 'udp'
|
||||
GROUP BY provider, dnssec_mode;""")
|
||||
|
||||
print("\n3. Keep-alive impact on TLS:")
|
||||
print(""" SELECT provider, keep_alive,
|
||||
AVG(duration_ms) as avg_latency,
|
||||
AVG(total_bytes) as avg_bytes
|
||||
FROM dns_queries
|
||||
WHERE protocol = 'tls' AND dnssec_mode = 'off'
|
||||
GROUP BY provider, keep_alive;""")
|
||||
|
||||
print("\n4. Time series for line graphs:")
|
||||
print(""" SELECT timestamp_unix, duration_ms, total_bytes
|
||||
FROM dns_queries
|
||||
WHERE provider = 'cloudflare'
|
||||
AND protocol = 'https'
|
||||
AND dnssec_mode = 'off'
|
||||
AND keep_alive = 1
|
||||
ORDER BY timestamp_unix;""")
|
||||
|
||||
print("\n5. Overall comparison table:")
|
||||
print(""" SELECT protocol, dnssec_mode, keep_alive,
|
||||
COUNT(*) as queries,
|
||||
AVG(duration_ms) as avg_latency,
|
||||
AVG(total_bytes) as avg_bytes
|
||||
FROM dns_queries
|
||||
GROUP BY protocol, dnssec_mode, keep_alive
|
||||
ORDER BY protocol, dnssec_mode, keep_alive;""")
|
||||
|
||||
print(f"\n{'-'*60}\n")
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
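The same comparisons can be run straight from Python instead of Metabase. A minimal sketch, assuming dns.db (the path created by this script) sits in the working directory:

import sqlite3

conn = sqlite3.connect('dns.db')
cur = conn.execute("""
    SELECT provider, protocol,
           AVG(duration_ms) AS avg_latency,
           AVG(total_bytes) AS avg_bytes
    FROM dns_queries
    WHERE dnssec_mode = 'off' AND keep_alive = 1
    GROUP BY provider, protocol
""")
for provider, protocol, avg_latency, avg_bytes in cur:
    print(f"{provider:12} {protocol:8} {avg_latency:8.2f} ms {avg_bytes:10.0f} B")
conn.close()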
|
||||
@@ -1,274 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Merge DNS test files by configuration.
|
||||
|
||||
- Merges CSVs of same config (adds 'run_id' column for traceability)
|
||||
- Optionally merges PCAPs using mergecap
|
||||
- Flattens date structure
|
||||
"""
|
||||
|
||||
import os
|
||||
import csv
|
||||
import subprocess
|
||||
import shutil
|
||||
from pathlib import Path
|
||||
import argparse
|
||||
from collections import defaultdict
|
||||
|
||||
def parse_filename(filename):
|
||||
"""
|
||||
Extract config key from filename.
|
||||
Format: protocol[-flags]-timestamp.{csv,pcap}
|
||||
Config key: protocol[-flags] (ignores timestamp)
|
||||
"""
|
||||
base = filename.replace('.csv', '').replace('.pcap', '')
|
||||
parts = base.split('-')
|
||||
|
||||
if len(parts) < 2:
|
||||
return None
|
||||
|
||||
# Config is everything except timestamp
|
||||
config = '-'.join(parts[:-1])
|
||||
timestamp = parts[-1]
|
||||
|
||||
return config, timestamp
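# Illustrative (the timestamp below is made up, not from a real run): a file
# named 'tls-trust-20250101T120000.csv' groups under config 'tls-trust', with
# the trailing timestamp reused later as its run_id.
assert parse_filename('tls-trust-20250101T120000.csv') == ('tls-trust', '20250101T120000')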
|
||||
|
||||
def extract_resolver_from_path(file_path):
|
||||
"""Extract resolver name from path"""
|
||||
parts = Path(file_path).parts
|
||||
for part in parts:
|
||||
if part.lower() in ['cloudflare', 'google', 'quad9', 'adguard']:
|
||||
return part.lower()
|
||||
return None
|
||||
|
||||
def find_files(root_dir, extension):
|
||||
"""Find all files with given extension"""
|
||||
files = []
|
||||
for root, dirs, filenames in os.walk(root_dir):
|
||||
for filename in filenames:
|
||||
if filename.endswith(extension):
|
||||
full_path = os.path.join(root, filename)
|
||||
files.append(full_path)
|
||||
return sorted(files)
|
||||
|
||||
def merge_csvs(csv_files, output_path, fieldnames):
|
||||
"""Merge multiple CSVs into one, adding 'run_id' column"""
|
||||
with open(output_path, 'w', newline='') as outfile:
|
||||
writer = csv.DictWriter(outfile, fieldnames=fieldnames + ['run_id'])
|
||||
writer.writeheader()
|
||||
|
||||
for csv_path in csv_files:
|
||||
# Use timestamp as run_id
|
||||
filename = Path(csv_path).name
|
||||
_, timestamp = parse_filename(filename)
|
||||
run_id = timestamp # Or add date if needed
|
||||
|
||||
with open(csv_path, 'r', newline='') as infile:
|
||||
reader = csv.DictReader(infile)
|
||||
for row in reader:
|
||||
row['run_id'] = run_id
|
||||
writer.writerow(row)
|
||||
|
||||
def merge_pcaps(pcap_files, output_path):
|
||||
"""Merge PCAP files using mergecap"""
|
||||
cmd = ['mergecap', '-w', output_path] + pcap_files
|
||||
try:
|
||||
subprocess.run(cmd, capture_output=True, check=True)
|
||||
return True
|
||||
except subprocess.CalledProcessError as e:
|
||||
print(f" ✗ mergecap error: {e.stderr.decode()}")
|
||||
return False
|
||||
except FileNotFoundError:
|
||||
print("Error: mergecap not found. Install Wireshark:")
|
||||
print(" Ubuntu: sudo apt install wireshark-common")
|
||||
print(" macOS: brew install wireshark")
|
||||
return False
|
||||
|
||||
def format_bytes(bytes_val):
|
||||
"""Format bytes as human readable"""
|
||||
for unit in ['B', 'KB', 'MB', 'GB']:
|
||||
if bytes_val < 1024.0:
|
||||
return f"{bytes_val:.1f} {unit}"
|
||||
bytes_val /= 1024.0
|
||||
return f"{bytes_val:.1f} TB"
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(
|
||||
description='Merge DNS test files by configuration',
|
||||
formatter_class=argparse.RawDescriptionHelpFormatter,
|
||||
epilog='''
|
||||
Merges files of same config across dates/timestamps.
|
||||
Output: ./results_merged/[resolver]/[config].csv (merged)
|
||||
./results_merged/[resolver]/[config].pcap (merged, if --merge-pcaps)
|
||||
|
||||
Examples:
|
||||
# Dry run to preview
|
||||
%(prog)s ./results --dry-run
|
||||
|
||||
# Merge CSVs only (recommended)
|
||||
%(prog)s ./results
|
||||
|
||||
# Merge CSVs and PCAPs
|
||||
%(prog)s ./results --merge-pcaps
|
||||
|
||||
# Custom output directory
|
||||
%(prog)s ./results --output ./merged_data
|
||||
'''
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
'input_dir',
|
||||
help='Input directory (e.g., ./results)'
|
||||
)
|
||||
parser.add_argument(
|
||||
'--output',
|
||||
default='./results_merged',
|
||||
help='Output directory (default: ./results_merged)'
|
||||
)
|
||||
parser.add_argument(
|
||||
'--merge-pcaps',
|
||||
action='store_true',
|
||||
help='Merge PCAP files (requires mergecap from Wireshark)'
|
||||
)
|
||||
parser.add_argument(
|
||||
'--dry-run',
|
||||
action='store_true',
|
||||
help='Show what would be done without merging'
|
||||
)
|
||||
parser.add_argument(
|
||||
'-y', '--yes',
|
||||
action='store_true',
|
||||
help='Skip confirmation prompt'
|
||||
)
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
if not os.path.isdir(args.input_dir):
|
||||
print(f"Error: Input directory not found: {args.input_dir}")
|
||||
return 1
|
||||
|
||||
# Find all files
|
||||
print("=" * 80)
|
||||
print("MERGE DNS TEST FILES")
|
||||
print("=" * 80)
|
||||
print(f"Input: {args.input_dir}")
|
||||
print(f"Output: {args.output}")
|
||||
print(f"Merge PCAPs: {'Yes' if args.merge_pcaps else 'No'}")
|
||||
|
||||
csv_files = find_files(args.input_dir, '.csv')
|
||||
pcap_files = find_files(args.input_dir, '.pcap') if args.merge_pcaps else []
|
||||
|
||||
if not csv_files and not pcap_files:
|
||||
print("\nNo CSV/PCAP files found")
|
||||
return 1
|
||||
|
||||
print(f"\nFound {len(csv_files)} CSV files")
|
||||
if args.merge_pcaps:
|
||||
print(f"Found {len(pcap_files)} PCAP files")
|
||||
|
||||
# Group files by resolver and config
|
||||
csv_groups = defaultdict(list)
|
||||
pcap_groups = defaultdict(list)
|
||||
|
||||
for csv_path in csv_files:
|
||||
config, _ = parse_filename(Path(csv_path).name)
|
||||
resolver = extract_resolver_from_path(csv_path)
|
||||
if config and resolver:
|
||||
key = (resolver, config)
|
||||
csv_groups[key].append(csv_path)
|
||||
|
||||
for pcap_path in pcap_files:
|
||||
config, _ = parse_filename(Path(pcap_path).name)
|
||||
resolver = extract_resolver_from_path(pcap_path)
|
||||
if config and resolver:
|
||||
key = (resolver, config)
|
||||
pcap_groups[key].append(pcap_path)
|
||||
|
||||
# Summary
|
||||
print("\nConfigs to merge:")
|
||||
print("-" * 80)
|
||||
for (resolver, config), files in sorted(csv_groups.items()):
|
||||
print(f" {resolver}/{config}: {len(files)} runs")
|
||||
|
||||
total_runs = sum(len(files) for files in csv_groups.values())
|
||||
print(f"\nTotal configs: {len(csv_groups)}")
|
||||
print(f"Total runs: {total_runs}")
|
||||
|
||||
if args.dry_run:
|
||||
print("\n*** DRY RUN MODE ***\n")
|
||||
for (resolver, config) in sorted(csv_groups.keys()):
|
||||
print(f"Would merge: {resolver}/{config} ({len(csv_groups[(resolver, config)])} CSVs)")
|
||||
if args.merge_pcaps and (resolver, config) in pcap_groups:
|
||||
print(f"Would merge: {resolver}/{config} ({len(pcap_groups[(resolver, config)])} PCAPs)")
|
||||
return 0
|
||||
|
||||
# Confirmation
|
||||
if not args.yes:
|
||||
response = input(f"\nMerge all into {args.output}? [y/N] ")
|
||||
if response.lower() not in ['y', 'yes']:
|
||||
print("Cancelled")
|
||||
return 0
|
||||
|
||||
# Merge
|
||||
print("\n" + "=" * 80)
|
||||
print("MERGING FILES")
|
||||
print("=" * 80)
|
||||
|
||||
success_count = 0
|
||||
fail_count = 0
|
||||
total_queries = 0
|
||||
total_size = 0
|
||||
|
||||
# Get standard CSV fieldnames (from first file)
|
||||
first_csv = next(iter(csv_files))
|
||||
with open(first_csv, 'r') as f:
|
||||
reader = csv.DictReader(f)
|
||||
fieldnames = reader.fieldnames
|
||||
|
||||
for (resolver, config), files in sorted(csv_groups.items()):
|
||||
print(f"\n{resolver}/{config} ({len(files)} runs)")
|
||||
|
||||
# Merge CSVs
|
||||
output_csv = os.path.join(args.output, resolver, f"{config}.csv")
|
||||
os.makedirs(os.path.dirname(output_csv), exist_ok=True)
|
||||
|
||||
merge_csvs(files, output_csv, fieldnames)
|
||||
|
||||
# Count queries in merged file
|
||||
with open(output_csv, 'r') as f:
|
||||
query_count = sum(1 for _ in csv.reader(f)) - 1 # Minus header
|
||||
|
||||
print(f" ✓ Merged CSV: {query_count:,} queries")
|
||||
total_queries += query_count
|
||||
success_count += 1
|
||||
|
||||
# Merge PCAPs if requested
|
||||
if args.merge_pcaps and (resolver, config) in pcap_groups:
|
||||
output_pcap = os.path.join(args.output, resolver, f"{config}.pcap")
|
||||
pcap_list = pcap_groups[(resolver, config)]
|
||||
|
||||
if merge_pcaps(pcap_list, output_pcap):
|
||||
merged_size = os.path.getsize(output_pcap)
|
||||
orig_size = sum(os.path.getsize(p) for p in pcap_list)
|
||||
print(f" ✓ Merged PCAP: {format_bytes(merged_size)} "
|
||||
f"(from {format_bytes(orig_size)})")
|
||||
total_size += merged_size
|
||||
else:
|
||||
print(f" ✗ PCAP merge failed")
|
||||
fail_count += 1
|
||||
|
||||
# Final summary
|
||||
print("\n" + "=" * 80)
|
||||
print("COMPLETE")
|
||||
print("=" * 80)
|
||||
print(f"Successful configs: {success_count}")
|
||||
print(f"Failed: {fail_count}")
|
||||
print(f"Total queries: {total_queries:,}")
|
||||
if args.merge_pcaps:
|
||||
print(f"Total PCAP size: {format_bytes(total_size)}")
|
||||
print(f"\nMerged files in: {args.output}")
|
||||
|
||||
return 0 if fail_count == 0 else 1
|
||||
|
||||
if __name__ == "__main__":
|
||||
exit(main())
|
||||