feat(profiling): Add CPU and MEM profiling
This commit is contained in:
Generated
-27
@@ -1,27 +0,0 @@
|
|||||||
{
|
|
||||||
"nodes": {
|
|
||||||
"nixpkgs": {
|
|
||||||
"locked": {
|
|
||||||
"lastModified": 1760103332,
|
|
||||||
"narHash": "sha256-BMsGVfKl4Q80Pr9T1AkCRljO1bpwCmY8rTBVj8XGuhA=",
|
|
||||||
"owner": "NixOS",
|
|
||||||
"repo": "nixpkgs",
|
|
||||||
"rev": "870493f9a8cb0b074ae5b411b2f232015db19a65",
|
|
||||||
"type": "github"
|
|
||||||
},
|
|
||||||
"original": {
|
|
||||||
"owner": "NixOS",
|
|
||||||
"ref": "nixpkgs-unstable",
|
|
||||||
"repo": "nixpkgs",
|
|
||||||
"type": "github"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"root": {
|
|
||||||
"inputs": {
|
|
||||||
"nixpkgs": "nixpkgs"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"root": "root",
|
|
||||||
"version": 7
|
|
||||||
}
|
|
||||||
@@ -1,38 +0,0 @@
|
|||||||
{
|
|
||||||
description = "A Nix-flake-based Go 1.22 development environment";
|
|
||||||
|
|
||||||
inputs.nixpkgs.url = "github:NixOS/nixpkgs/nixpkgs-unstable";
|
|
||||||
|
|
||||||
outputs = inputs:
|
|
||||||
let
|
|
||||||
goVersion = 25; # Change this to update the whole stack
|
|
||||||
|
|
||||||
supportedSystems = [ "aarch64-darwin" ];
|
|
||||||
forEachSupportedSystem = f: inputs.nixpkgs.lib.genAttrs supportedSystems (system: f {
|
|
||||||
pkgs = import inputs.nixpkgs {
|
|
||||||
inherit system;
|
|
||||||
overlays = [ inputs.self.overlays.default ];
|
|
||||||
};
|
|
||||||
});
|
|
||||||
in
|
|
||||||
{
|
|
||||||
overlays.default = final: prev: {
|
|
||||||
go = final."go_1_${toString goVersion}";
|
|
||||||
};
|
|
||||||
|
|
||||||
devShells = forEachSupportedSystem ({ pkgs }: {
|
|
||||||
default = pkgs.mkShell {
|
|
||||||
packages = with pkgs; [
|
|
||||||
go
|
|
||||||
gotools
|
|
||||||
golangci-lint
|
|
||||||
(pkgs.python3.withPackages (python-pkgs: with python-pkgs; [
|
|
||||||
pandas
|
|
||||||
matplotlib
|
|
||||||
seaborn
|
|
||||||
]))
|
|
||||||
];
|
|
||||||
};
|
|
||||||
});
|
|
||||||
};
|
|
||||||
}
|
|
||||||
@@ -12,6 +12,7 @@ import (
|
|||||||
"github.com/afonsofrancof/sdns-proxy/client"
|
"github.com/afonsofrancof/sdns-proxy/client"
|
||||||
"github.com/afonsofrancof/sdns-proxy/internal/qol/capture"
|
"github.com/afonsofrancof/sdns-proxy/internal/qol/capture"
|
||||||
"github.com/afonsofrancof/sdns-proxy/internal/qol/results"
|
"github.com/afonsofrancof/sdns-proxy/internal/qol/results"
|
||||||
|
"github.com/afonsofrancof/sdns-proxy/internal/qol/stats"
|
||||||
"github.com/google/gopacket/pcap"
|
"github.com/google/gopacket/pcap"
|
||||||
"github.com/miekg/dns"
|
"github.com/miekg/dns"
|
||||||
)
|
)
|
||||||
@@ -86,13 +87,16 @@ func (r *MeasurementRunner) runPerUpstream(upstream string, domains []string, qT
|
|||||||
defer dnsClient.Close()
|
defer dnsClient.Close()
|
||||||
|
|
||||||
// Setup output files
|
// Setup output files
|
||||||
csvPath, pcapPath := GenerateOutputPaths(r.config.OutputDir, upstream, r.config.DNSSEC, r.config.AuthoritativeDNSSEC, r.config.KeepAlive)
|
csvPath, pcapPath, memPath := GenerateOutputPaths(r.config.OutputDir, upstream, r.config.DNSSEC, r.config.AuthoritativeDNSSEC, r.config.KeepAlive)
|
||||||
|
|
||||||
// Create directory if it doesn't exist
|
// Create directory if it doesn't exist
|
||||||
if err := os.MkdirAll(filepath.Dir(csvPath), 0755); err != nil {
|
if err := os.MkdirAll(filepath.Dir(csvPath), 0755); err != nil {
|
||||||
return fmt.Errorf("failed to create output directory: %w", err)
|
return fmt.Errorf("failed to create output directory: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Initialize runtime collector with memPath
|
||||||
|
runtimeCollector := stats.NewRuntimeCollector(memPath)
|
||||||
|
|
||||||
keepAliveStr := ""
|
keepAliveStr := ""
|
||||||
if r.config.KeepAlive {
|
if r.config.KeepAlive {
|
||||||
keepAliveStr = " (keep-alive)"
|
keepAliveStr = " (keep-alive)"
|
||||||
@@ -118,7 +122,17 @@ func (r *MeasurementRunner) runPerUpstream(upstream string, domains []string, qT
|
|||||||
|
|
||||||
time.Sleep(time.Second)
|
time.Sleep(time.Second)
|
||||||
// Run measurements
|
// Run measurements
|
||||||
return r.runQueries(dnsClient, upstream, domains, qType, writer, packetCapture)
|
err = r.runQueries(dnsClient, upstream, domains, qType, writer, packetCapture)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
// Write summed mem stats for the entire run
|
||||||
|
if err := runtimeCollector.WriteStats(); err != nil {
|
||||||
|
fmt.Fprintf(os.Stderr, "Warning: failed to write mem stats to %s: %v\n", memPath, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (r *MeasurementRunner) runQueries(dnsClient client.DNSClient, upstream string,
|
func (r *MeasurementRunner) runQueries(dnsClient client.DNSClient, upstream string,
|
||||||
|
|||||||
@@ -0,0 +1,95 @@
|
|||||||
|
package stats
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/csv"
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
"runtime"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
type RuntimeStats struct {
|
||||||
|
TotalAlloc uint64
|
||||||
|
Mallocs uint64
|
||||||
|
NumGC uint32
|
||||||
|
AllocDelta uint64
|
||||||
|
MallocsDelta uint64
|
||||||
|
GCDelta uint32
|
||||||
|
}
|
||||||
|
|
||||||
|
type RuntimeCollector struct {
|
||||||
|
startStats runtime.MemStats
|
||||||
|
memPath string
|
||||||
|
}
|
||||||
|
|
||||||
|
func NewRuntimeCollector(memPath string) *RuntimeCollector {
|
||||||
|
var stats runtime.MemStats
|
||||||
|
runtime.ReadMemStats(&stats)
|
||||||
|
|
||||||
|
return &RuntimeCollector{
|
||||||
|
startStats: stats,
|
||||||
|
memPath: memPath,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (rc *RuntimeCollector) Collect() RuntimeStats {
|
||||||
|
var current runtime.MemStats
|
||||||
|
runtime.ReadMemStats(¤t)
|
||||||
|
|
||||||
|
return RuntimeStats{
|
||||||
|
TotalAlloc: current.TotalAlloc,
|
||||||
|
Mallocs: current.Mallocs,
|
||||||
|
NumGC: current.NumGC,
|
||||||
|
AllocDelta: current.TotalAlloc - rc.startStats.TotalAlloc,
|
||||||
|
MallocsDelta: current.Mallocs - rc.startStats.Mallocs,
|
||||||
|
GCDelta: current.NumGC - rc.startStats.NumGC,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (rc *RuntimeCollector) WriteStats() error {
|
||||||
|
stats := rc.Collect()
|
||||||
|
timestamp := time.Now().Format(time.RFC3339Nano)
|
||||||
|
|
||||||
|
// Check if file exists
|
||||||
|
fileExists := false
|
||||||
|
if _, err := os.Stat(rc.memPath); err == nil {
|
||||||
|
fileExists = true
|
||||||
|
}
|
||||||
|
|
||||||
|
// Open in append mode
|
||||||
|
file, err := os.OpenFile(rc.memPath, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("failed to open mem.csv: %w", err)
|
||||||
|
}
|
||||||
|
defer file.Close()
|
||||||
|
|
||||||
|
writer := csv.NewWriter(file)
|
||||||
|
|
||||||
|
// Write header if new file
|
||||||
|
if !fileExists {
|
||||||
|
header := []string{
|
||||||
|
"timestamp", "total_alloc_bytes", "mallocs", "gc_cycles",
|
||||||
|
"alloc_delta", "mallocs_delta", "gc_delta",
|
||||||
|
}
|
||||||
|
if err := writer.Write(header); err != nil {
|
||||||
|
return fmt.Errorf("failed to write mem.csv header: %w", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Write data row
|
||||||
|
row := []string{
|
||||||
|
timestamp,
|
||||||
|
fmt.Sprintf("%d", stats.TotalAlloc),
|
||||||
|
fmt.Sprintf("%d", stats.Mallocs),
|
||||||
|
fmt.Sprintf("%d", stats.NumGC),
|
||||||
|
fmt.Sprintf("%d", stats.AllocDelta),
|
||||||
|
fmt.Sprintf("%d", stats.MallocsDelta),
|
||||||
|
fmt.Sprintf("%d", stats.GCDelta),
|
||||||
|
}
|
||||||
|
if err := writer.Write(row); err != nil {
|
||||||
|
return fmt.Errorf("failed to write mem.csv row: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
writer.Flush()
|
||||||
|
return writer.Error()
|
||||||
|
}
|
||||||
@@ -1,4 +1,3 @@
|
|||||||
// ./internal/qol/utils.go
|
|
||||||
package qol
|
package qol
|
||||||
|
|
||||||
import (
|
import (
|
||||||
@@ -8,7 +7,7 @@ import (
|
|||||||
"strings"
|
"strings"
|
||||||
)
|
)
|
||||||
|
|
||||||
func GenerateOutputPaths(outputDir, upstream string, dnssec, authDNSSEC, keepAlive bool) (csvPath, pcapPath string) {
|
func GenerateOutputPaths(outputDir, upstream string, dnssec, authDNSSEC, keepAlive bool) (csvPath, pcapPath, memPath string) {
|
||||||
proto := DetectProtocol(upstream)
|
proto := DetectProtocol(upstream)
|
||||||
cleanServer := cleanServerName(upstream)
|
cleanServer := cleanServerName(upstream)
|
||||||
|
|
||||||
@@ -32,8 +31,10 @@ func GenerateOutputPaths(outputDir, upstream string, dnssec, authDNSSEC, keepAli
|
|||||||
base = fmt.Sprintf("%s-%s", base, strings.Join(flags, "-"))
|
base = fmt.Sprintf("%s-%s", base, strings.Join(flags, "-"))
|
||||||
}
|
}
|
||||||
|
|
||||||
return filepath.Join(subDir, base+".csv"),
|
csvPath = filepath.Join(subDir, base+".csv")
|
||||||
filepath.Join(subDir, base+".pcap")
|
pcapPath = filepath.Join(subDir, base+".pcap")
|
||||||
|
memPath = filepath.Join(subDir, base+".mem.csv")
|
||||||
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
func cleanServerName(server string) string {
|
func cleanServerName(server string) string {
|
||||||
|
|||||||
@@ -1,187 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
|
|
||||||
# Exit on error
|
|
||||||
set -e
|
|
||||||
|
|
||||||
# Default values
|
|
||||||
TOOL_PATH="./qol"
|
|
||||||
DOMAINS_FILE="./domains.txt"
|
|
||||||
OUTPUT_DIR="./results"
|
|
||||||
INTERFACE="veth1"
|
|
||||||
TIMEOUT="5s"
|
|
||||||
SLEEP_TIME="1"
|
|
||||||
|
|
||||||
# Parse arguments
|
|
||||||
while [[ $# -gt 0 ]]; do
|
|
||||||
case $1 in
|
|
||||||
-t|--tool-path)
|
|
||||||
TOOL_PATH="$2"
|
|
||||||
shift 2
|
|
||||||
;;
|
|
||||||
-d|--domains-file)
|
|
||||||
DOMAINS_FILE="$2"
|
|
||||||
shift 2
|
|
||||||
;;
|
|
||||||
-o|--output-dir)
|
|
||||||
OUTPUT_DIR="$2"
|
|
||||||
shift 2
|
|
||||||
;;
|
|
||||||
-I|--interface)
|
|
||||||
INTERFACE="$2"
|
|
||||||
shift 2
|
|
||||||
;;
|
|
||||||
-T|--timeout)
|
|
||||||
TIMEOUT="$2"
|
|
||||||
shift 2
|
|
||||||
;;
|
|
||||||
-s|--sleep)
|
|
||||||
SLEEP_TIME="$2"
|
|
||||||
shift 2
|
|
||||||
;;
|
|
||||||
--help)
|
|
||||||
echo "Usage: $0 [OPTIONS]"
|
|
||||||
echo ""
|
|
||||||
echo "Options:"
|
|
||||||
echo " -t, --tool-path PATH Path to qol tool (default: ./qol)"
|
|
||||||
echo " -d, --domains-file PATH Path to domains file (default: ./domains.txt)"
|
|
||||||
echo " -o, --output-dir PATH Output directory (default: ./results)"
|
|
||||||
echo " -I, --interface NAME Network interface (default: veth1)"
|
|
||||||
echo " -T, --timeout DURATION Timeout duration (default: 5s)"
|
|
||||||
echo " -s, --sleep SECONDS Sleep between runs (default: 1)"
|
|
||||||
echo " --help Show this help"
|
|
||||||
exit 0
|
|
||||||
;;
|
|
||||||
*)
|
|
||||||
echo "Unknown option: $1"
|
|
||||||
echo "Use --help for usage information"
|
|
||||||
exit 1
|
|
||||||
;;
|
|
||||||
esac
|
|
||||||
done
|
|
||||||
|
|
||||||
echo "Configuration:"
|
|
||||||
echo " Tool path: $TOOL_PATH"
|
|
||||||
echo " Domains file: $DOMAINS_FILE"
|
|
||||||
echo " Output dir: $OUTPUT_DIR"
|
|
||||||
echo " Interface: $INTERFACE"
|
|
||||||
echo " Timeout: $TIMEOUT"
|
|
||||||
echo " Sleep time: ${SLEEP_TIME}s"
|
|
||||||
echo ""
|
|
||||||
|
|
||||||
# Connection-based protocols that benefit from keep-alive (TCP-based)
|
|
||||||
CONN_SERVERS=(
|
|
||||||
-s "dotcp://8.8.8.8:53"
|
|
||||||
-s "dotcp://1.1.1.1:53"
|
|
||||||
-s "dotcp://9.9.9.9:53"
|
|
||||||
-s "dotcp://dns.adguard-dns.com:53"
|
|
||||||
#-s "tls://8.8.8.8:853"
|
|
||||||
#-s "tls://1.1.1.1:853"
|
|
||||||
#-s "tls://9.9.9.9:853"
|
|
||||||
#-s "tls://dns.adguard-dns.com:853"
|
|
||||||
#-s "https://dns.google/dns-query"
|
|
||||||
#-s "https://cloudflare-dns.com/dns-query"
|
|
||||||
#-s "https://dns10.quad9.net/dns-query"
|
|
||||||
#-s "https://dns.adguard-dns.com/dns-query"
|
|
||||||
)
|
|
||||||
|
|
||||||
# QUIC-based protocols (have built-in 0-RTT, keep-alive doesn't add value)
|
|
||||||
QUIC_SERVERS=(
|
|
||||||
#-s "doh3://dns.google/dns-query"
|
|
||||||
#-s "doh3://cloudflare-dns.com/dns-query"
|
|
||||||
#-s "doh3://dns.adguard-dns.com/dns-query"
|
|
||||||
#-s "doq://dns.adguard-dns.com:853"
|
|
||||||
)
|
|
||||||
|
|
||||||
# Connectionless protocols (no keep-alive)
|
|
||||||
CONNLESS_SERVERS=(
|
|
||||||
# -s "udp://8.8.8.8:53"
|
|
||||||
# -s "udp://1.1.1.1:53"
|
|
||||||
# -s "udp://9.9.9.9:53"
|
|
||||||
# -s "udp://dns.adguard-dns.com:53"
|
|
||||||
-s "sdns://AQMAAAAAAAAAETk0LjE0MC4xNC4xNDo1NDQzINErR_JS3PLCu_iZEIbq95zkSV2LFsigxDIuUso_OQhzIjIuZG5zY3J5cHQuZGVmYXVsdC5uczEuYWRndWFyZC5jb20"
|
|
||||||
-s "sdns://AQMAAAAAAAAAFDE0OS4xMTIuMTEyLjExMjo4NDQzIGfIR7jIdYzRICRVQ751Z0bfNN8dhMALjEcDaN-CHYY-GTIuZG5zY3J5cHQtY2VydC5xdWFkOS5uZXQ"
|
|
||||||
)
|
|
||||||
|
|
||||||
# Common args
|
|
||||||
COMMON_ARGS=(
|
|
||||||
"$DOMAINS_FILE"
|
|
||||||
--interface "$INTERFACE"
|
|
||||||
--timeout "$TIMEOUT"
|
|
||||||
)
|
|
||||||
|
|
||||||
# Combinations for TCP-based connection protocols
|
|
||||||
CONN_COMBINATIONS=(
|
|
||||||
# DNSSEC off, Keep off
|
|
||||||
""
|
|
||||||
|
|
||||||
# DNSSEC off, Keep on
|
|
||||||
"--keep-alive"
|
|
||||||
|
|
||||||
# DNSSEC on (trust), Keep on
|
|
||||||
"--dnssec --keep-alive"
|
|
||||||
|
|
||||||
# DNSSEC on (auth), Keep on
|
|
||||||
"--dnssec --authoritative-dnssec --keep-alive"
|
|
||||||
)
|
|
||||||
|
|
||||||
# Combinations for QUIC and connectionless protocols (no keep-alive)
|
|
||||||
NO_KEEPALIVE_COMBINATIONS=(
|
|
||||||
# DNSSEC off
|
|
||||||
""
|
|
||||||
|
|
||||||
# DNSSEC on (trust)
|
|
||||||
"--dnssec"
|
|
||||||
|
|
||||||
# DNSSEC on (auth)
|
|
||||||
"--dnssec --authoritative-dnssec"
|
|
||||||
)
|
|
||||||
|
|
||||||
echo "=== Running TCP-based protocols (TLS/HTTPS) ==="
|
|
||||||
for FLAGS in "${CONN_COMBINATIONS[@]}"; do
|
|
||||||
echo "Running: $FLAGS"
|
|
||||||
|
|
||||||
FLAGS_ARRAY=($FLAGS)
|
|
||||||
|
|
||||||
sudo "$TOOL_PATH" run \
|
|
||||||
--output-dir "$OUTPUT_DIR" \
|
|
||||||
"${COMMON_ARGS[@]}" \
|
|
||||||
"${CONN_SERVERS[@]}" \
|
|
||||||
"${FLAGS_ARRAY[@]}" || true
|
|
||||||
|
|
||||||
sleep "$SLEEP_TIME"
|
|
||||||
done
|
|
||||||
|
|
||||||
echo ""
|
|
||||||
echo "=== Running QUIC-based protocols (DoH3/DoQ) ==="
|
|
||||||
for FLAGS in "${NO_KEEPALIVE_COMBINATIONS[@]}"; do
|
|
||||||
echo "Running: $FLAGS"
|
|
||||||
|
|
||||||
FLAGS_ARRAY=($FLAGS)
|
|
||||||
|
|
||||||
sudo "$TOOL_PATH" run \
|
|
||||||
--output-dir "$OUTPUT_DIR" \
|
|
||||||
"${COMMON_ARGS[@]}" \
|
|
||||||
"${QUIC_SERVERS[@]}" \
|
|
||||||
"${FLAGS_ARRAY[@]}" || true
|
|
||||||
|
|
||||||
sleep "$SLEEP_TIME"
|
|
||||||
done
|
|
||||||
|
|
||||||
echo ""
|
|
||||||
echo "=== Running connectionless protocols (UDP) ==="
|
|
||||||
for FLAGS in "${NO_KEEPALIVE_COMBINATIONS[@]}"; do
|
|
||||||
echo "Running: $FLAGS"
|
|
||||||
|
|
||||||
FLAGS_ARRAY=($FLAGS)
|
|
||||||
|
|
||||||
sudo "$TOOL_PATH" run \
|
|
||||||
--output-dir "$OUTPUT_DIR" \
|
|
||||||
"${COMMON_ARGS[@]}" \
|
|
||||||
"${CONNLESS_SERVERS[@]}" \
|
|
||||||
"${FLAGS_ARRAY[@]}" || true
|
|
||||||
|
|
||||||
sleep "$SLEEP_TIME"
|
|
||||||
done
|
|
||||||
|
|
||||||
echo ""
|
|
||||||
echo "All combinations completed!"
|
|
||||||
@@ -1,498 +0,0 @@
|
|||||||
import pandas as pd
|
|
||||||
import matplotlib.pyplot as plt
|
|
||||||
import seaborn as sns
|
|
||||||
import numpy as np
|
|
||||||
from pathlib import Path
|
|
||||||
from scipy import stats
|
|
||||||
import warnings
|
|
||||||
|
|
||||||
warnings.filterwarnings('ignore')
|
|
||||||
|
|
||||||
# Set style for publication-quality plots
|
|
||||||
sns.set_style("whitegrid")
|
|
||||||
plt.rcParams['figure.dpi'] = 300
|
|
||||||
plt.rcParams['savefig.dpi'] = 300
|
|
||||||
plt.rcParams['font.size'] = 10
|
|
||||||
plt.rcParams['figure.figsize'] = (12, 6)
|
|
||||||
|
|
||||||
class DNSAnalyzer:
|
|
||||||
def __init__(self, results_dir='results'):
|
|
||||||
self.results_dir = Path(results_dir)
|
|
||||||
self.df = None
|
|
||||||
|
|
||||||
def load_all_data(self):
|
|
||||||
"""Load all CSV files from the results directory"""
|
|
||||||
data_frames = []
|
|
||||||
|
|
||||||
providers = ['adguard', 'cloudflare', 'google', 'quad9']
|
|
||||||
|
|
||||||
for provider in providers:
|
|
||||||
provider_path = self.results_dir / provider
|
|
||||||
if not provider_path.exists():
|
|
||||||
continue
|
|
||||||
|
|
||||||
for csv_file in provider_path.glob('*.csv'):
|
|
||||||
try:
|
|
||||||
df = pd.read_csv(csv_file)
|
|
||||||
df['provider'] = provider
|
|
||||||
df['test_config'] = csv_file.stem
|
|
||||||
data_frames.append(df)
|
|
||||||
except Exception as e:
|
|
||||||
print(f"Error loading {csv_file}: {e}")
|
|
||||||
|
|
||||||
self.df = pd.concat(data_frames, ignore_index=True)
|
|
||||||
self._clean_and_enrich_data()
|
|
||||||
print(f"Loaded {len(self.df)} DNS queries across {len(data_frames)} test configurations")
|
|
||||||
|
|
||||||
def _clean_and_enrich_data(self):
|
|
||||||
"""Clean data and add useful columns"""
|
|
||||||
# Remove failed queries
|
|
||||||
self.df = self.df[self.df['error'].isna()]
|
|
||||||
|
|
||||||
# Extract protocol base (remove -auth, -trust suffixes)
|
|
||||||
self.df['protocol_base'] = self.df['protocol'].str.replace('-auth|-trust', '', regex=True)
|
|
||||||
|
|
||||||
# DNSSEC configuration
|
|
||||||
self.df['dnssec_mode'] = 'none'
|
|
||||||
self.df.loc[self.df['auth_dnssec'] == True, 'dnssec_mode'] = 'auth'
|
|
||||||
self.df.loc[(self.df['dnssec'] == True) & (self.df['auth_dnssec'] == False), 'dnssec_mode'] = 'trust'
|
|
||||||
|
|
||||||
# Protocol categories
|
|
||||||
self.df['protocol_category'] = self.df['protocol_base'].map({
|
|
||||||
'udp': 'Plain DNS',
|
|
||||||
'tls': 'DoT',
|
|
||||||
'https': 'DoH',
|
|
||||||
'doh3': 'DoH/3',
|
|
||||||
'doq': 'DoQ'
|
|
||||||
})
|
|
||||||
|
|
||||||
# Connection persistence
|
|
||||||
self.df['persistence'] = self.df['keep_alive'].fillna(False)
|
|
||||||
|
|
||||||
def generate_summary_statistics(self):
|
|
||||||
"""Generate comprehensive summary statistics"""
|
|
||||||
print("\n" + "="*80)
|
|
||||||
print("SUMMARY STATISTICS")
|
|
||||||
print("="*80)
|
|
||||||
|
|
||||||
# Overall statistics
|
|
||||||
print("\n--- Overall Performance ---")
|
|
||||||
print(f"Total queries: {len(self.df)}")
|
|
||||||
print(f"Mean latency: {self.df['duration_ms'].mean():.2f} ms")
|
|
||||||
print(f"Median latency: {self.df['duration_ms'].median():.2f} ms")
|
|
||||||
print(f"95th percentile: {self.df['duration_ms'].quantile(0.95):.2f} ms")
|
|
||||||
print(f"99th percentile: {self.df['duration_ms'].quantile(0.99):.2f} ms")
|
|
||||||
|
|
||||||
# By protocol
|
|
||||||
print("\n--- Performance by Protocol ---")
|
|
||||||
protocol_stats = self.df.groupby('protocol_category')['duration_ms'].agg([
|
|
||||||
('count', 'count'),
|
|
||||||
('mean', 'mean'),
|
|
||||||
('median', 'median'),
|
|
||||||
('std', 'std'),
|
|
||||||
('p95', lambda x: x.quantile(0.95)),
|
|
||||||
('p99', lambda x: x.quantile(0.99))
|
|
||||||
]).round(2)
|
|
||||||
print(protocol_stats)
|
|
||||||
|
|
||||||
# By provider
|
|
||||||
print("\n--- Performance by Provider ---")
|
|
||||||
provider_stats = self.df.groupby('provider')['duration_ms'].agg([
|
|
||||||
('count', 'count'),
|
|
||||||
('mean', 'mean'),
|
|
||||||
('median', 'median'),
|
|
||||||
('std', 'std'),
|
|
||||||
('p95', lambda x: x.quantile(0.95))
|
|
||||||
]).round(2)
|
|
||||||
print(provider_stats)
|
|
||||||
|
|
||||||
# DNSSEC impact
|
|
||||||
print("\n--- DNSSEC Validation Impact ---")
|
|
||||||
dnssec_stats = self.df.groupby('dnssec_mode')['duration_ms'].agg([
|
|
||||||
('count', 'count'),
|
|
||||||
('mean', 'mean'),
|
|
||||||
('median', 'median'),
|
|
||||||
('overhead_vs_none', lambda x: x.mean())
|
|
||||||
]).round(2)
|
|
||||||
|
|
||||||
# Calculate overhead percentage
|
|
||||||
baseline = dnssec_stats.loc['none', 'mean'] if 'none' in dnssec_stats.index else 0
|
|
||||||
if baseline > 0:
|
|
||||||
dnssec_stats['overhead_pct'] = ((dnssec_stats['overhead_vs_none'] - baseline) / baseline * 100).round(1)
|
|
||||||
print(dnssec_stats)
|
|
||||||
|
|
||||||
# Bandwidth analysis
|
|
||||||
print("\n--- Bandwidth Usage ---")
|
|
||||||
bandwidth_stats = self.df.groupby('protocol_category').agg({
|
|
||||||
'request_size_bytes': ['mean', 'median'],
|
|
||||||
'response_size_bytes': ['mean', 'median']
|
|
||||||
}).round(2)
|
|
||||||
print(bandwidth_stats)
|
|
||||||
|
|
||||||
# Persistence impact (where applicable)
|
|
||||||
print("\n--- Connection Persistence Impact ---")
|
|
||||||
persist_protocols = self.df[self.df['protocol_base'].isin(['tls', 'https'])]
|
|
||||||
if len(persist_protocols) > 0:
|
|
||||||
persist_stats = persist_protocols.groupby(['protocol_base', 'persistence'])['duration_ms'].agg([
|
|
||||||
('mean', 'mean'),
|
|
||||||
('median', 'median')
|
|
||||||
]).round(2)
|
|
||||||
print(persist_stats)
|
|
||||||
|
|
||||||
return {
|
|
||||||
'protocol': protocol_stats,
|
|
||||||
'provider': provider_stats,
|
|
||||||
'dnssec': dnssec_stats,
|
|
||||||
'bandwidth': bandwidth_stats
|
|
||||||
}
|
|
||||||
|
|
||||||
def plot_latency_by_protocol(self, output_dir='plots'):
|
|
||||||
"""Violin plot of latency distribution by protocol"""
|
|
||||||
Path(output_dir).mkdir(exist_ok=True)
|
|
||||||
|
|
||||||
plt.figure(figsize=(14, 7))
|
|
||||||
|
|
||||||
# Order protocols logically
|
|
||||||
protocol_order = ['Plain DNS', 'DoT', 'DoH', 'DoH/3', 'DoQ']
|
|
||||||
available_protocols = [p for p in protocol_order if p in self.df['protocol_category'].values]
|
|
||||||
|
|
||||||
sns.violinplot(data=self.df, x='protocol_category', y='duration_ms',
|
|
||||||
order=available_protocols, inner='box', cut=0)
|
|
||||||
|
|
||||||
plt.title('DNS Query Latency Distribution by Protocol', fontsize=14, fontweight='bold')
|
|
||||||
plt.xlabel('Protocol', fontsize=12)
|
|
||||||
plt.ylabel('Response Time (ms)', fontsize=12)
|
|
||||||
plt.xticks(rotation=0)
|
|
||||||
|
|
||||||
# Add mean values as annotations
|
|
||||||
for i, protocol in enumerate(available_protocols):
|
|
||||||
mean_val = self.df[self.df['protocol_category'] == protocol]['duration_ms'].mean()
|
|
||||||
plt.text(i, mean_val, f'{mean_val:.1f}', ha='center', va='bottom', fontweight='bold')
|
|
||||||
|
|
||||||
plt.tight_layout()
|
|
||||||
plt.savefig(f'{output_dir}/latency_by_protocol.png', bbox_inches='tight')
|
|
||||||
plt.close()
|
|
||||||
print(f"✓ Saved: latency_by_protocol.png")
|
|
||||||
|
|
||||||
def plot_provider_comparison(self, output_dir='plots'):
|
|
||||||
"""Box plot comparing providers across protocols"""
|
|
||||||
Path(output_dir).mkdir(exist_ok=True)
|
|
||||||
|
|
||||||
fig, axes = plt.subplots(2, 2, figsize=(16, 12))
|
|
||||||
fig.suptitle('Provider Performance Comparison by Protocol', fontsize=16, fontweight='bold')
|
|
||||||
|
|
||||||
protocols = self.df['protocol_category'].unique()
|
|
||||||
protocols = [p for p in ['Plain DNS', 'DoT', 'DoH', 'DoH/3'] if p in protocols]
|
|
||||||
|
|
||||||
for idx, protocol in enumerate(protocols[:4]):
|
|
||||||
ax = axes[idx // 2, idx % 2]
|
|
||||||
data = self.df[self.df['protocol_category'] == protocol]
|
|
||||||
|
|
||||||
if len(data) > 0:
|
|
||||||
sns.boxplot(data=data, x='provider', y='duration_ms', ax=ax)
|
|
||||||
ax.set_title(f'{protocol}', fontsize=12, fontweight='bold')
|
|
||||||
ax.set_xlabel('Provider', fontsize=10)
|
|
||||||
ax.set_ylabel('Response Time (ms)', fontsize=10)
|
|
||||||
ax.tick_params(axis='x', rotation=45)
|
|
||||||
|
|
||||||
plt.tight_layout()
|
|
||||||
plt.savefig(f'{output_dir}/provider_comparison.png', bbox_inches='tight')
|
|
||||||
plt.close()
|
|
||||||
print(f"✓ Saved: provider_comparison.png")
|
|
||||||
|
|
||||||
def plot_dnssec_impact(self, output_dir='plots'):
|
|
||||||
"""Compare DNSSEC validation methods (trust vs auth)"""
|
|
||||||
Path(output_dir).mkdir(exist_ok=True)
|
|
||||||
|
|
||||||
# Filter for protocols that have DNSSEC variations
|
|
||||||
dnssec_data = self.df[self.df['dnssec_mode'] != 'none'].copy()
|
|
||||||
|
|
||||||
if len(dnssec_data) == 0:
|
|
||||||
print("⚠ No DNSSEC data available")
|
|
||||||
return
|
|
||||||
|
|
||||||
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 6))
|
|
||||||
|
|
||||||
# Plot 1: Overall DNSSEC impact
|
|
||||||
protocol_order = ['Plain DNS', 'DoT', 'DoH', 'DoH/3', 'DoQ']
|
|
||||||
available = [p for p in protocol_order if p in self.df['protocol_category'].values]
|
|
||||||
|
|
||||||
sns.barplot(data=self.df, x='protocol_category', y='duration_ms',
|
|
||||||
hue='dnssec_mode', order=available, ax=ax1, ci=95)
|
|
||||||
ax1.set_title('DNSSEC Validation Overhead by Protocol', fontsize=12, fontweight='bold')
|
|
||||||
ax1.set_xlabel('Protocol', fontsize=10)
|
|
||||||
ax1.set_ylabel('Mean Response Time (ms)', fontsize=10)
|
|
||||||
ax1.legend(title='DNSSEC Mode', labels=['No DNSSEC', 'Auth (Full)', 'Trust (Resolver)'])
|
|
||||||
ax1.tick_params(axis='x', rotation=0)
|
|
||||||
|
|
||||||
# Plot 2: Trust vs Auth comparison
|
|
||||||
comparison_data = dnssec_data.groupby(['protocol_category', 'dnssec_mode'])['duration_ms'].mean().reset_index()
|
|
||||||
pivot_data = comparison_data.pivot(index='protocol_category', columns='dnssec_mode', values='duration_ms')
|
|
||||||
|
|
||||||
if 'auth' in pivot_data.columns and 'trust' in pivot_data.columns:
|
|
||||||
pivot_data['overhead_pct'] = ((pivot_data['auth'] - pivot_data['trust']) / pivot_data['trust'] * 100)
|
|
||||||
pivot_data['overhead_pct'].plot(kind='bar', ax=ax2, color='coral')
|
|
||||||
ax2.set_title('Auth vs Trust: Additional Overhead (%)', fontsize=12, fontweight='bold')
|
|
||||||
ax2.set_xlabel('Protocol', fontsize=10)
|
|
||||||
ax2.set_ylabel('Additional Overhead (%)', fontsize=10)
|
|
||||||
ax2.axhline(y=0, color='black', linestyle='--', linewidth=0.8)
|
|
||||||
ax2.tick_params(axis='x', rotation=45)
|
|
||||||
ax2.grid(axis='y', alpha=0.3)
|
|
||||||
|
|
||||||
plt.tight_layout()
|
|
||||||
plt.savefig(f'{output_dir}/dnssec_impact.png', bbox_inches='tight')
|
|
||||||
plt.close()
|
|
||||||
print(f"✓ Saved: dnssec_impact.png")
|
|
||||||
|
|
||||||
def plot_persistence_impact(self, output_dir='plots'):
|
|
||||||
"""Analyze impact of connection persistence"""
|
|
||||||
Path(output_dir).mkdir(exist_ok=True)
|
|
||||||
|
|
||||||
persist_data = self.df[self.df['protocol_base'].isin(['tls', 'https'])].copy()
|
|
||||||
|
|
||||||
if len(persist_data) == 0:
|
|
||||||
print("⚠ No persistence data available")
|
|
||||||
return
|
|
||||||
|
|
||||||
plt.figure(figsize=(12, 6))
|
|
||||||
|
|
||||||
sns.barplot(data=persist_data, x='protocol_base', y='duration_ms',
|
|
||||||
hue='persistence', ci=95)
|
|
||||||
|
|
||||||
plt.title('Impact of Connection Persistence on Latency', fontsize=14, fontweight='bold')
|
|
||||||
plt.xlabel('Protocol', fontsize=12)
|
|
||||||
plt.ylabel('Mean Response Time (ms)', fontsize=12)
|
|
||||||
plt.legend(title='Keep-Alive', labels=['Disabled', 'Enabled'])
|
|
||||||
|
|
||||||
# Calculate and annotate overhead reduction
|
|
||||||
for protocol in persist_data['protocol_base'].unique():
|
|
||||||
protocol_data = persist_data[persist_data['protocol_base'] == protocol]
|
|
||||||
|
|
||||||
no_persist = protocol_data[protocol_data['persistence'] == False]['duration_ms'].mean()
|
|
||||||
with_persist = protocol_data[protocol_data['persistence'] == True]['duration_ms'].mean()
|
|
||||||
|
|
||||||
if not np.isnan(no_persist) and not np.isnan(with_persist):
|
|
||||||
reduction = ((no_persist - with_persist) / no_persist * 100)
|
|
||||||
print(f"{protocol}: {reduction:.1f}% reduction with persistence")
|
|
||||||
|
|
||||||
plt.tight_layout()
|
|
||||||
plt.savefig(f'{output_dir}/persistence_impact.png', bbox_inches='tight')
|
|
||||||
plt.close()
|
|
||||||
print(f"✓ Saved: persistence_impact.png")
|
|
||||||
|
|
||||||
def plot_bandwidth_overhead(self, output_dir='plots'):
|
|
||||||
"""Visualize bandwidth usage by protocol"""
|
|
||||||
Path(output_dir).mkdir(exist_ok=True)
|
|
||||||
|
|
||||||
bandwidth_data = self.df.groupby('protocol_category').agg({
|
|
||||||
'request_size_bytes': 'mean',
|
|
||||||
'response_size_bytes': 'mean'
|
|
||||||
}).reset_index()
|
|
||||||
|
|
||||||
bandwidth_data['total_bytes'] = (bandwidth_data['request_size_bytes'] +
|
|
||||||
bandwidth_data['response_size_bytes'])
|
|
||||||
|
|
||||||
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 6))
|
|
||||||
|
|
||||||
# Plot 1: Request vs Response sizes
|
|
||||||
x = np.arange(len(bandwidth_data))
|
|
||||||
width = 0.35
|
|
||||||
|
|
||||||
ax1.bar(x - width/2, bandwidth_data['request_size_bytes'], width,
|
|
||||||
label='Request', alpha=0.8)
|
|
||||||
ax1.bar(x + width/2, bandwidth_data['response_size_bytes'], width,
|
|
||||||
label='Response', alpha=0.8)
|
|
||||||
|
|
||||||
ax1.set_xlabel('Protocol', fontsize=12)
|
|
||||||
ax1.set_ylabel('Bytes', fontsize=12)
|
|
||||||
ax1.set_title('Average Request/Response Sizes', fontsize=12, fontweight='bold')
|
|
||||||
ax1.set_xticks(x)
|
|
||||||
ax1.set_xticklabels(bandwidth_data['protocol_category'])
|
|
||||||
ax1.legend()
|
|
||||||
ax1.grid(axis='y', alpha=0.3)
|
|
||||||
|
|
||||||
# Plot 2: Total bandwidth overhead vs UDP baseline
|
|
||||||
udp_total = bandwidth_data[bandwidth_data['protocol_category'] == 'Plain DNS']['total_bytes'].values
|
|
||||||
if len(udp_total) > 0:
|
|
||||||
bandwidth_data['overhead_vs_udp'] = ((bandwidth_data['total_bytes'] - udp_total[0]) / udp_total[0] * 100)
|
|
||||||
|
|
||||||
colors = ['green' if x < 0 else 'red' for x in bandwidth_data['overhead_vs_udp']]
|
|
||||||
ax2.bar(bandwidth_data['protocol_category'], bandwidth_data['overhead_vs_udp'],
|
|
||||||
color=colors, alpha=0.7)
|
|
||||||
ax2.axhline(y=0, color='black', linestyle='--', linewidth=0.8)
|
|
||||||
ax2.set_xlabel('Protocol', fontsize=12)
|
|
||||||
ax2.set_ylabel('Overhead vs Plain DNS (%)', fontsize=12)
|
|
||||||
ax2.set_title('Bandwidth Overhead', fontsize=12, fontweight='bold')
|
|
||||||
ax2.grid(axis='y', alpha=0.3)
|
|
||||||
|
|
||||||
plt.tight_layout()
|
|
||||||
plt.savefig(f'{output_dir}/bandwidth_overhead.png', bbox_inches='tight')
|
|
||||||
plt.close()
|
|
||||||
print(f"✓ Saved: bandwidth_overhead.png")
|
|
||||||
|
|
||||||
def plot_heatmap(self, output_dir='plots'):
|
|
||||||
"""Heatmap of provider-protocol performance"""
|
|
||||||
Path(output_dir).mkdir(exist_ok=True)
|
|
||||||
|
|
||||||
# Create pivot table
|
|
||||||
heatmap_data = self.df.groupby(['provider', 'protocol_category'])['duration_ms'].median().unstack()
|
|
||||||
|
|
||||||
plt.figure(figsize=(12, 8))
|
|
||||||
sns.heatmap(heatmap_data, annot=True, fmt='.1f', cmap='RdYlGn_r',
|
|
||||||
cbar_kws={'label': 'Median Latency (ms)'})
|
|
||||||
|
|
||||||
plt.title('DNS Provider-Protocol Performance Matrix', fontsize=14, fontweight='bold')
|
|
||||||
plt.xlabel('Protocol', fontsize=12)
|
|
||||||
plt.ylabel('Provider', fontsize=12)
|
|
||||||
|
|
||||||
plt.tight_layout()
|
|
||||||
plt.savefig(f'{output_dir}/provider_protocol_heatmap.png', bbox_inches='tight')
|
|
||||||
plt.close()
|
|
||||||
print(f"✓ Saved: provider_protocol_heatmap.png")
|
|
||||||
|
|
||||||
def plot_percentile_comparison(self, output_dir='plots'):
    """Grouped bar chart of latency percentiles (P50..P99) per protocol."""
    Path(output_dir).mkdir(exist_ok=True)

    pcts = [50, 75, 90, 95, 99]
    preferred = ['Plain DNS', 'DoT', 'DoH', 'DoH/3', 'DoQ']
    # Keep the preferred display order, dropping protocols with no data.
    present = [name for name in preferred
               if name in self.df['protocol_category'].values]

    rows = []
    for name in present:
        samples = self.df[self.df['protocol_category'] == name]['duration_ms']
        rows.extend({'protocol': name,
                     'percentile': f'P{p}',
                     'latency': np.percentile(samples, p)}
                    for p in pcts)

    pct_df = pd.DataFrame(rows)

    plt.figure(figsize=(14, 7))
    sns.barplot(data=pct_df, x='protocol', y='latency', hue='percentile', order=present)

    plt.title('Latency Percentiles by Protocol', fontsize=14, fontweight='bold')
    plt.xlabel('Protocol', fontsize=12)
    plt.ylabel('Response Time (ms)', fontsize=12)
    plt.legend(title='Percentile', bbox_to_anchor=(1.05, 1), loc='upper left')

    plt.tight_layout()
    plt.savefig(f'{output_dir}/percentile_comparison.png', bbox_inches='tight')
    plt.close()
    print(f"✓ Saved: percentile_comparison.png")
|
|
||||||
|
|
||||||
def statistical_tests(self):
    """Run non-parametric significance tests on latency and print results."""
    print("\n" + "="*80)
    print("STATISTICAL TESTS")
    print("="*80)

    # Test 1: Kruskal-Wallis across all protocols (needs 3+ groups).
    protocols = self.df['protocol_category'].unique()
    if len(protocols) > 2:
        samples = [self.df[self.df['protocol_category'] == p]['duration_ms'].values
                   for p in protocols]
        h_stat, p_value = stats.kruskal(*samples)
        print(f"\n--- Kruskal-Wallis Test (Protocol Differences) ---")
        print(f"H-statistic: {h_stat:.4f}")
        print(f"p-value: {p_value:.4e}")
        print(f"Result: {'Significant' if p_value < 0.05 else 'Not significant'} differences between protocols")

    # Test 2: Mann-Whitney U, no-DNSSEC vs authoritative validation.
    if 'none' in self.df['dnssec_mode'].values and 'auth' in self.df['dnssec_mode'].values:
        none_data = self.df[self.df['dnssec_mode'] == 'none']['duration_ms']
        auth_data = self.df[self.df['dnssec_mode'] == 'auth']['duration_ms']

        u_stat, p_value = stats.mannwhitneyu(none_data, auth_data, alternative='two-sided')
        print(f"\n--- Mann-Whitney U Test (No DNSSEC vs Auth) ---")
        print(f"U-statistic: {u_stat:.4f}")
        print(f"p-value: {p_value:.4e}")
        print(f"Result: {'Significant' if p_value < 0.05 else 'Not significant'} difference")

    # Test 3: Mann-Whitney U, trust-anchor vs authoritative validation.
    if 'trust' in self.df['dnssec_mode'].values and 'auth' in self.df['dnssec_mode'].values:
        trust_data = self.df[self.df['dnssec_mode'] == 'trust']['duration_ms']
        auth_data = self.df[self.df['dnssec_mode'] == 'auth']['duration_ms']

        u_stat, p_value = stats.mannwhitneyu(trust_data, auth_data, alternative='two-sided')
        print(f"\n--- Mann-Whitney U Test (Trust vs Auth) ---")
        print(f"U-statistic: {u_stat:.4f}")
        print(f"p-value: {p_value:.4e}")
        print(f"Result: Auth is {'significantly' if p_value < 0.05 else 'not significantly'} slower than Trust")
|
|
||||||
|
|
||||||
def generate_latex_table(self, output_dir='plots'):
    """Write a per-protocol latency summary as a LaTeX table and preview it."""
    Path(output_dir).mkdir(exist_ok=True)

    # One row per protocol; columns are the headline latency statistics.
    aggregations = [
        ('Mean', 'mean'),
        ('Median', 'median'),
        ('Std Dev', 'std'),
        ('P95', lambda x: x.quantile(0.95)),
        ('P99', lambda x: x.quantile(0.99)),
    ]
    summary = (self.df
               .groupby('protocol_category')['duration_ms']
               .agg(aggregations)
               .round(2))

    latex_code = summary.to_latex(float_format="%.2f")

    with open(f'{output_dir}/summary_table.tex', 'w') as f:
        f.write(latex_code)

    print(f"✓ Saved: summary_table.tex")
    print("\nLaTeX Table Preview:")
    print(latex_code)
|
|
||||||
|
|
||||||
def run_full_analysis(self):
    """Execute the whole analysis pipeline end to end."""
    print("="*80)
    print("DNS QoS Analysis - Starting Full Analysis")
    print("="*80)

    # (progress label, step) pairs, executed strictly in order.
    pipeline = [
        ("\n[1/10] Loading data...", self.load_all_data),
        ("\n[2/10] Generating summary statistics...", self.generate_summary_statistics),
        ("\n[3/10] Running statistical tests...", self.statistical_tests),
        ("\n[4/10] Creating latency by protocol plot...", self.plot_latency_by_protocol),
        ("\n[5/10] Creating provider comparison plot...", self.plot_provider_comparison),
        ("\n[6/10] Creating DNSSEC impact plot...", self.plot_dnssec_impact),
        ("\n[7/10] Creating persistence impact plot...", self.plot_persistence_impact),
        ("\n[8/10] Creating bandwidth overhead plot...", self.plot_bandwidth_overhead),
        ("\n[9/10] Creating heatmap...", self.plot_heatmap),
        ("\n[10/10] Creating percentile comparison...", self.plot_percentile_comparison),
        ("\n[Bonus] Generating LaTeX table...", self.generate_latex_table),
    ]
    for label, step in pipeline:
        print(label)
        step()

    print("\n" + "="*80)
    print("✓ Analysis Complete! Check the 'plots' directory for all visualizations.")
    print("="*80)
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
    # Entry point: analyze everything under ./results.
    DNSAnalyzer(results_dir='results').run_full_analysis()
|
|
||||||
@@ -1,536 +0,0 @@
|
|||||||
import pandas as pd
|
|
||||||
import matplotlib.pyplot as plt
|
|
||||||
import seaborn as sns
|
|
||||||
import numpy as np
|
|
||||||
from pathlib import Path
|
|
||||||
import datetime
|
|
||||||
from dateutil import parser as date_parser
|
|
||||||
import dpkt
|
|
||||||
|
|
||||||
# Global plot styling: whitegrid theme plus print-quality DPI and a
# compact base font for every figure in this module.
sns.set_style("whitegrid")
plt.rcParams.update({
    'figure.dpi': 300,
    'savefig.dpi': 300,
    'font.size': 10,
})
|
|
||||||
|
|
||||||
class FastDNSAnalyzer:
    """Loads per-query DNS result CSVs, enriches them with bandwidth data
    extracted from matching PCAP captures, and renders plots and tables."""

    def __init__(self, results_dir='results'):
        # Root directory holding one sub-directory per DNS provider.
        self.results_dir = Path(results_dir)
        # One DataFrame per (provider, test) CSV; filled by load_data().
        self.all_data = []
|
|
||||||
|
|
||||||
def should_include_file(self, filename):
    """Return True when a result file belongs in this analysis.

    DNSSEC runs (stems containing 'auth' or 'trust') and the bare
    non-persistent 'tls'/'https' runs are excluded.
    """
    stem = filename.stem
    if 'auth' in stem or 'trust' in stem:
        return False
    return stem not in ('tls', 'https')
|
|
||||||
|
|
||||||
def parse_rfc3339_nano(self, timestamp_str):
    """Convert an RFC3339Nano timestamp string to a UTC Unix epoch float.

    Returns None (after logging the problem) when parsing fails.
    """
    try:
        parsed = date_parser.parse(timestamp_str)
        return parsed.astimezone(datetime.timezone.utc).timestamp()
    except Exception as e:
        print(f" Error parsing timestamp {timestamp_str}: {e}")
        return None
|
|
||||||
|
|
||||||
def extract_bandwidth_from_pcap_fast(self, pcap_file, csv_data):
    """Fast bandwidth extraction using dpkt.

    Attributes each captured packet to the query whose [start, end] time
    window contains it, and returns per-query byte/packet counts (or None
    on failure).
    """
    print(f" Analyzing pcap: {pcap_file.name}")

    try:
        with open(pcap_file, 'rb') as f:
            reader = dpkt.pcap.Reader(f)

            # One time window per query row: [start, start + duration].
            windows = []
            for idx, row in csv_data.iterrows():
                begin = self.parse_rfc3339_nano(row['timestamp'])
                if begin is None:
                    continue

                windows.append({
                    'index': idx,
                    'start': begin,
                    'end': begin + row['duration_ns'] / 1_000_000_000,
                    'bytes_sent': 0,
                    'bytes_received': 0,
                    'packets_sent': 0,
                    'packets_received': 0,
                })

            if not windows:
                print(" ✗ No valid query windows")
                return None

            # Sorting by start time lets the per-packet scan stop early.
            windows.sort(key=lambda w: w['start'])

            seen = 0
            matched = 0

            for ts, frame in reader:
                seen += 1
                size = len(frame)

                try:
                    eth = dpkt.ethernet.Ethernet(frame)

                    # Accept IPv4 or IPv6; skip anything else (ARP, ...).
                    if isinstance(eth.data, (dpkt.ip.IP, dpkt.ip6.IP6)):
                        ip = eth.data
                    else:
                        continue

                    # Only UDP and TCP carry the DNS traffic measured here.
                    if isinstance(ip.data, (dpkt.udp.UDP, dpkt.tcp.TCP)):
                        src_port = ip.data.sport
                        dst_port = ip.data.dport
                    else:
                        continue

                    # Direction heuristic: the client side normally uses the
                    # higher (ephemeral) port, so src > dst means outbound.
                    # NOTE(review): assumes the resolver listens on a
                    # well-known low port — confirm for nonstandard setups.
                    outbound = src_port > dst_port

                    # Linear scan over the start-sorted windows; the early
                    # break below keeps it cheap (not a true binary search).
                    for window in windows:
                        if window['start'] <= ts <= window['end']:
                            if outbound:
                                window['bytes_sent'] += size
                                window['packets_sent'] += 1
                            else:
                                window['bytes_received'] += size
                                window['packets_received'] += 1
                            matched += 1
                            break
                        elif ts < window['start']:
                            break  # Sorted: no later window can match.

                except Exception:
                    continue

            print(f" ✓ Processed {seen} packets, matched {matched}")

            result = pd.DataFrame(windows)
            return result[['index', 'bytes_sent', 'bytes_received',
                           'packets_sent', 'packets_received']]

    except Exception as e:
        print(f" ✗ Error reading pcap: {e}")
        return None
|
|
||||||
|
|
||||||
def load_data(self):
    """Load all relevant CSV files and extract bandwidth from pcaps.

    Appends one annotated DataFrame per accepted CSV to self.all_data.
    """
    print("Loading data and analyzing bandwidth...")

    for provider_dir in self.results_dir.iterdir():
        if not provider_dir.is_dir():
            continue

        provider = provider_dir.name

        for csv_file in provider_dir.glob('*.csv'):
            if not self.should_include_file(csv_file):
                continue

            try:
                frame = pd.read_csv(csv_file)
                frame['provider'] = provider
                frame['test_file'] = csv_file.stem
                frame['csv_path'] = str(csv_file)

                # A capture sharing the CSV's stem holds the raw packets.
                pcap_file = csv_file.with_suffix('.pcap')
                if pcap_file.exists():
                    print(f" Processing: {provider}/{csv_file.name}")
                    bandwidth = self.extract_bandwidth_from_pcap_fast(pcap_file, frame)

                    if bandwidth is not None and len(bandwidth) > 0:
                        # Start all counters at zero, then copy matched
                        # per-query counts back onto the CSV rows.
                        frame = frame.reset_index(drop=True)
                        for col in ['bytes_sent', 'bytes_received', 'packets_sent', 'packets_received']:
                            frame[col] = 0

                        for _, bw_row in bandwidth.iterrows():
                            pos = int(bw_row['index'])
                            if pos < len(frame):
                                frame.at[pos, 'bytes_sent'] = bw_row['bytes_sent']
                                frame.at[pos, 'bytes_received'] = bw_row['bytes_received']
                                frame.at[pos, 'packets_sent'] = bw_row['packets_sent']
                                frame.at[pos, 'packets_received'] = bw_row['packets_received']

                        frame['total_bytes'] = frame['bytes_sent'] + frame['bytes_received']

                        print(f" ✓ Extracted bandwidth for {len(frame)} queries")
                    else:
                        print(f" ⚠ Could not extract bandwidth data")
                else:
                    print(f" ⚠ No pcap found for {csv_file.name}")

                self.all_data.append(frame)

            except Exception as e:
                print(f" ✗ Error loading {csv_file}: {e}")
                import traceback
                traceback.print_exc()

    print(f"\nTotal files loaded: {len(self.all_data)}")
|
|
||||||
|
|
||||||
def create_line_graphs(self, output_dir='output/line_graphs'):
    """Create line graphs for latency and bandwidth.

    Writes one PNG per loaded (provider, test) DataFrame into output_dir,
    named '{provider}_{test}.png'.
    """
    Path(output_dir).mkdir(parents=True, exist_ok=True)

    print("\nGenerating line graphs...")

    for df in self.all_data:
        provider = df['provider'].iloc[0]
        test_name = df['test_file'].iloc[0]

        # 1-based query sequence for the x-axis.
        df['query_index'] = range(1, len(df) + 1)

        # Create figure with 2 subplots: latency on top, bandwidth below.
        fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(14, 10))

        # Plot 1: per-query latency with its mean as a reference line.
        ax1.plot(df['query_index'], df['duration_ms'], marker='o',
                 markersize=4, linewidth=1, alpha=0.7, color='steelblue')
        mean_latency = df['duration_ms'].mean()
        ax1.axhline(y=mean_latency, color='r', linestyle='--',
                    label=f'Mean: {mean_latency:.2f} ms', linewidth=2)
        ax1.set_xlabel('Query Number', fontsize=12)
        ax1.set_ylabel('Latency (ms)', fontsize=12)
        ax1.set_title('Latency Over Time', fontsize=12, fontweight='bold')
        ax1.legend()
        ax1.grid(True, alpha=0.3)

        # Plot 2: bytes sent/received per query, only when the pcap
        # enrichment produced data (column present and non-zero).
        if 'total_bytes' in df.columns and df['total_bytes'].sum() > 0:
            ax2.plot(df['query_index'], df['bytes_sent'], marker='s',
                     markersize=4, linewidth=1, alpha=0.7,
                     color='orange', label='Sent')
            ax2.plot(df['query_index'], df['bytes_received'], marker='^',
                     markersize=4, linewidth=1, alpha=0.7,
                     color='green', label='Received')

            mean_sent = df['bytes_sent'].mean()
            mean_received = df['bytes_received'].mean()
            ax2.axhline(y=mean_sent, color='orange', linestyle='--',
                        linewidth=1.5, alpha=0.5)
            ax2.axhline(y=mean_received, color='green', linestyle='--',
                        linewidth=1.5, alpha=0.5)

            ax2.set_xlabel('Query Number', fontsize=12)
            ax2.set_ylabel('Bytes', fontsize=12)
            ax2.set_title(f'Bandwidth Over Time (Mean: ↑{mean_sent:.0f}B ↓{mean_received:.0f}B)',
                          fontsize=12, fontweight='bold')
            ax2.legend()
            ax2.grid(True, alpha=0.3)

        fig.suptitle(f'{provider.upper()} - {test_name}',
                     fontsize=14, fontweight='bold')
        plt.tight_layout()

        # BUG FIX: the save path previously used a literal '(unknown)'
        # placeholder instead of the computed per-test filename, so every
        # figure overwrote the same file. Use the real filename.
        filename = f"{provider}_{test_name}.png"
        plt.savefig(f'{output_dir}/{filename}', bbox_inches='tight')
        plt.close()

        print(f" ✓ Created: {filename}")
|
|
||||||
|
|
||||||
def get_protocol_name(self, test_file):
    """Map a test-file stem (e.g. 'tls-persist') to a display name."""
    display_names = {
        'udp': 'Plain DNS (UDP)',
        'tls': 'DoT (DNS over TLS)',
        'https': 'DoH (DNS over HTTPS)',
        'doh3': 'DoH/3 (DNS over HTTP/3)',
        'doq': 'DoQ (DNS over QUIC)',
    }
    key = test_file.replace('-persist', '')
    # Unknown protocols fall back to their upper-cased stem.
    return display_names.get(key, key.upper())
|
|
||||||
|
|
||||||
def create_resolver_comparison_bars(self, output_dir='output/comparisons'):
    """Create bar graphs comparing resolvers for latency and bandwidth."""
    Path(output_dir).mkdir(parents=True, exist_ok=True)

    print("\nGenerating resolver comparison graphs...")

    combined = pd.concat(self.all_data, ignore_index=True)

    for protocol in combined['test_file'].unique():
        subset = combined[combined['test_file'] == protocol]
        display_name = self.get_protocol_name(protocol)

        # --- Latency: mean (with std error bars) and median per resolver ---
        latency = subset.groupby('provider')['duration_ms'].agg([
            ('mean', 'mean'),
            ('median', 'median'),
            ('std', 'std'),
        ]).reset_index()

        fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 6))
        fig.suptitle(f'{display_name} - Latency Comparison',
                     fontsize=16, fontweight='bold')

        mean_bars = ax1.bar(latency['provider'], latency['mean'],
                            color='steelblue', alpha=0.8, edgecolor='black')
        ax1.errorbar(latency['provider'], latency['mean'],
                     yerr=latency['std'], fmt='none', color='black',
                     capsize=5, alpha=0.6)

        # Annotate each bar with its value.
        for bar in mean_bars:
            top = bar.get_height()
            ax1.text(bar.get_x() + bar.get_width()/2., top,
                     f'{top:.2f}',
                     ha='center', va='bottom', fontweight='bold')

        ax1.set_xlabel('Resolver', fontsize=12)
        ax1.set_ylabel('Mean Latency (ms)', fontsize=12)
        ax1.set_title('Mean Latency', fontsize=12)
        ax1.grid(axis='y', alpha=0.3)

        median_bars = ax2.bar(latency['provider'], latency['median'],
                              color='coral', alpha=0.8, edgecolor='black')

        for bar in median_bars:
            top = bar.get_height()
            ax2.text(bar.get_x() + bar.get_width()/2., top,
                     f'{top:.2f}',
                     ha='center', va='bottom', fontweight='bold')

        ax2.set_xlabel('Resolver', fontsize=12)
        ax2.set_ylabel('Median Latency (ms)', fontsize=12)
        ax2.set_title('Median Latency', fontsize=12)
        ax2.grid(axis='y', alpha=0.3)

        plt.tight_layout()
        plt.savefig(f'{output_dir}/latency_{protocol}.png', bbox_inches='tight')
        plt.close()
        print(f" ✓ Created: latency_{protocol}.png")

        # --- Bandwidth: sent vs received, plus totals, per resolver ---
        if 'total_bytes' in subset.columns and subset['total_bytes'].sum() > 0:
            bw = subset.groupby('provider').agg({
                'bytes_sent': 'mean',
                'bytes_received': 'mean',
                'total_bytes': 'mean'
            }).reset_index()

            fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 6))
            fig.suptitle(f'{display_name} - Bandwidth Comparison',
                         fontsize=16, fontweight='bold')

            positions = np.arange(len(bw))
            width = 0.35

            ax1.bar(positions - width/2, bw['bytes_sent'], width,
                    label='Sent', color='orange', alpha=0.8, edgecolor='black')
            ax1.bar(positions + width/2, bw['bytes_received'], width,
                    label='Received', color='green', alpha=0.8, edgecolor='black')

            ax1.set_xlabel('Resolver', fontsize=12)
            ax1.set_ylabel('Bytes per Query', fontsize=12)
            ax1.set_title('Average Bandwidth per Query', fontsize=12)
            ax1.set_xticks(positions)
            ax1.set_xticklabels(bw['provider'])
            ax1.legend()
            ax1.grid(axis='y', alpha=0.3)

            total_bars = ax2.bar(bw['provider'], bw['total_bytes'],
                                 color='purple', alpha=0.8, edgecolor='black')

            for bar in total_bars:
                top = bar.get_height()
                ax2.text(bar.get_x() + bar.get_width()/2., top,
                         f'{top:.0f}',
                         ha='center', va='bottom', fontweight='bold')

            ax2.set_xlabel('Resolver', fontsize=12)
            ax2.set_ylabel('Total Bytes per Query', fontsize=12)
            ax2.set_title('Total Bandwidth per Query', fontsize=12)
            ax2.grid(axis='y', alpha=0.3)

            plt.tight_layout()
            plt.savefig(f'{output_dir}/bandwidth_{protocol}.png', bbox_inches='tight')
            plt.close()
            print(f" ✓ Created: bandwidth_{protocol}.png")
|
|
||||||
|
|
||||||
def generate_latex_tables(self, output_dir='output/tables'):
    """Generate LaTeX tables with latency and bandwidth statistics.

    Writes, under output_dir:
      * {provider}_latency.tex   - latency stats per protocol
      * {provider}_bandwidth.tex - bandwidth stats per protocol (if any)
      * protocol_efficiency.tex  - protocol overhead vs the UDP baseline
      * comparison_{metric}.tex  - resolver-vs-protocol pivot per metric
    """
    Path(output_dir).mkdir(parents=True, exist_ok=True)

    print("\nGenerating LaTeX tables...")

    combined_df = pd.concat(self.all_data, ignore_index=True)

    # --- Per-provider latency tables ---
    for provider in combined_df['provider'].unique():
        provider_data = combined_df[combined_df['provider'] == provider]

        stats = provider_data.groupby('test_file')['duration_ms'].agg([
            ('Mean', 'mean'),
            ('Median', 'median'),
            ('Std Dev', 'std'),
            ('P95', lambda x: x.quantile(0.95)),
            ('P99', lambda x: x.quantile(0.99))
        ]).round(2)

        stats.index = stats.index.map(self.get_protocol_name)
        stats.index.name = 'Protocol'

        latex_code = stats.to_latex(
            caption=f'{provider.upper()} - Latency Statistics (ms)',
            label=f'tab:{provider}_latency',
            float_format="%.2f"
        )

        with open(f'{output_dir}/{provider}_latency.tex', 'w') as f:
            f.write(latex_code)

        print(f" ✓ Created: {provider}_latency.tex")

    # --- Per-provider bandwidth tables (skipped without pcap data) ---
    for provider in combined_df['provider'].unique():
        provider_data = combined_df[combined_df['provider'] == provider]

        if 'total_bytes' not in provider_data.columns or provider_data['total_bytes'].sum() == 0:
            continue

        bandwidth_stats = provider_data.groupby('test_file').agg({
            'bytes_sent': 'mean',
            'bytes_received': 'mean',
            'total_bytes': 'mean'
        }).round(2)

        bandwidth_stats.columns = ['Avg Sent (B)', 'Avg Received (B)', 'Avg Total (B)']
        bandwidth_stats.index = bandwidth_stats.index.map(self.get_protocol_name)
        bandwidth_stats.index.name = 'Protocol'

        latex_code = bandwidth_stats.to_latex(
            caption=f'{provider.upper()} - Bandwidth Statistics',
            label=f'tab:{provider}_bandwidth',
            float_format="%.2f"
        )

        with open(f'{output_dir}/{provider}_bandwidth.tex', 'w') as f:
            f.write(latex_code)

        print(f" ✓ Created: {provider}_bandwidth.tex")

    # --- Protocol efficiency vs the UDP baseline ---
    print("\nGenerating protocol efficiency table...")

    if 'total_bytes' in combined_df.columns and combined_df['total_bytes'].sum() > 0:
        protocol_bandwidth = combined_df.groupby('test_file').agg({
            'bytes_sent': 'mean',
            'bytes_received': 'mean',
            'total_bytes': 'mean'
        }).round(2)

        # The first test file containing 'udp' serves as the baseline.
        udp_baseline = None
        for protocol in protocol_bandwidth.index:
            if 'udp' in protocol:
                udp_baseline = protocol_bandwidth.loc[protocol, 'total_bytes']
                break

        if udp_baseline and udp_baseline > 0:
            protocol_bandwidth['Overhead vs UDP (%)'] = (
                (protocol_bandwidth['total_bytes'] - udp_baseline) / udp_baseline * 100
            ).round(1)
            protocol_bandwidth['Efficiency (%)'] = (
                100 / (1 + protocol_bandwidth['Overhead vs UDP (%)'] / 100)
            ).round(1)

            protocol_bandwidth.columns = ['Avg Sent (B)', 'Avg Received (B)',
                                          'Avg Total (B)', 'Overhead (%)', 'Efficiency (%)']
            protocol_bandwidth.index = protocol_bandwidth.index.map(self.get_protocol_name)
            protocol_bandwidth.index.name = 'Protocol'

            latex_code = protocol_bandwidth.to_latex(
                caption='Protocol Bandwidth Efficiency Comparison',
                label='tab:protocol_efficiency',
                float_format="%.2f"
            )

            with open(f'{output_dir}/protocol_efficiency.tex', 'w') as f:
                f.write(latex_code)

            print(f" ✓ Created: protocol_efficiency.tex")
            print("\n--- Protocol Efficiency ---")
            print(protocol_bandwidth.to_string())

    # --- Resolver-vs-protocol pivots, one table per headline metric ---
    # PERF FIX: this aggregation is loop-invariant; the original recomputed
    # the identical groupby/agg inside each iteration of the metric loop.
    comparison_stats = combined_df.groupby(['provider', 'test_file'])['duration_ms'].agg([
        ('Mean', 'mean'),
        ('Median', 'median'),
        ('P95', lambda x: x.quantile(0.95))
    ]).round(2)

    for metric in ['Mean', 'Median', 'P95']:
        pivot_table = comparison_stats[metric].unstack(level=0)
        pivot_table.index = pivot_table.index.map(self.get_protocol_name)
        pivot_table.index.name = 'Protocol'

        latex_code = pivot_table.to_latex(
            caption=f'Resolver Latency Comparison - {metric} (ms)',
            label=f'tab:comparison_{metric.lower()}',
            float_format="%.2f"
        )

        with open(f'{output_dir}/comparison_{metric.lower()}.tex', 'w') as f:
            f.write(latex_code)

        print(f" ✓ Created: comparison_{metric.lower()}.tex")
|
|
||||||
|
|
||||||
def run_analysis(self):
    """Run the complete analysis"""
    banner = "=" * 80
    print(banner)
    print("Fast DNS QoS Analysis with Bandwidth")
    print(banner)

    self.load_data()

    # Nothing to analyze without at least one loaded DataFrame.
    if not self.all_data:
        print("\n⚠ No data loaded.")
        return

    # Each stage prints its own progress under a fresh separator.
    for stage in (self.create_line_graphs,
                  self.create_resolver_comparison_bars,
                  self.generate_latex_tables):
        print("\n" + banner)
        stage()

    print("\n" + banner)
    print("✓ Analysis Complete!")
    print(banner)
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
    # Entry point: run the fast analyzer over ./results.
    FastDNSAnalyzer(results_dir='results').run_analysis()
|
|
||||||
+55
-12
@@ -15,6 +15,15 @@ import dpkt
|
|||||||
from dateutil import parser as date_parser
|
from dateutil import parser as date_parser
|
||||||
|
|
||||||
|
|
||||||
|
# CSV columns added by the PCAP bandwidth-enrichment step, in the order
# they are appended to each output row.
BANDWIDTH_COLUMNS = [
    'bytes_sent',
    'bytes_received',
    'packets_sent',
    'packets_received',
    'total_bytes',
]
|
||||||
|
|
||||||
|
|
||||||
class Packet(NamedTuple):
|
class Packet(NamedTuple):
|
||||||
"""Lightweight packet representation."""
|
"""Lightweight packet representation."""
|
||||||
timestamp: float
|
timestamp: float
|
||||||
@@ -36,6 +45,36 @@ class QueryWindow:
|
|||||||
self.pkts_received = 0
|
self.pkts_received = 0
|
||||||
|
|
||||||
|
|
||||||
|
def is_already_processed(csv_path: Path) -> bool:
    """
    Check if CSV has already been processed.
    Returns True if bandwidth columns exist AND at least one row has non-zero data.
    """
    try:
        with open(csv_path, 'r', encoding='utf-8') as f:
            reader = csv.DictReader(f)

            header = reader.fieldnames
            if not header:
                return False

            # Missing any bandwidth column means enrichment never ran.
            if any(col not in header for col in BANDWIDTH_COLUMNS):
                return False

            # Enriched-but-empty files (all zero/blank) still need work.
            for row in reader:
                for col in BANDWIDTH_COLUMNS:
                    if row.get(col, '').strip() not in ('', '0'):
                        return True

            return False

    except Exception:
        # Unreadable/malformed files are treated as unprocessed.
        return False
|
||||||
|
|
||||||
|
|
||||||
def parse_csv_timestamp(ts_str: str) -> float:
|
def parse_csv_timestamp(ts_str: str) -> float:
|
||||||
"""Convert RFC3339Nano timestamp to Unix epoch (seconds)."""
|
"""Convert RFC3339Nano timestamp to Unix epoch (seconds)."""
|
||||||
dt = date_parser.isoparse(ts_str)
|
dt = date_parser.isoparse(ts_str)
|
||||||
@@ -249,24 +288,20 @@ def write_enriched_csv(
|
|||||||
shutil.copy2(csv_path, backup_path)
|
shutil.copy2(csv_path, backup_path)
|
||||||
print(f" Backup: {backup_path.name}")
|
print(f" Backup: {backup_path.name}")
|
||||||
|
|
||||||
# Get fieldnames
|
# Get fieldnames - filter out any existing bandwidth columns to avoid dupes
|
||||||
original_fields = list(queries[0]['data'].keys())
|
original_fields = [
|
||||||
new_fields = [
|
f for f in queries[0]['data'].keys()
|
||||||
'bytes_sent',
|
if f not in BANDWIDTH_COLUMNS
|
||||||
'bytes_received',
|
|
||||||
'packets_sent',
|
|
||||||
'packets_received',
|
|
||||||
'total_bytes',
|
|
||||||
]
|
]
|
||||||
fieldnames = original_fields + new_fields
|
fieldnames = original_fields + BANDWIDTH_COLUMNS
|
||||||
|
|
||||||
with open(csv_path, 'w', encoding='utf-8', newline='') as f:
|
with open(csv_path, 'w', encoding='utf-8', newline='') as f:
|
||||||
writer = csv.DictWriter(f, fieldnames=fieldnames)
|
writer = csv.DictWriter(f, fieldnames=fieldnames)
|
||||||
writer.writeheader()
|
writer.writeheader()
|
||||||
|
|
||||||
for q in queries:
|
for q in queries:
|
||||||
row = q['data'].copy()
|
row = {k: v for k, v in q['data'].items() if k not in BANDWIDTH_COLUMNS}
|
||||||
for field in new_fields:
|
for field in BANDWIDTH_COLUMNS:
|
||||||
row[field] = q[field]
|
row[field] = q[field]
|
||||||
writer.writerow(row)
|
writer.writerow(row)
|
||||||
|
|
||||||
@@ -281,6 +316,7 @@ def process_provider_directory(provider_path: Path):
|
|||||||
|
|
||||||
csv_files = sorted(provider_path.glob('*.csv'))
|
csv_files = sorted(provider_path.glob('*.csv'))
|
||||||
processed = 0
|
processed = 0
|
||||||
|
skipped = 0
|
||||||
total_time = 0
|
total_time = 0
|
||||||
|
|
||||||
for csv_path in csv_files:
|
for csv_path in csv_files:
|
||||||
@@ -294,6 +330,12 @@ def process_provider_directory(provider_path: Path):
|
|||||||
print(f"\n ⚠ Skipping {csv_path.name} - no matching PCAP")
|
print(f"\n ⚠ Skipping {csv_path.name} - no matching PCAP")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
# Check if already processed
|
||||||
|
if is_already_processed(csv_path):
|
||||||
|
print(f"\n ⏭ Skipping {csv_path.name} - already processed")
|
||||||
|
skipped += 1
|
||||||
|
continue
|
||||||
|
|
||||||
print(f"\n 📁 {csv_path.name}")
|
print(f"\n 📁 {csv_path.name}")
|
||||||
file_start = time.time()
|
file_start = time.time()
|
||||||
|
|
||||||
@@ -323,7 +365,8 @@ def process_provider_directory(provider_path: Path):
|
|||||||
print(f" ✓ Completed in {file_time:.2f}s")
|
print(f" ✓ Completed in {file_time:.2f}s")
|
||||||
|
|
||||||
print(f"\n {'='*58}")
|
print(f"\n {'='*58}")
|
||||||
print(f" {provider_path.name}: {processed} files in {total_time:.2f}s")
|
print(f" {provider_path.name}: {processed} processed, {skipped} skipped")
|
||||||
|
print(f" Time: {total_time:.2f}s")
|
||||||
print(f" {'='*58}")
|
print(f" {'='*58}")
|
||||||
|
|
||||||
|
|
||||||
@@ -0,0 +1,207 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Merge all DNS test CSVs into a single unified CSV.
|
||||||
|
Extracts metadata from filenames and directory structure.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import csv
|
||||||
|
import os
|
||||||
|
from pathlib import Path
|
||||||
|
from dateutil import parser as date_parser
|
||||||
|
import argparse
|
||||||
|
|
||||||
|
|
||||||
|
def parse_config(filename: str) -> dict:
|
||||||
|
"""
|
||||||
|
Parse protocol, dnssec_mode, and keep_alive from filename.
|
||||||
|
|
||||||
|
Examples:
|
||||||
|
doh3-auth.csv → protocol=doh3, dnssec=auth, persist=0
|
||||||
|
tls-trust-persist.csv → protocol=tls, dnssec=trust, persist=1
|
||||||
|
https.csv → protocol=https, dnssec=off, persist=0
|
||||||
|
doudp-auth.csv → protocol=doudp, dnssec=auth, persist=0
|
||||||
|
dnscrypt-trust.csv → protocol=dnscrypt, dnssec=trust, persist=0
|
||||||
|
"""
|
||||||
|
base = filename.replace('.csv', '')
|
||||||
|
parts = base.split('-')
|
||||||
|
|
||||||
|
protocol = parts[0]
|
||||||
|
dnssec_mode = 'off'
|
||||||
|
keep_alive = 0
|
||||||
|
|
||||||
|
for part in parts[1:]:
|
||||||
|
if part in ('auth', 'trust'):
|
||||||
|
dnssec_mode = part
|
||||||
|
elif part == 'persist':
|
||||||
|
keep_alive = 1
|
||||||
|
|
||||||
|
return {
|
||||||
|
'protocol': protocol,
|
||||||
|
'dnssec_mode': dnssec_mode,
|
||||||
|
'keep_alive': keep_alive,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def parse_timestamp_unix(ts_str: str) -> float:
|
||||||
|
"""Convert RFC3339 timestamp to Unix epoch."""
|
||||||
|
try:
|
||||||
|
dt = date_parser.isoparse(ts_str)
|
||||||
|
return dt.timestamp()
|
||||||
|
except Exception:
|
||||||
|
return 0.0
|
||||||
|
|
||||||
|
|
||||||
|
def ns_to_ms(duration_ns: str) -> float:
|
||||||
|
"""Convert nanoseconds to milliseconds."""
|
||||||
|
try:
|
||||||
|
return float(duration_ns) / 1_000_000
|
||||||
|
except (ValueError, TypeError):
|
||||||
|
return 0.0
|
||||||
|
|
||||||
|
|
||||||
|
def find_csv_files(input_dir: Path) -> list:
|
||||||
|
"""Find all non-backup CSV files."""
|
||||||
|
files = []
|
||||||
|
for csv_path in input_dir.rglob('*.csv'):
|
||||||
|
if '.bak' in csv_path.name:
|
||||||
|
continue
|
||||||
|
files.append(csv_path)
|
||||||
|
return sorted(files)
|
||||||
|
|
||||||
|
|
||||||
|
def merge_all_csvs(input_dir: Path, output_path: Path):
|
||||||
|
"""Merge all CSVs into a single file."""
|
||||||
|
|
||||||
|
csv_files = find_csv_files(input_dir)
|
||||||
|
|
||||||
|
if not csv_files:
|
||||||
|
print("No CSV files found")
|
||||||
|
return
|
||||||
|
|
||||||
|
print(f"Found {len(csv_files)} CSV files")
|
||||||
|
|
||||||
|
# Output columns in desired order
|
||||||
|
output_columns = [
|
||||||
|
'id',
|
||||||
|
'provider',
|
||||||
|
'protocol',
|
||||||
|
'dnssec_mode',
|
||||||
|
'domain',
|
||||||
|
'query_type',
|
||||||
|
'keep_alive',
|
||||||
|
'dns_server',
|
||||||
|
'timestamp',
|
||||||
|
'timestamp_unix',
|
||||||
|
'duration_ns',
|
||||||
|
'duration_ms',
|
||||||
|
'request_size_bytes',
|
||||||
|
'response_size_bytes',
|
||||||
|
'bytes_sent',
|
||||||
|
'bytes_received',
|
||||||
|
'packets_sent',
|
||||||
|
'packets_received',
|
||||||
|
'total_bytes',
|
||||||
|
'response_code',
|
||||||
|
'error',
|
||||||
|
]
|
||||||
|
|
||||||
|
global_id = 0
|
||||||
|
total_rows = 0
|
||||||
|
|
||||||
|
with open(output_path, 'w', newline='', encoding='utf-8') as outfile:
|
||||||
|
writer = csv.DictWriter(outfile, fieldnames=output_columns)
|
||||||
|
writer.writeheader()
|
||||||
|
|
||||||
|
for csv_path in csv_files:
|
||||||
|
# Extract provider from path
|
||||||
|
provider = csv_path.parent.name.lower()
|
||||||
|
|
||||||
|
# Parse config from filename
|
||||||
|
config = parse_config(csv_path.name)
|
||||||
|
|
||||||
|
print(f" {provider}/{csv_path.name} ({config['protocol']}, {config['dnssec_mode']}, persist={config['keep_alive']})")
|
||||||
|
|
||||||
|
file_rows = 0
|
||||||
|
|
||||||
|
with open(csv_path, 'r', newline='', encoding='utf-8') as infile:
|
||||||
|
reader = csv.DictReader(infile)
|
||||||
|
|
||||||
|
for row in reader:
|
||||||
|
global_id += 1
|
||||||
|
file_rows += 1
|
||||||
|
|
||||||
|
# Build output row
|
||||||
|
out_row = {
|
||||||
|
'id': global_id,
|
||||||
|
'provider': provider,
|
||||||
|
'protocol': config['protocol'],
|
||||||
|
'dnssec_mode': config['dnssec_mode'],
|
||||||
|
'keep_alive': config['keep_alive'],
|
||||||
|
'domain': row.get('domain', ''),
|
||||||
|
'query_type': row.get('query_type', ''),
|
||||||
|
'dns_server': row.get('dns_server', ''),
|
||||||
|
'timestamp': row.get('timestamp', ''),
|
||||||
|
'timestamp_unix': parse_timestamp_unix(row.get('timestamp', '')),
|
||||||
|
'duration_ns': row.get('duration_ns', ''),
|
||||||
|
'duration_ms': ns_to_ms(row.get('duration_ns', '')),
|
||||||
|
'request_size_bytes': row.get('request_size_bytes', ''),
|
||||||
|
'response_size_bytes': row.get('response_size_bytes', ''),
|
||||||
|
'bytes_sent': row.get('bytes_sent', ''),
|
||||||
|
'bytes_received': row.get('bytes_received', ''),
|
||||||
|
'packets_sent': row.get('packets_sent', ''),
|
||||||
|
'packets_received': row.get('packets_received', ''),
|
||||||
|
'total_bytes': row.get('total_bytes', ''),
|
||||||
|
'response_code': row.get('response_code', ''),
|
||||||
|
'error': row.get('error', ''),
|
||||||
|
}
|
||||||
|
|
||||||
|
writer.writerow(out_row)
|
||||||
|
|
||||||
|
total_rows += file_rows
|
||||||
|
print(f" → {file_rows:,} rows")
|
||||||
|
|
||||||
|
print(f"\n{'='*60}")
|
||||||
|
print(f"Output: {output_path}")
|
||||||
|
print(f"Total rows: {total_rows:,}")
|
||||||
|
print(f"{'='*60}")
|
||||||
|
|
||||||
|
|
||||||
|
def main():
|
||||||
|
parser = argparse.ArgumentParser(
|
||||||
|
description='Merge all DNS test CSVs into a single file'
|
||||||
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
'input_dir',
|
||||||
|
nargs='?',
|
||||||
|
default='.',
|
||||||
|
help='Input directory containing provider folders (default: .)'
|
||||||
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
'-o', '--output',
|
||||||
|
default='dns_results.csv',
|
||||||
|
help='Output CSV path (default: dns_results.csv)'
|
||||||
|
)
|
||||||
|
|
||||||
|
args = parser.parse_args()
|
||||||
|
|
||||||
|
input_dir = Path(args.input_dir)
|
||||||
|
output_path = Path(args.output)
|
||||||
|
|
||||||
|
if not input_dir.exists():
|
||||||
|
print(f"Error: Input directory not found: {input_dir}")
|
||||||
|
return 1
|
||||||
|
|
||||||
|
print("="*60)
|
||||||
|
print("MERGE ALL DNS CSVs")
|
||||||
|
print("="*60)
|
||||||
|
print(f"Input: {input_dir}")
|
||||||
|
print(f"Output: {output_path}")
|
||||||
|
print()
|
||||||
|
|
||||||
|
merge_all_csvs(input_dir, output_path)
|
||||||
|
|
||||||
|
return 0
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
exit(main())
|
||||||
Executable
+253
@@ -0,0 +1,253 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
|
||||||
|
# Exit on error
|
||||||
|
set -e
|
||||||
|
|
||||||
|
# Default values
|
||||||
|
TOOL_PATH="./qol"
|
||||||
|
DOMAINS_FILE="./domains.txt"
|
||||||
|
OUTPUT_DIR="./results"
|
||||||
|
INTERFACE="veth1"
|
||||||
|
TIMEOUT="5s"
|
||||||
|
SLEEP_TIME="1"
|
||||||
|
|
||||||
|
# Parse arguments
|
||||||
|
while [[ $# -gt 0 ]]; do
|
||||||
|
case $1 in
|
||||||
|
-t|--tool-path)
|
||||||
|
TOOL_PATH="$2"
|
||||||
|
shift 2
|
||||||
|
;;
|
||||||
|
-d|--domains-file)
|
||||||
|
DOMAINS_FILE="$2"
|
||||||
|
shift 2
|
||||||
|
;;
|
||||||
|
-o|--output-dir)
|
||||||
|
OUTPUT_DIR="$2"
|
||||||
|
shift 2
|
||||||
|
;;
|
||||||
|
-I|--interface)
|
||||||
|
INTERFACE="$2"
|
||||||
|
shift 2
|
||||||
|
;;
|
||||||
|
-T|--timeout)
|
||||||
|
TIMEOUT="$2"
|
||||||
|
shift 2
|
||||||
|
;;
|
||||||
|
-s|--sleep)
|
||||||
|
SLEEP_TIME="$2"
|
||||||
|
shift 2
|
||||||
|
;;
|
||||||
|
--help)
|
||||||
|
echo "Usage: $0 [OPTIONS]"
|
||||||
|
echo ""
|
||||||
|
echo "Options:"
|
||||||
|
echo " -t, --tool-path PATH Path to qol tool (default: ./qol)"
|
||||||
|
echo " -d, --domains-file PATH Path to domains file (default: ./domains.txt)"
|
||||||
|
echo " -o, --output-dir PATH Output directory (default: ./results)"
|
||||||
|
echo " -I, --interface NAME Network interface (default: veth1)"
|
||||||
|
echo " -T, --timeout DURATION Timeout duration (default: 5s)"
|
||||||
|
echo " -s, --sleep SECONDS Sleep between runs (default: 1)"
|
||||||
|
echo " --help Show this help"
|
||||||
|
exit 0
|
||||||
|
;;
|
||||||
|
*)
|
||||||
|
echo "Unknown option: $1"
|
||||||
|
echo "Use --help for usage information"
|
||||||
|
exit 1
|
||||||
|
;;
|
||||||
|
esac
|
||||||
|
done
|
||||||
|
|
||||||
|
echo "Configuration:"
|
||||||
|
echo " Tool path: $TOOL_PATH"
|
||||||
|
echo " Domains file: $DOMAINS_FILE"
|
||||||
|
echo " Output dir: $OUTPUT_DIR"
|
||||||
|
echo " Interface: $INTERFACE"
|
||||||
|
echo " Timeout: $TIMEOUT"
|
||||||
|
echo " Sleep time: ${SLEEP_TIME}s"
|
||||||
|
echo ""
|
||||||
|
|
||||||
|
# Server definitions as associative arrays (name -> url)
|
||||||
|
declare -A CONN_SERVERS=(
|
||||||
|
["google-dotcp"]="dotcp://8.8.8.8:53"
|
||||||
|
["cloudflare-dotcp"]="dotcp://1.1.1.1:53"
|
||||||
|
["quad9-dotcp"]="dotcp://9.9.9.9:53"
|
||||||
|
["adguard-dotcp"]="dotcp://dns.adguard-dns.com:53"
|
||||||
|
["google-dot"]="tls://8.8.8.8:853"
|
||||||
|
["cloudflare-dot"]="tls://1.1.1.1:853"
|
||||||
|
["quad9-dot"]="tls://9.9.9.9:853"
|
||||||
|
["adguard-dot"]="tls://dns.adguard-dns.com:853"
|
||||||
|
["google-doh"]="https://dns.google/dns-query"
|
||||||
|
["cloudflare-doh"]="https://cloudflare-dns.com/dns-query"
|
||||||
|
["quad9-doh"]="https://dns10.quad9.net/dns-query"
|
||||||
|
["adguard-doh"]="https://dns.adguard-dns.com/dns-query"
|
||||||
|
)
|
||||||
|
|
||||||
|
declare -A QUIC_SERVERS=(
|
||||||
|
["google-doh3"]="doh3://dns.google/dns-query"
|
||||||
|
["cloudflare-doh3"]="doh3://cloudflare-dns.com/dns-query"
|
||||||
|
["adguard-doh3"]="doh3://dns.adguard-dns.com/dns-query"
|
||||||
|
["adguard-doq"]="doq://dns.adguard-dns.com:853"
|
||||||
|
)
|
||||||
|
|
||||||
|
declare -A CONNLESS_SERVERS=(
|
||||||
|
["google-udp"]="udp://8.8.8.8:53"
|
||||||
|
["cloudflare-udp"]="udp://1.1.1.1:53"
|
||||||
|
["quad9-udp"]="udp://9.9.9.9:53"
|
||||||
|
["adguard-udp"]="udp://dns.adguard-dns.com:53"
|
||||||
|
["adguard-dnscrypt"]="sdns://AQMAAAAAAAAAETk0LjE0MC4xNC4xNDo1NDQzINErR_JS3PLCu_iZEIbq95zkSV2LFsigxDIuUso_OQhzIjIuZG5zY3J5cHQuZGVmYXVsdC5uczEuYWRndWFyZC5jb20"
|
||||||
|
["quad9-dnscrypt"]="sdns://AQMAAAAAAAAAFDE0OS4xMTIuMTEyLjExMjo4NDQzIGfIR7jIdYzRICRVQ751Z0bfNN8dhMALjEcDaN-CHYY-GTIuZG5zY3J5cHQtY2VydC5xdWFkOS5uZXQ"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Function to get flags suffix for filename
|
||||||
|
get_flags_suffix() {
|
||||||
|
local dnssec="$1"
|
||||||
|
local auth="$2"
|
||||||
|
local keepalive="$3"
|
||||||
|
|
||||||
|
local suffix=""
|
||||||
|
if [[ "$dnssec" == "true" ]]; then
|
||||||
|
if [[ "$auth" == "true" ]]; then
|
||||||
|
suffix="auth"
|
||||||
|
else
|
||||||
|
suffix="trust"
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
if [[ "$keepalive" == "true" ]]; then
|
||||||
|
if [[ -n "$suffix" ]]; then
|
||||||
|
suffix="${suffix}-persist"
|
||||||
|
else
|
||||||
|
suffix="persist"
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
echo "$suffix"
|
||||||
|
}
|
||||||
|
|
||||||
|
# Function to run with perf and capture CPU metrics
|
||||||
|
run_with_perf() {
|
||||||
|
local name="$1"
|
||||||
|
local url="$2"
|
||||||
|
local dnssec="$3"
|
||||||
|
local auth="$4"
|
||||||
|
local keepalive="$5"
|
||||||
|
|
||||||
|
local suffix=$(get_flags_suffix "$dnssec" "$auth" "$keepalive")
|
||||||
|
local base_name="${name}"
|
||||||
|
if [[ -n "$suffix" ]]; then
|
||||||
|
base_name="${name}-${suffix}"
|
||||||
|
fi
|
||||||
|
local cpu_csv_file="$OUTPUT_DIR/${name%%-*}/${base_name}.cpu.csv" # e.g., results/cloudflare/dot-trust-persist.cpu.csv
|
||||||
|
|
||||||
|
# Write header if needed
|
||||||
|
if [[ ! -f "$cpu_csv_file" ]]; then
|
||||||
|
echo "timestamp,wall_time_seconds,instructions,cycles,peak_rss_kb" > "$cpu_csv_file"
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Build command arguments
|
||||||
|
local cmd_args=(
|
||||||
|
"$DOMAINS_FILE"
|
||||||
|
--output-dir "$OUTPUT_DIR"
|
||||||
|
--interface "$INTERFACE"
|
||||||
|
--timeout "$TIMEOUT"
|
||||||
|
-s "$url"
|
||||||
|
)
|
||||||
|
|
||||||
|
if [[ "$dnssec" == "true" ]]; then
|
||||||
|
cmd_args+=(--dnssec)
|
||||||
|
if [[ "$auth" == "true" ]]; then
|
||||||
|
cmd_args+=(--auth-dnssec)
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [[ "$keepalive" == "true" ]]; then
|
||||||
|
cmd_args+=(--keep-alive)
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Create temp files for perf and time output
|
||||||
|
local perf_tmp=$(mktemp)
|
||||||
|
local time_tmp=$(mktemp)
|
||||||
|
|
||||||
|
# Run with perf stat and /usr/bin/time
|
||||||
|
local timestamp=$(date -Iseconds)
|
||||||
|
|
||||||
|
sudo perf stat -e instructions,cycles \
|
||||||
|
-o "$perf_tmp" \
|
||||||
|
/usr/bin/time -v \
|
||||||
|
"$TOOL_PATH" run "${cmd_args[@]}" 2>"$time_tmp" || true
|
||||||
|
|
||||||
|
# Parse perf output
|
||||||
|
local instructions=$(grep -oP '\d[\d,]*(?=\s+instructions)' "$perf_tmp" 2>/dev/null | tr -d ',' || echo "0")
|
||||||
|
local cycles=$(grep -oP '\d[\d,]*(?=\s+cycles)' "$perf_tmp" 2>/dev/null | tr -d ',' || echo "0")
|
||||||
|
local wall_time=$(grep -oP '\d+\.\d+(?= seconds time elapsed)' "$perf_tmp" 2>/dev/null || echo "0")
|
||||||
|
|
||||||
|
# Parse /usr/bin/time output for peak RSS
|
||||||
|
local peak_rss=$(grep "Maximum resident set size" "$time_tmp" 2>/dev/null | grep -oP '\d+' || echo "0")
|
||||||
|
|
||||||
|
# Append to CPU CSV
|
||||||
|
echo "${timestamp},${wall_time},${instructions},${cycles},${peak_rss}" >> "$cpu_csv_file"
|
||||||
|
|
||||||
|
# Cleanup
|
||||||
|
rm -f "$perf_tmp" "$time_tmp"
|
||||||
|
|
||||||
|
echo " -> CPU metrics saved to ${base_name}.cpu.csv"
|
||||||
|
}
|
||||||
|
|
||||||
|
# Function to run servers with given flags
|
||||||
|
run_server_group() {
|
||||||
|
local -n servers=$1
|
||||||
|
local dnssec="$2"
|
||||||
|
local auth="$3"
|
||||||
|
local keepalive="$4"
|
||||||
|
local desc="$5"
|
||||||
|
|
||||||
|
echo "Running: $desc"
|
||||||
|
|
||||||
|
for name in "${!servers[@]}"; do
|
||||||
|
local url="${servers[$name]}"
|
||||||
|
echo " Processing: $name ($url)"
|
||||||
|
run_with_perf "$name" "$url" "$dnssec" "$auth" "$keepalive"
|
||||||
|
sleep "$SLEEP_TIME"
|
||||||
|
done
|
||||||
|
}
|
||||||
|
|
||||||
|
echo "=== Running TCP-based protocols (TLS/HTTPS) ==="
|
||||||
|
|
||||||
|
# DNSSEC off, Keep off
|
||||||
|
run_server_group CONN_SERVERS "false" "false" "false" "no-dnssec, no-keepalive"
|
||||||
|
|
||||||
|
# DNSSEC off, Keep on
|
||||||
|
run_server_group CONN_SERVERS "false" "false" "true" "no-dnssec, keepalive"
|
||||||
|
|
||||||
|
# DNSSEC on (trust), Keep on
|
||||||
|
run_server_group CONN_SERVERS "true" "false" "true" "dnssec-trust, keepalive"
|
||||||
|
|
||||||
|
# DNSSEC on (auth), Keep on
|
||||||
|
run_server_group CONN_SERVERS "true" "true" "true" "dnssec-auth, keepalive"
|
||||||
|
|
||||||
|
echo ""
|
||||||
|
echo "=== Running QUIC-based protocols (DoH3/DoQ) ==="
|
||||||
|
|
||||||
|
# DNSSEC off
|
||||||
|
run_server_group QUIC_SERVERS "false" "false" "false" "no-dnssec"
|
||||||
|
|
||||||
|
# DNSSEC on (trust)
|
||||||
|
run_server_group QUIC_SERVERS "true" "false" "false" "dnssec-trust"
|
||||||
|
|
||||||
|
# DNSSEC on (auth)
|
||||||
|
run_server_group QUIC_SERVERS "true" "true" "false" "dnssec-auth"
|
||||||
|
|
||||||
|
echo ""
|
||||||
|
echo "=== Running connectionless protocols (UDP) ==="
|
||||||
|
|
||||||
|
# DNSSEC off
|
||||||
|
run_server_group CONNLESS_SERVERS "false" "false" "false" "no-dnssec"
|
||||||
|
|
||||||
|
# DNSSEC on (trust)
|
||||||
|
run_server_group CONNLESS_SERVERS "true" "false" "false" "dnssec-trust"
|
||||||
|
|
||||||
|
# DNSSEC on (auth)
|
||||||
|
run_server_group CONNLESS_SERVERS "true" "true" "false" "dnssec-auth"
|
||||||
|
|
||||||
|
echo ""
|
||||||
|
echo "All combinations completed!"
|
||||||
@@ -1,369 +0,0 @@
|
|||||||
package main
|
|
||||||
|
|
||||||
import (
|
|
||||||
"encoding/csv"
|
|
||||||
"fmt"
|
|
||||||
"log"
|
|
||||||
"os"
|
|
||||||
"path/filepath"
|
|
||||||
"strconv"
|
|
||||||
"strings"
|
|
||||||
"time"
|
|
||||||
|
|
||||||
"github.com/google/gopacket"
|
|
||||||
"github.com/google/gopacket/layers"
|
|
||||||
"github.com/google/gopacket/pcapgo"
|
|
||||||
)
|
|
||||||
|
|
||||||
type QueryRecord struct {
|
|
||||||
Domain string
|
|
||||||
QueryType string
|
|
||||||
Protocol string
|
|
||||||
DNSSec string
|
|
||||||
AuthDNSSec string
|
|
||||||
KeepAlive string
|
|
||||||
DNSServer string
|
|
||||||
Timestamp string
|
|
||||||
DurationNs int64
|
|
||||||
DurationMs float64
|
|
||||||
RequestSizeBytes int
|
|
||||||
ResponseSizeBytes int
|
|
||||||
ResponseCode string
|
|
||||||
Error string
|
|
||||||
BytesSent int64
|
|
||||||
BytesReceived int64
|
|
||||||
PacketsSent int64
|
|
||||||
PacketsReceived int64
|
|
||||||
TotalBytes int64
|
|
||||||
}
|
|
||||||
|
|
||||||
func parseRFC3339Nano(ts string) (time.Time, error) {
|
|
||||||
return time.Parse(time.RFC3339Nano, ts)
|
|
||||||
}
|
|
||||||
|
|
||||||
func processProviderFolder(providerPath string) error {
|
|
||||||
providerName := filepath.Base(providerPath)
|
|
||||||
fmt.Printf("\n=== Processing provider: %s ===\n", providerName)
|
|
||||||
|
|
||||||
files, err := os.ReadDir(providerPath)
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
processed := 0
|
|
||||||
skipped := 0
|
|
||||||
errors := 0
|
|
||||||
|
|
||||||
for _, file := range files {
|
|
||||||
if !strings.HasSuffix(file.Name(), ".csv") {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
csvPath := filepath.Join(providerPath, file.Name())
|
|
||||||
pcapPath := strings.Replace(csvPath, ".csv", ".pcap", 1)
|
|
||||||
|
|
||||||
// Check if PCAP exists
|
|
||||||
if _, err := os.Stat(pcapPath); os.IsNotExist(err) {
|
|
||||||
fmt.Printf(" ⊗ Skipping: %s (no matching PCAP)\n", file.Name())
|
|
||||||
skipped++
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
// Check if already processed (has backup)
|
|
||||||
backupPath := csvPath + ".bak"
|
|
||||||
if _, err := os.Stat(backupPath); err == nil {
|
|
||||||
fmt.Printf(" ⊙ Skipping: %s (already processed, backup exists)\n", file.Name())
|
|
||||||
skipped++
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
fmt.Printf(" ↻ Processing: %s ... ", file.Name())
|
|
||||||
if err := processPair(csvPath, pcapPath); err != nil {
|
|
||||||
fmt.Printf("ERROR\n")
|
|
||||||
log.Printf(" Error: %v\n", err)
|
|
||||||
errors++
|
|
||||||
} else {
|
|
||||||
fmt.Printf("✓\n")
|
|
||||||
processed++
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fmt.Printf(" Summary: %d processed, %d skipped, %d errors\n", processed, skipped, errors)
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func processPair(csvPath, pcapPath string) error {
|
|
||||||
// Create backup
|
|
||||||
backupPath := csvPath + ".bak"
|
|
||||||
input, err := os.ReadFile(csvPath)
|
|
||||||
if err != nil {
|
|
||||||
return fmt.Errorf("backup read failed: %w", err)
|
|
||||||
}
|
|
||||||
if err := os.WriteFile(backupPath, input, 0644); err != nil {
|
|
||||||
return fmt.Errorf("backup write failed: %w", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Read CSV records
|
|
||||||
records, err := readCSV(csvPath)
|
|
||||||
if err != nil {
|
|
||||||
return fmt.Errorf("CSV read failed: %w", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
if len(records) == 0 {
|
|
||||||
return fmt.Errorf("no records in CSV")
|
|
||||||
}
|
|
||||||
|
|
||||||
// Read and parse PCAP
|
|
||||||
packets, err := readPCAPGo(pcapPath)
|
|
||||||
if err != nil {
|
|
||||||
return fmt.Errorf("PCAP read failed: %w", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Enrich records with bandwidth data
|
|
||||||
enrichRecords(records, packets)
|
|
||||||
|
|
||||||
// Write enriched CSV
|
|
||||||
if err := writeCSV(csvPath, records); err != nil {
|
|
||||||
return fmt.Errorf("CSV write failed: %w", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func readCSV(path string) ([]*QueryRecord, error) {
|
|
||||||
f, err := os.Open(path)
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
defer f.Close()
|
|
||||||
|
|
||||||
r := csv.NewReader(f)
|
|
||||||
rows, err := r.ReadAll()
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
|
|
||||||
if len(rows) < 2 {
|
|
||||||
return nil, fmt.Errorf("CSV has no data rows")
|
|
||||||
}
|
|
||||||
|
|
||||||
records := make([]*QueryRecord, 0, len(rows)-1)
|
|
||||||
for i := 1; i < len(rows); i++ {
|
|
||||||
row := rows[i]
|
|
||||||
if len(row) < 14 {
|
|
||||||
log.Printf(" Warning: Skipping malformed row %d", i+1)
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
durationNs, _ := strconv.ParseInt(row[8], 10, 64)
|
|
||||||
durationMs, _ := strconv.ParseFloat(row[9], 64)
|
|
||||||
reqSize, _ := strconv.Atoi(row[10])
|
|
||||||
respSize, _ := strconv.Atoi(row[11])
|
|
||||||
|
|
||||||
records = append(records, &QueryRecord{
|
|
||||||
Domain: row[0],
|
|
||||||
QueryType: row[1],
|
|
||||||
Protocol: row[2],
|
|
||||||
DNSSec: row[3],
|
|
||||||
AuthDNSSec: row[4],
|
|
||||||
KeepAlive: row[5],
|
|
||||||
DNSServer: row[6],
|
|
||||||
Timestamp: row[7],
|
|
||||||
DurationNs: durationNs,
|
|
||||||
DurationMs: durationMs,
|
|
||||||
RequestSizeBytes: reqSize,
|
|
||||||
ResponseSizeBytes: respSize,
|
|
||||||
ResponseCode: row[12],
|
|
||||||
Error: row[13],
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
return records, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
type PacketInfo struct {
|
|
||||||
Timestamp time.Time
|
|
||||||
Size int
|
|
||||||
IsSent bool
|
|
||||||
}
|
|
||||||
|
|
||||||
func readPCAPGo(path string) ([]PacketInfo, error) {
|
|
||||||
f, err := os.Open(path)
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
defer f.Close()
|
|
||||||
|
|
||||||
reader, err := pcapgo.NewReader(f)
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
|
|
||||||
var packets []PacketInfo
|
|
||||||
packetSource := gopacket.NewPacketSource(reader, reader.LinkType())
|
|
||||||
|
|
||||||
for packet := range packetSource.Packets() {
|
|
||||||
if packet.NetworkLayer() == nil {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
isDNS := false
|
|
||||||
isSent := false
|
|
||||||
|
|
||||||
// Check UDP layer (DNS, DoQ, DoH3)
|
|
||||||
if udpLayer := packet.Layer(layers.LayerTypeUDP); udpLayer != nil {
|
|
||||||
udp := udpLayer.(*layers.UDP)
|
|
||||||
isDNS = udp.SrcPort == 53 || udp.DstPort == 53 ||
|
|
||||||
udp.SrcPort == 853 || udp.DstPort == 853 ||
|
|
||||||
udp.SrcPort == 443 || udp.DstPort == 443
|
|
||||||
isSent = udp.DstPort == 53 || udp.DstPort == 853 || udp.DstPort == 443
|
|
||||||
}
|
|
||||||
|
|
||||||
// Check TCP layer (DoT, DoH)
|
|
||||||
if tcpLayer := packet.Layer(layers.LayerTypeTCP); tcpLayer != nil {
|
|
||||||
tcp := tcpLayer.(*layers.TCP)
|
|
||||||
isDNS = tcp.SrcPort == 53 || tcp.DstPort == 53 ||
|
|
||||||
tcp.SrcPort == 853 || tcp.DstPort == 853 ||
|
|
||||||
tcp.SrcPort == 443 || tcp.DstPort == 443
|
|
||||||
isSent = tcp.DstPort == 53 || tcp.DstPort == 853 || tcp.DstPort == 443
|
|
||||||
}
|
|
||||||
|
|
||||||
if isDNS {
|
|
||||||
packets = append(packets, PacketInfo{
|
|
||||||
Timestamp: packet.Metadata().Timestamp,
|
|
||||||
Size: len(packet.Data()),
|
|
||||||
IsSent: isSent,
|
|
||||||
})
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return packets, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func enrichRecords(records []*QueryRecord, packets []PacketInfo) {
|
|
||||||
for _, rec := range records {
|
|
||||||
ts, err := parseRFC3339Nano(rec.Timestamp)
|
|
||||||
if err != nil {
|
|
||||||
log.Printf(" Warning: Failed to parse timestamp: %s", rec.Timestamp)
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
// Define time window for this query
|
|
||||||
windowStart := ts
|
|
||||||
windowEnd := ts.Add(time.Duration(rec.DurationNs))
|
|
||||||
|
|
||||||
var sent, recv, pktSent, pktRecv int64
|
|
||||||
|
|
||||||
// Match packets within the time window
|
|
||||||
for _, pkt := range packets {
|
|
||||||
if (pkt.Timestamp.Equal(windowStart) || pkt.Timestamp.After(windowStart)) &&
|
|
||||||
pkt.Timestamp.Before(windowEnd) {
|
|
||||||
if pkt.IsSent {
|
|
||||||
sent += int64(pkt.Size)
|
|
||||||
pktSent++
|
|
||||||
} else {
|
|
||||||
recv += int64(pkt.Size)
|
|
||||||
pktRecv++
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
rec.BytesSent = sent
|
|
||||||
rec.BytesReceived = recv
|
|
||||||
rec.PacketsSent = pktSent
|
|
||||||
rec.PacketsReceived = pktRecv
|
|
||||||
rec.TotalBytes = sent + recv
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func writeCSV(path string, records []*QueryRecord) error {
|
|
||||||
f, err := os.Create(path)
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
defer f.Close()
|
|
||||||
|
|
||||||
w := csv.NewWriter(f)
|
|
||||||
defer w.Flush()
|
|
||||||
|
|
||||||
// Write header
|
|
||||||
header := []string{
|
|
||||||
"domain", "query_type", "protocol", "dnssec", "auth_dnssec",
|
|
||||||
"keep_alive", "dns_server", "timestamp", "duration_ns", "duration_ms",
|
|
||||||
"request_size_bytes", "response_size_bytes", "response_code", "error",
|
|
||||||
"bytes_sent", "bytes_received", "packets_sent", "packets_received", "total_bytes",
|
|
||||||
}
|
|
||||||
if err := w.Write(header); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
// Write data rows
|
|
||||||
for _, rec := range records {
|
|
||||||
row := []string{
|
|
||||||
rec.Domain,
|
|
||||||
rec.QueryType,
|
|
||||||
rec.Protocol,
|
|
||||||
rec.DNSSec,
|
|
||||||
rec.AuthDNSSec,
|
|
||||||
rec.KeepAlive,
|
|
||||||
rec.DNSServer,
|
|
||||||
rec.Timestamp,
|
|
||||||
strconv.FormatInt(rec.DurationNs, 10),
|
|
||||||
strconv.FormatFloat(rec.DurationMs, 'f', -1, 64),
|
|
||||||
strconv.Itoa(rec.RequestSizeBytes),
|
|
||||||
strconv.Itoa(rec.ResponseSizeBytes),
|
|
||||||
rec.ResponseCode,
|
|
||||||
rec.Error,
|
|
||||||
strconv.FormatInt(rec.BytesSent, 10),
|
|
||||||
strconv.FormatInt(rec.BytesReceived, 10),
|
|
||||||
strconv.FormatInt(rec.PacketsSent, 10),
|
|
||||||
strconv.FormatInt(rec.PacketsReceived, 10),
|
|
||||||
strconv.FormatInt(rec.TotalBytes, 10),
|
|
||||||
}
|
|
||||||
if err := w.Write(row); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func main() {
|
|
||||||
resultsDir := "results"
|
|
||||||
providers := []string{"adguard", "cloudflare", "google", "quad9"}
|
|
||||||
|
|
||||||
fmt.Println("╔═══════════════════════════════════════════════╗")
|
|
||||||
fmt.Println("║ DNS PCAP Preprocessor v1.0 ║")
|
|
||||||
fmt.Println("║ Enriching ALL CSVs with bandwidth metrics ║")
|
|
||||||
fmt.Println("╚═══════════════════════════════════════════════╝")
|
|
||||||
|
|
||||||
totalProcessed := 0
|
|
||||||
totalSkipped := 0
|
|
||||||
totalErrors := 0
|
|
||||||
|
|
||||||
for _, provider := range providers {
|
|
||||||
providerPath := filepath.Join(resultsDir, provider)
|
|
||||||
if _, err := os.Stat(providerPath); os.IsNotExist(err) {
|
|
||||||
fmt.Printf("\n⚠ Provider folder not found: %s\n", provider)
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
if err := processProviderFolder(providerPath); err != nil {
|
|
||||||
log.Printf("Error processing %s: %v\n", provider, err)
|
|
||||||
totalErrors++
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fmt.Println("\n╔═══════════════════════════════════════════════╗")
|
|
||||||
fmt.Println("║ Preprocessing Complete! ║")
|
|
||||||
fmt.Println("╚═══════════════════════════════════════════════╝")
|
|
||||||
fmt.Printf("\nAll CSV files now have 5 additional columns:\n")
|
|
||||||
fmt.Printf(" • bytes_sent - Total bytes sent to DNS server\n")
|
|
||||||
fmt.Printf(" • bytes_received - Total bytes received from DNS server\n")
|
|
||||||
fmt.Printf(" • packets_sent - Number of packets sent\n")
|
|
||||||
fmt.Printf(" • packets_received - Number of packets received\n")
|
|
||||||
fmt.Printf(" • total_bytes - Sum of sent + received bytes\n")
|
|
||||||
fmt.Printf("\n📁 Backups saved as: *.csv.bak\n")
|
|
||||||
fmt.Printf("\n💡 Tip: The analysis script will filter which files to visualize,\n")
|
|
||||||
fmt.Printf(" but all files now have complete bandwidth metrics!\n")
|
|
||||||
}
|
|
||||||
@@ -1,367 +0,0 @@
|
|||||||
#!/usr/bin/env python3
|
|
||||||
"""
|
|
||||||
Advanced PCAP filter for DNS traffic (with IPv6 support).
|
|
||||||
|
|
||||||
Filters out:
|
|
||||||
- Local network traffic except test machine (IPv4: 10.0.0.50; IPv6: specific addresses)
|
|
||||||
- AdGuard DNS servers (for non-AdGuard captures)
|
|
||||||
- Non-DNS traffic based on protocol-specific ports
|
|
||||||
"""
|
|
||||||
|
|
||||||
import os
|
|
||||||
import subprocess
|
|
||||||
from pathlib import Path
|
|
||||||
import argparse
|
|
||||||
|
|
||||||
# Test machine IPs (IPv4 and IPv6 from your provided info)
|
|
||||||
TEST_IPV4 = '10.0.0.50'
|
|
||||||
TEST_IPV6_GLOBAL = '2001:818:e73e:ba00:5506:dfd4:ed8b:96e'
|
|
||||||
TEST_IPV6_LINKLOCAL = 'fe80::fe98:c62e:4463:9a2d'
|
|
||||||
|
|
||||||
# Port mappings
|
|
||||||
PORT_MAP = {
|
|
||||||
'udp': [53], # DNS-over-UDP
|
|
||||||
'tls': [53, 853], # DNS-over-TLS
|
|
||||||
'https': [53, 443], # DNS-over-HTTPS (DoH)
|
|
||||||
'doq': [53, 784, 8853], # DNS-over-QUIC
|
|
||||||
'doh3': [53, 443] # DNS-over-HTTP/3
|
|
||||||
}
|
|
||||||
|
|
||||||
# AdGuard DNS IPs to filter out (for non-AdGuard captures)
|
|
||||||
ADGUARD_IPS = [
|
|
||||||
'94.140.14.14',
|
|
||||||
'94.140.15.15',
|
|
||||||
'2a10:50c0::ad1:ff',
|
|
||||||
'2a10:50c0::ad2:ff'
|
|
||||||
]
|
|
||||||
|
|
||||||
def parse_filename(filename):
|
|
||||||
"""Extract protocol from filename"""
|
|
||||||
base = filename.replace('.pcap', '').replace('.csv', '')
|
|
||||||
parts = base.split('-')
|
|
||||||
|
|
||||||
if len(parts) < 1: # Minimum: protocol
|
|
||||||
return None
|
|
||||||
|
|
||||||
protocol = parts[0].lower()
|
|
||||||
return protocol
|
|
||||||
|
|
||||||
def extract_resolver_from_path(pcap_path):
|
|
||||||
"""Extract resolver name from directory structure"""
|
|
||||||
parts = Path(pcap_path).parts
|
|
||||||
|
|
||||||
for part in parts:
|
|
||||||
if part.lower() in ['cloudflare', 'google', 'quad9', 'adguard']:
|
|
||||||
return part.lower()
|
|
||||||
|
|
||||||
return None
|
|
||||||
|
|
||||||
def build_filter_expression(protocol, resolver):
    """
    Build tshark filter expression.

    Strategy:
    1. Only protocol-specific DNS ports
    2. Keep only traffic involving the test machine (IPv4/IPv6)
    3. Exclude AdGuard IPs for non-AdGuard captures
    """
    # Ports for this protocol; unknown protocols get the union of all ports.
    ports = PORT_MAP.get(protocol, [53, 443, 853, 784, 8853])

    # Port clause: UDP or TCP on any of the protocol's ports.
    port_filter = ' or '.join(
        f'(udp.port == {port} or tcp.port == {port})' for port in ports
    )

    # Test-machine clause: keep packets whose src or dst is one of ours.
    machine_terms = [f'(ip.addr == {TEST_IPV4})']
    for ipv6_addr in (TEST_IPV6_GLOBAL, TEST_IPV6_LINKLOCAL):
        if ipv6_addr:
            machine_terms.append(f'(ipv6.addr == {ipv6_addr})')
    machine_filter = ' or '.join(machine_terms)

    clauses = [f'({port_filter})', f'({machine_filter})']

    # AdGuard exclusion clause, only for captures of other resolvers.
    if resolver != 'adguard':
        exclusions = [
            f'!(ipv6.addr == {ip})' if ':' in ip else f'!(ip.addr == {ip})'
            for ip in ADGUARD_IPS
        ]
        if exclusions:
            clauses.append(f"({' and '.join(exclusions)})")

    return ' and '.join(clauses)
def filter_pcap(input_path, output_path, filter_expr, verbose=False):
    """Apply filter to PCAP file using tshark"""
    command = [
        'tshark',
        '-r', input_path,
        '-Y', filter_expr,
        '-w', output_path,
        '-F', 'pcap',
    ]

    try:
        if verbose:
            print(f" Filter: {filter_expr}")

        completed = subprocess.run(
            command,
            capture_output=True,
            text=True,
            timeout=300,
        )

        # tshark signals failure through its exit status.
        if completed.returncode != 0:
            print(f" ✗ Error: {completed.stderr.strip()}")
            return False

        if not os.path.exists(output_path):
            print(f" ✗ Output file not created")
            return False

        # 24 bytes is just the PCAP global header: nothing survived the filter.
        if os.path.getsize(output_path) < 24:
            print(f" ⚠ Warning: Output is empty")

        return True

    except subprocess.TimeoutExpired:
        print(f" ✗ Timeout (>5 minutes)")
        return False
    except Exception as e:
        print(f" ✗ Exception: {e}")
        return False
def find_pcap_files(root_dir):
    """Recursively find all PCAP files"""
    matches = [
        os.path.join(dirpath, name)
        for dirpath, _dirnames, filenames in os.walk(root_dir)
        for name in filenames
        if name.endswith('.pcap')
    ]
    return sorted(matches)
def format_bytes(bytes_val):
    """Format bytes as human readable"""
    value = bytes_val
    for unit in ('B', 'KB', 'MB', 'GB'):
        if value < 1024.0:
            return f"{value:.1f} {unit}"
        value /= 1024.0
    # Anything that survived four divisions is in terabyte range.
    return f"{value:.1f} TB"
def main():
    """CLI entry point: discover PCAPs, build per-file filters, run tshark.

    Returns:
        0 when every file filtered cleanly; 1 when tshark is missing, no
        PCAP files are found, or at least one file failed to filter.
    """
    parser = argparse.ArgumentParser(
        description='Advanced PCAP filter for DNS traffic (IPv4/IPv6)',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog='''
Filtering rules:
  1. Only include traffic on protocol-specific DNS ports
  2. Keep only packets involving the test machine (10.0.0.50 or its IPv6 addresses)
  3. Exclude AdGuard IPs for non-AdGuard captures

Protocol-specific ports:
  udp: 53
  tls: 53, 853
  https: 53, 443
  doq: 53, 784, 8853
  doh3: 53, 443

Examples:
  # Dry run
  %(prog)s ./results --dry-run

  # Filter with verbose output
  %(prog)s ./results --verbose

  # Custom output directory
  %(prog)s ./results --output ./cleaned
'''
    )

    parser.add_argument(
        'input_dir',
        help='Input directory containing PCAP files'
    )
    parser.add_argument(
        '-o', '--output',
        default='./results_filtered',
        help='Output directory (default: ./results_filtered)'
    )
    parser.add_argument(
        '--dry-run',
        action='store_true',
        help='Show what would be done without filtering'
    )
    parser.add_argument(
        '--limit',
        type=int,
        help='Only process first N files (for testing)'
    )
    parser.add_argument(
        '-v', '--verbose',
        action='store_true',
        help='Verbose output (show filter expressions)'
    )
    parser.add_argument(
        '--overwrite',
        action='store_true',
        help='Overwrite existing filtered files'
    )

    args = parser.parse_args()

    # Check for tshark: bail out early with install hints if missing.
    try:
        result = subprocess.run(
            ['tshark', '-v'],
            capture_output=True,
            check=True
        )
        if args.verbose:
            version = result.stdout.decode().split('\n')[0]
            print(f"Using: {version}\n")
    except (subprocess.CalledProcessError, FileNotFoundError):
        print("Error: tshark not found. Install Wireshark/tshark:")
        print(" Ubuntu/Debian: sudo apt-get install tshark")
        print(" macOS: brew install wireshark")
        return 1

    print("=" * 80)
    print("ADVANCED DNS PCAP FILTER (IPv4/IPv6)")
    print("=" * 80)
    print("Filters:")
    print(" 1. Protocol-specific DNS ports only")
    print(" 2. Keep only traffic involving test machine (10.0.0.50 / IPv6 addresses)")
    print(" 3. Exclude AdGuard IPs (for non-AdGuard captures)")
    print(f"\nInput: {args.input_dir}")
    print(f"Output: {args.output}")

    # Find PCAP files
    print(f"\nScanning for PCAP files...")
    pcap_files = find_pcap_files(args.input_dir)

    if not pcap_files:
        print(f"No PCAP files found in {args.input_dir}")
        return 1

    print(f"Found {len(pcap_files)} PCAP files")

    total_input_size = sum(os.path.getsize(f) for f in pcap_files)
    print(f"Total size: {format_bytes(total_input_size)}")

    if args.limit:
        pcap_files = pcap_files[:args.limit]
        print(f"Limiting to first {args.limit} files")

    if args.dry_run:
        print("\n*** DRY RUN MODE ***\n")
    else:
        print()

    # Process files, tracking per-outcome counts for the summary.
    success_count = 0
    skip_count = 0
    fail_count = 0
    total_output_size = 0

    for i, input_path in enumerate(pcap_files, 1):
        # Extract info from path: protocol from the filename, resolver
        # from the directory structure.
        filename = Path(input_path).name
        protocol = parse_filename(filename)
        resolver = extract_resolver_from_path(input_path)

        if not protocol:
            print(f"[{i}/{len(pcap_files)}] (unknown)")
            print(f" ⚠ Could not parse protocol, skipping")
            skip_count += 1
            continue

        # Create output path mirroring the input directory layout.
        rel_path = os.path.relpath(input_path, args.input_dir)
        output_path = os.path.join(args.output, rel_path)

        input_size = os.path.getsize(input_path)

        print(f"[{i}/{len(pcap_files)}] {rel_path}")
        print(f" Protocol: {protocol.upper()}")
        print(f" Resolver: {resolver or 'unknown'}")
        print(f" Size: {format_bytes(input_size)}")

        # Check if already filtered; skip unless --overwrite was given.
        if os.path.exists(output_path) and not args.overwrite:
            output_size = os.path.getsize(output_path)
            reduction = ((input_size - output_size) / input_size * 100) if input_size > 0 else 0
            print(f" ⊙ Already filtered: {format_bytes(output_size)} "
                  f"({reduction:.1f}% reduction)")
            skip_count += 1
            total_output_size += output_size
            continue

        # Build filter
        filter_expr = build_filter_expression(protocol, resolver)

        if args.dry_run:
            print(f" → Would filter")
            if args.verbose:
                print(f" Filter: {filter_expr}")
            continue

        # Create output directory
        os.makedirs(os.path.dirname(output_path), exist_ok=True)

        # Filter
        success = filter_pcap(input_path, output_path, filter_expr, args.verbose)

        if success:
            output_size = os.path.getsize(output_path)
            reduction = ((input_size - output_size) / input_size * 100) if input_size > 0 else 0
            print(f" ✓ Filtered: {format_bytes(output_size)} "
                  f"({reduction:.1f}% reduction)")
            success_count += 1
            total_output_size += output_size
        else:
            fail_count += 1

    # Summary
    print("\n" + "=" * 80)
    print("SUMMARY")
    print("=" * 80)

    if args.dry_run:
        print(f"Would process: {len(pcap_files)} files")
    else:
        print(f"Successful: {success_count}")
        print(f"Skipped: {skip_count} (already filtered or unparseable)")
        print(f"Failed: {fail_count}")
        print(f"Total: {len(pcap_files)}")

    if success_count > 0 or skip_count > 0:
        print(f"\nInput size: {format_bytes(total_input_size)}")
        print(f"Output size: {format_bytes(total_output_size)}")
        if total_input_size > 0:
            reduction = ((total_input_size - total_output_size) /
                         total_input_size * 100)
            print(f"Reduction: {reduction:.1f}%")
        print(f"\nOutput directory: {args.output}")

    return 0 if fail_count == 0 else 1
if __name__ == "__main__":
    # Propagate main()'s return value as the process exit status.
    raise SystemExit(main())
@@ -1,426 +0,0 @@
|
|||||||
#!/usr/bin/env python3
|
|
||||||
"""
|
|
||||||
Convert DNS CSV files to SQLite database.
|
|
||||||
Creates a single normalized table with unified DNSSEC handling.
|
|
||||||
"""
|
|
||||||
|
|
||||||
import sqlite3
|
|
||||||
import csv
|
|
||||||
from pathlib import Path
|
|
||||||
from dateutil import parser as date_parser
|
|
||||||
|
|
||||||
|
|
||||||
def create_database_schema(conn: sqlite3.Connection):
    """Create the database schema with indexes."""
    cursor = conn.cursor()

    # Main queries table: one denormalized row per DNS query, combining
    # resolver metadata, timing, sizes, and PCAP-derived network counters.
    cursor.execute("""
        CREATE TABLE IF NOT EXISTS dns_queries (
            id INTEGER PRIMARY KEY AUTOINCREMENT,

            -- Metadata
            provider TEXT NOT NULL,
            protocol TEXT NOT NULL,
            dnssec_mode TEXT NOT NULL CHECK(dnssec_mode IN ('off', 'auth', 'trust')),

            -- Query details
            domain TEXT NOT NULL,
            query_type TEXT NOT NULL,
            keep_alive BOOLEAN NOT NULL,
            dns_server TEXT NOT NULL,

            -- Timing
            timestamp TEXT NOT NULL,
            timestamp_unix REAL NOT NULL,
            duration_ns INTEGER NOT NULL,
            duration_ms REAL NOT NULL,

            -- Size metrics
            request_size_bytes INTEGER,
            response_size_bytes INTEGER,

            -- Network metrics (from PCAP)
            bytes_sent INTEGER DEFAULT 0,
            bytes_received INTEGER DEFAULT 0,
            packets_sent INTEGER DEFAULT 0,
            packets_received INTEGER DEFAULT 0,
            total_bytes INTEGER DEFAULT 0,

            -- Response
            response_code TEXT,
            error TEXT
        )
    """)

    # Secondary indexes for the common filter / group-by columns.
    index_ddl = (
        "CREATE INDEX IF NOT EXISTS idx_provider ON dns_queries(provider)",
        "CREATE INDEX IF NOT EXISTS idx_protocol ON dns_queries(protocol)",
        "CREATE INDEX IF NOT EXISTS idx_dnssec_mode ON dns_queries(dnssec_mode)",
        "CREATE INDEX IF NOT EXISTS idx_keep_alive ON dns_queries(keep_alive)",
        "CREATE INDEX IF NOT EXISTS idx_provider_protocol_dnssec ON dns_queries(provider, protocol, dnssec_mode)",
        "CREATE INDEX IF NOT EXISTS idx_timestamp ON dns_queries(timestamp_unix)",
        "CREATE INDEX IF NOT EXISTS idx_domain ON dns_queries(domain)",
    )
    for ddl in index_ddl:
        cursor.execute(ddl)

    conn.commit()
def parse_protocol_and_dnssec(filename: str) -> tuple[str, str, bool]:
    """
    Extract base protocol, DNSSEC mode, and keep_alive from filename.
    Returns (base_protocol, dnssec_mode, keep_alive)

    Examples:
        'udp.csv' -> ('udp', 'off', False)
        'udp-auth.csv' -> ('udp', 'auth', False)
        'tls.csv' -> ('tls', 'off', False)
        'tls-persist.csv' -> ('tls', 'off', True)
        'https-persist.csv' -> ('https', 'off', True)
        'https-auth-persist.csv' -> ('https', 'auth', True)
        'https-trust-persist.csv' -> ('https', 'trust', True)
        'doh3-auth.csv' -> ('doh3', 'auth', False)
        'doq.csv' -> ('doq', 'off', False)
    """
    # Use removesuffix rather than str.replace: replace() deletes *every*
    # occurrence, so a name like 'auth-auth.csv' collapsed to '' under
    # the old code. removesuffix strips only the trailing token.
    name = filename.removesuffix('.csv')

    # Check for persist suffix (keep_alive)
    keep_alive = name.endswith('-persist')
    if keep_alive:
        name = name.removesuffix('-persist')

    # Check for DNSSEC suffix
    dnssec_mode = 'off'
    if name.endswith('-auth'):
        dnssec_mode = 'auth'
        name = name.removesuffix('-auth')
    elif name.endswith('-trust'):
        dnssec_mode = 'trust'
        name = name.removesuffix('-trust')

    # For UDP, DoH3, and DoQ, keep_alive doesn't apply (connectionless)
    if name in ['udp', 'doh3', 'doq']:
        keep_alive = False

    return (name, dnssec_mode, keep_alive)
def str_to_bool(value: str) -> bool:
    """Convert string boolean to Python bool."""
    truthy_tokens = {'true', '1', 'yes'}
    return value.lower() in truthy_tokens
def import_csv_to_db(
    csv_path: Path,
    provider: str,
    conn: sqlite3.Connection
) -> int:
    """Import a CSV file into the database.

    Protocol, DNSSEC mode, and keep-alive are derived from the filename
    rather than from CSV columns. Malformed rows are skipped with a
    warning instead of aborting the whole import; the connection is
    committed once at the end.

    Args:
        csv_path: CSV file whose name encodes protocol/DNSSEC/keep-alive.
        provider: Resolver name stored on every inserted row.
        conn: Open SQLite connection with the dns_queries schema created.

    Returns:
        Number of rows successfully inserted.
    """
    protocol, dnssec_mode, keep_alive_from_filename = parse_protocol_and_dnssec(csv_path.name)

    cursor = conn.cursor()
    rows_imported = 0

    with open(csv_path, 'r', encoding='utf-8') as f:
        reader = csv.DictReader(f)

        for row in reader:
            try:
                # Parse timestamp to Unix epoch
                dt = date_parser.isoparse(row['timestamp'])
                timestamp_unix = dt.timestamp()

                # Use keep_alive from filename (more reliable than CSV)
                keep_alive = keep_alive_from_filename

                # Handle optional fields (may not exist in older CSVs);
                # `or 0` also covers present-but-empty values.
                bytes_sent = int(row.get('bytes_sent', 0) or 0)
                bytes_received = int(row.get('bytes_received', 0) or 0)
                packets_sent = int(row.get('packets_sent', 0) or 0)
                packets_received = int(row.get('packets_received', 0) or 0)
                total_bytes = int(row.get('total_bytes', 0) or 0)

                cursor.execute("""
                    INSERT INTO dns_queries (
                        provider, protocol, dnssec_mode,
                        domain, query_type, keep_alive,
                        dns_server, timestamp, timestamp_unix,
                        duration_ns, duration_ms,
                        request_size_bytes, response_size_bytes,
                        bytes_sent, bytes_received, packets_sent, packets_received, total_bytes,
                        response_code, error
                    ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
                """, (
                    provider,
                    protocol,
                    dnssec_mode,
                    row['domain'],
                    row['query_type'],
                    keep_alive,
                    row['dns_server'],
                    row['timestamp'],
                    timestamp_unix,
                    int(row['duration_ns']),
                    float(row['duration_ms']),
                    int(row.get('request_size_bytes') or 0),
                    int(row.get('response_size_bytes') or 0),
                    bytes_sent,
                    bytes_received,
                    packets_sent,
                    packets_received,
                    total_bytes,
                    row.get('response_code', ''),
                    row.get('error', '')
                ))

                rows_imported += 1

            except Exception as e:
                # Skip bad rows (missing keys, unparseable numbers/dates)
                # so one corrupt line doesn't sink the import.
                print(f" Warning: Skipping row - {e}")
                continue

    conn.commit()
    return rows_imported
def main():
    """Main import pipeline.

    Recreates dns.db from scratch, imports every provider CSV found under
    ./results, then prints summary statistics and example SQL queries.
    Returns None; prints an error and exits early if ./results is missing.
    """
    print("\n" + "="*60)
    print("CSV to SQLite Database Converter")
    print("="*60)

    results_dir = Path('results')
    db_path = Path('dns.db')

    if not results_dir.exists():
        print(f"\n❌ Error: '{results_dir}' directory not found")
        return

    # Remove existing database (full rebuild every run)
    if db_path.exists():
        print(f"\n⚠ Removing existing database: {db_path}")
        db_path.unlink()

    # Create database and schema
    print(f"\n📊 Creating database: {db_path}")
    conn = sqlite3.connect(db_path)
    create_database_schema(conn)
    print("✓ Schema created")

    # Import CSVs, one subdirectory per provider
    providers = ['adguard', 'cloudflare', 'google', 'quad9']
    total_rows = 0
    total_files = 0

    for provider in providers:
        provider_path = results_dir / provider

        if not provider_path.exists():
            print(f"\n⚠ Skipping {provider} - directory not found")
            continue

        print(f"\n{'='*60}")
        print(f"Importing: {provider.upper()}")
        print(f"{'='*60}")

        csv_files = sorted(provider_path.glob('*.csv'))
        provider_rows = 0
        provider_files = 0

        for csv_path in csv_files:
            # Skip backup files
            if '.bak' in csv_path.name:
                continue

            protocol, dnssec, keep_alive = parse_protocol_and_dnssec(csv_path.name)
            ka_str = "persistent" if keep_alive else "non-persist"
            print(f" 📄 {csv_path.name:30} → {protocol:8} (DNSSEC: {dnssec:5}, {ka_str})")

            rows = import_csv_to_db(csv_path, provider, conn)
            print(f" ✓ Imported {rows:,} rows")

            provider_rows += rows
            provider_files += 1

        print(f"\n Total: {provider_files} files, {provider_rows:,} rows")
        total_rows += provider_rows
        total_files += provider_files

    # Create summary
    print(f"\n{'='*60}")
    print("Database Summary")
    print(f"{'='*60}")

    cursor = conn.cursor()

    # Total counts
    cursor.execute("SELECT COUNT(*) FROM dns_queries")
    total_queries = cursor.fetchone()[0]

    cursor.execute("SELECT COUNT(DISTINCT provider) FROM dns_queries")
    unique_providers = cursor.fetchone()[0]

    cursor.execute("SELECT COUNT(DISTINCT protocol) FROM dns_queries")
    unique_protocols = cursor.fetchone()[0]

    cursor.execute("SELECT COUNT(DISTINCT domain) FROM dns_queries")
    unique_domains = cursor.fetchone()[0]

    print(f"\nTotal queries: {total_queries:,}")
    print(f"Providers: {unique_providers}")
    print(f"Protocols: {unique_protocols}")
    print(f"Unique domains: {unique_domains}")

    # Show breakdown by provider, protocol, DNSSEC, and keep_alive
    print(f"\nBreakdown by Provider, Protocol, DNSSEC & Keep-Alive:")
    print(f"{'-'*80}")

    cursor.execute("""
        SELECT provider, protocol, dnssec_mode, keep_alive, COUNT(*) as count
        FROM dns_queries
        GROUP BY provider, protocol, dnssec_mode, keep_alive
        ORDER BY provider, protocol, dnssec_mode, keep_alive
    """)

    # Blank line between provider groups for readability.
    current_provider = None
    for provider, protocol, dnssec, keep_alive, count in cursor.fetchall():
        if current_provider != provider:
            if current_provider is not None:
                print()
            current_provider = provider

        ka_str = "✓" if keep_alive else "✗"
        print(f" {provider:12} | {protocol:8} | {dnssec:5} | KA:{ka_str} | {count:6,} queries")

    # Protocol distribution
    print(f"\n{'-'*80}")
    print("Protocol Distribution:")
    print(f"{'-'*80}")

    cursor.execute("""
        SELECT protocol, COUNT(*) as count
        FROM dns_queries
        GROUP BY protocol
        ORDER BY protocol
    """)

    for protocol, count in cursor.fetchall():
        pct = (count / total_queries) * 100
        print(f" {protocol:8} | {count:8,} queries ({pct:5.1f}%)")

    # DNSSEC mode distribution
    print(f"\n{'-'*80}")
    print("DNSSEC Mode Distribution:")
    print(f"{'-'*80}")

    cursor.execute("""
        SELECT dnssec_mode, COUNT(*) as count
        FROM dns_queries
        GROUP BY dnssec_mode
        ORDER BY dnssec_mode
    """)

    for dnssec_mode, count in cursor.fetchall():
        pct = (count / total_queries) * 100
        print(f" {dnssec_mode:5} | {count:8,} queries ({pct:5.1f}%)")

    # Keep-Alive distribution
    print(f"\n{'-'*80}")
    print("Keep-Alive Distribution:")
    print(f"{'-'*80}")

    cursor.execute("""
        SELECT keep_alive, COUNT(*) as count
        FROM dns_queries
        GROUP BY keep_alive
    """)

    for keep_alive, count in cursor.fetchall():
        ka_label = "Persistent" if keep_alive else "Non-persistent"
        pct = (count / total_queries) * 100
        print(f" {ka_label:15} | {count:8,} queries ({pct:5.1f}%)")

    conn.close()

    print(f"\n{'='*60}")
    print(f"✓ Database created successfully: {db_path}")
    print(f" Total: {total_files} files, {total_rows:,} rows")
    print(f"{'='*60}\n")

    # Print usage examples (copy-paste SQL for Metabase dashboards)
    print("\n📖 Usage Examples for Metabase:")
    print(f"{'-'*60}")

    print("\n1. Compare protocols (DNSSEC off, persistent only):")
    print(""" SELECT provider, protocol,
        AVG(duration_ms) as avg_latency,
        AVG(total_bytes) as avg_bytes
    FROM dns_queries
    WHERE dnssec_mode = 'off' AND keep_alive = 1
    GROUP BY provider, protocol;""")

    print("\n2. DNSSEC impact on UDP:")
    print(""" SELECT provider, dnssec_mode,
        AVG(duration_ms) as avg_latency
    FROM dns_queries
    WHERE protocol = 'udp'
    GROUP BY provider, dnssec_mode;""")

    print("\n3. Keep-alive impact on TLS:")
    print(""" SELECT provider, keep_alive,
        AVG(duration_ms) as avg_latency,
        AVG(total_bytes) as avg_bytes
    FROM dns_queries
    WHERE protocol = 'tls' AND dnssec_mode = 'off'
    GROUP BY provider, keep_alive;""")

    print("\n4. Time series for line graphs:")
    print(""" SELECT timestamp_unix, duration_ms, total_bytes
    FROM dns_queries
    WHERE provider = 'cloudflare'
        AND protocol = 'https'
        AND dnssec_mode = 'off'
        AND keep_alive = 1
    ORDER BY timestamp_unix;""")

    print("\n5. Overall comparison table:")
    print(""" SELECT protocol, dnssec_mode, keep_alive,
        COUNT(*) as queries,
        AVG(duration_ms) as avg_latency,
        AVG(total_bytes) as avg_bytes
    FROM dns_queries
    GROUP BY protocol, dnssec_mode, keep_alive
    ORDER BY protocol, dnssec_mode, keep_alive;""")

    print(f"\n{'-'*60}\n")
# Script entry point.
if __name__ == '__main__':
    main()
@@ -1,274 +0,0 @@
|
|||||||
#!/usr/bin/env python3
|
|
||||||
"""
|
|
||||||
Merge DNS test files by configuration.
|
|
||||||
|
|
||||||
- Merges CSVs of same config (adds 'run_id' column for traceability)
|
|
||||||
- Optionally merges PCAPs using mergecap
|
|
||||||
- Flattens date structure
|
|
||||||
"""
|
|
||||||
|
|
||||||
import os
|
|
||||||
import csv
|
|
||||||
import subprocess
|
|
||||||
import shutil
|
|
||||||
from pathlib import Path
|
|
||||||
import argparse
|
|
||||||
from collections import defaultdict
|
|
||||||
|
|
||||||
def parse_filename(filename):
    """
    Extract config key from filename.
    Format: protocol[-flags]-timestamp.{csv,pcap}
    Config key: protocol[-flags] (ignores timestamp)
    """
    stem = filename.replace('.csv', '').replace('.pcap', '')
    tokens = stem.split('-')

    # Need at least a protocol plus a timestamp token.
    if len(tokens) < 2:
        return None

    # Everything before the final token forms the configuration key;
    # the final token is the run timestamp.
    return '-'.join(tokens[:-1]), tokens[-1]
def extract_resolver_from_path(file_path):
    """Extract resolver name from path"""
    resolvers = {'cloudflare', 'google', 'quad9', 'adguard'}
    for segment in Path(file_path).parts:
        candidate = segment.lower()
        if candidate in resolvers:
            return candidate
    return None
def find_files(root_dir, extension):
    """Find all files with given extension"""
    found = [
        os.path.join(dirpath, name)
        for dirpath, _dirnames, names in os.walk(root_dir)
        for name in names
        if name.endswith(extension)
    ]
    return sorted(found)
def merge_csvs(csv_files, output_path, fieldnames):
    """Merge multiple CSVs into one, adding 'run_id' column"""
    with open(output_path, 'w', newline='') as outfile:
        writer = csv.DictWriter(outfile, fieldnames=fieldnames + ['run_id'])
        writer.writeheader()

        for source in csv_files:
            # The run_id is the timestamp token parsed from the filename,
            # which keeps every merged row traceable to its original run.
            _, run_id = parse_filename(Path(source).name)

            with open(source, 'r', newline='') as infile:
                for record in csv.DictReader(infile):
                    record['run_id'] = run_id
                    writer.writerow(record)
def merge_pcaps(pcap_files, output_path):
    """Merge PCAP files using mergecap"""
    try:
        subprocess.run(
            ['mergecap', '-w', output_path] + pcap_files,
            capture_output=True,
            check=True,
        )
    except subprocess.CalledProcessError as e:
        print(f" ✗ mergecap error: {e.stderr.decode()}")
        return False
    except FileNotFoundError:
        print("Error: mergecap not found. Install Wireshark:")
        print(" Ubuntu: sudo apt install wireshark-common")
        print(" macOS: brew install wireshark")
        return False
    return True
def format_bytes(bytes_val):
    """Format bytes as human readable"""
    suffixes = ['B', 'KB', 'MB', 'GB']
    index = 0
    size = float(bytes_val)
    # Walk up the unit ladder until the value fits below 1024.
    while index < len(suffixes) and size >= 1024.0:
        size /= 1024.0
        index += 1
    if index < len(suffixes):
        return f"{size:.1f} {suffixes[index]}"
    return f"{size:.1f} TB"
def main():
    """Entry point: discover, group, and merge per-run CSV/PCAP files.

    Returns a process exit code: 0 on success (or dry run / user cancel),
    1 on bad input, no files found, or any failed PCAP merge.
    """
    parser = argparse.ArgumentParser(
        description='Merge DNS test files by configuration',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog='''
Merges files of same config across dates/timestamps.
Output: ./results_merged/[resolver]/[config].csv (merged)
        ./results_merged/[resolver]/[config].pcap (merged, if --merge-pcaps)

Examples:
  # Dry run to preview
  %(prog)s ./results --dry-run

  # Merge CSVs only (recommended)
  %(prog)s ./results

  # Merge CSVs and PCAPs
  %(prog)s ./results --merge-pcaps

  # Custom output directory
  %(prog)s ./results --output ./merged_data
'''
    )

    parser.add_argument(
        'input_dir',
        help='Input directory (e.g., ./results)'
    )
    parser.add_argument(
        '--output',
        default='./results_merged',
        help='Output directory (default: ./results_merged)'
    )
    parser.add_argument(
        '--merge-pcaps',
        action='store_true',
        help='Merge PCAP files (requires mergecap from Wireshark)'
    )
    parser.add_argument(
        '--dry-run',
        action='store_true',
        help='Show what would be done without merging'
    )
    parser.add_argument(
        '-y', '--yes',
        action='store_true',
        help='Skip confirmation prompt'
    )

    args = parser.parse_args()

    if not os.path.isdir(args.input_dir):
        print(f"Error: Input directory not found: {args.input_dir}")
        return 1

    # Find all files
    print("=" * 80)
    print("MERGE DNS TEST FILES")
    print("=" * 80)
    print(f"Input: {args.input_dir}")
    print(f"Output: {args.output}")
    print(f"Merge PCAPs: {'Yes' if args.merge_pcaps else 'No'}")

    csv_files = find_files(args.input_dir, '.csv')
    pcap_files = find_files(args.input_dir, '.pcap') if args.merge_pcaps else []

    if not csv_files and not pcap_files:
        print("\nNo CSV/PCAP files found")
        return 1

    print(f"\nFound {len(csv_files)} CSV files")
    if args.merge_pcaps:
        print(f"Found {len(pcap_files)} PCAP files")

    # Group files by resolver and config; key is the (resolver, config) pair
    # so runs of the same configuration land in the same merged output.
    csv_groups = defaultdict(list)
    pcap_groups = defaultdict(list)

    for csv_path in csv_files:
        config, _ = parse_filename(Path(csv_path).name)
        resolver = extract_resolver_from_path(csv_path)
        if config and resolver:
            key = (resolver, config)
            csv_groups[key].append(csv_path)

    for pcap_path in pcap_files:
        config, _ = parse_filename(Path(pcap_path).name)
        resolver = extract_resolver_from_path(pcap_path)
        if config and resolver:
            key = (resolver, config)
            pcap_groups[key].append(pcap_path)

    # Summary
    print("\nConfigs to merge:")
    print("-" * 80)
    for (resolver, config), files in sorted(csv_groups.items()):
        print(f" {resolver}/{config}: {len(files)} runs")

    total_runs = sum(len(files) for files in csv_groups.values())
    print(f"\nTotal configs: {len(csv_groups)}")
    print(f"Total runs: {total_runs}")

    if args.dry_run:
        print("\n*** DRY RUN MODE ***\n")
        for (resolver, config) in sorted(csv_groups.keys()):
            print(f"Would merge: {resolver}/{config} ({len(csv_groups[(resolver, config)])} CSVs)")
            if args.merge_pcaps and (resolver, config) in pcap_groups:
                print(f"Would merge: {resolver}/{config} ({len(pcap_groups[(resolver, config)])} PCAPs)")
        return 0

    # Confirmation
    if not args.yes:
        response = input(f"\nMerge all into {args.output}? [y/N] ")
        if response.lower() not in ['y', 'yes']:
            print("Cancelled")
            return 0

    # Merge
    print("\n" + "=" * 80)
    print("MERGING FILES")
    print("=" * 80)

    success_count = 0
    fail_count = 0
    total_queries = 0
    total_size = 0

    # Get standard CSV fieldnames (from first file).
    # BUG FIX: the original did next(iter(csv_files)) unconditionally, which
    # raises StopIteration when only PCAP files were found (the earlier guard
    # only rejects the case where BOTH lists are empty).
    fieldnames = []
    if csv_files:
        with open(csv_files[0], 'r') as f:
            reader = csv.DictReader(f)
            fieldnames = reader.fieldnames

    for (resolver, config), files in sorted(csv_groups.items()):
        print(f"\n{resolver}/{config} ({len(files)} runs)")

        # Merge CSVs
        output_csv = os.path.join(args.output, resolver, f"{config}.csv")
        os.makedirs(os.path.dirname(output_csv), exist_ok=True)

        merge_csvs(files, output_csv, fieldnames)

        # Count queries in merged file
        with open(output_csv, 'r') as f:
            query_count = sum(1 for _ in csv.reader(f)) - 1  # Minus header

        print(f" ✓ Merged CSV: {query_count:,} queries")
        total_queries += query_count
        success_count += 1

        # Merge PCAPs if requested
        if args.merge_pcaps and (resolver, config) in pcap_groups:
            output_pcap = os.path.join(args.output, resolver, f"{config}.pcap")
            pcap_list = pcap_groups[(resolver, config)]

            if merge_pcaps(pcap_list, output_pcap):
                merged_size = os.path.getsize(output_pcap)
                orig_size = sum(os.path.getsize(p) for p in pcap_list)
                print(f" ✓ Merged PCAP: {format_bytes(merged_size)} "
                      f"(from {format_bytes(orig_size)})")
                total_size += merged_size
            else:
                # Stray f-prefix removed: the string has no placeholders.
                print(" ✗ PCAP merge failed")
                fail_count += 1

    # Final summary
    print("\n" + "=" * 80)
    print("COMPLETE")
    print("=" * 80)
    print(f"Successful configs: {success_count}")
    print(f"Failed: {fail_count}")
    print(f"Total queries: {total_queries:,}")
    if args.merge_pcaps:
        print(f"Total PCAP size: {format_bytes(total_size)}")
    print(f"\nMerged files in: {args.output}")

    return 0 if fail_count == 0 else 1
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
exit(main())
|
|
||||||
Reference in New Issue
Block a user