feat(profiling): Add CPU and MEM profiling
@@ -1,27 +0,0 @@
{
  "nodes": {
    "nixpkgs": {
      "locked": {
        "lastModified": 1760103332,
        "narHash": "sha256-BMsGVfKl4Q80Pr9T1AkCRljO1bpwCmY8rTBVj8XGuhA=",
        "owner": "NixOS",
        "repo": "nixpkgs",
        "rev": "870493f9a8cb0b074ae5b411b2f232015db19a65",
        "type": "github"
      },
      "original": {
        "owner": "NixOS",
        "ref": "nixpkgs-unstable",
        "repo": "nixpkgs",
        "type": "github"
      }
    },
    "root": {
      "inputs": {
        "nixpkgs": "nixpkgs"
      }
    }
  },
  "root": "root",
  "version": 7
}
@@ -1,38 +0,0 @@
{
  description = "A Nix-flake-based Go 1.22 development environment";

  inputs.nixpkgs.url = "github:NixOS/nixpkgs/nixpkgs-unstable";

  outputs = inputs:
    let
      goVersion = 25; # Change this to update the whole stack

      supportedSystems = [ "aarch64-darwin" ];
      forEachSupportedSystem = f: inputs.nixpkgs.lib.genAttrs supportedSystems (system: f {
        pkgs = import inputs.nixpkgs {
          inherit system;
          overlays = [ inputs.self.overlays.default ];
        };
      });
    in
    {
      overlays.default = final: prev: {
        go = final."go_1_${toString goVersion}";
      };

      devShells = forEachSupportedSystem ({ pkgs }: {
        default = pkgs.mkShell {
          packages = with pkgs; [
            go
            gotools
            golangci-lint
            (pkgs.python3.withPackages (python-pkgs: with python-pkgs; [
              pandas
              matplotlib
              seaborn
            ]))
          ];
        };
      });
    };
}
@@ -12,6 +12,7 @@ import (
	"github.com/afonsofrancof/sdns-proxy/client"
	"github.com/afonsofrancof/sdns-proxy/internal/qol/capture"
	"github.com/afonsofrancof/sdns-proxy/internal/qol/results"
	"github.com/afonsofrancof/sdns-proxy/internal/qol/stats"
	"github.com/google/gopacket/pcap"
	"github.com/miekg/dns"
)
@@ -86,13 +87,16 @@ func (r *MeasurementRunner) runPerUpstream(upstream string, domains []string, qT
	defer dnsClient.Close()

	// Setup output files
	csvPath, pcapPath := GenerateOutputPaths(r.config.OutputDir, upstream, r.config.DNSSEC, r.config.AuthoritativeDNSSEC, r.config.KeepAlive)
	csvPath, pcapPath, memPath := GenerateOutputPaths(r.config.OutputDir, upstream, r.config.DNSSEC, r.config.AuthoritativeDNSSEC, r.config.KeepAlive)

	// Create directory if it doesn't exist
	if err := os.MkdirAll(filepath.Dir(csvPath), 0755); err != nil {
		return fmt.Errorf("failed to create output directory: %w", err)
	}

	// Initialize runtime collector with memPath
	runtimeCollector := stats.NewRuntimeCollector(memPath)

	keepAliveStr := ""
	if r.config.KeepAlive {
		keepAliveStr = " (keep-alive)"
@@ -118,7 +122,17 @@ func (r *MeasurementRunner) runPerUpstream(upstream string, domains []string, qT

	time.Sleep(time.Second)
	// Run measurements
	return r.runQueries(dnsClient, upstream, domains, qType, writer, packetCapture)
	err = r.runQueries(dnsClient, upstream, domains, qType, writer, packetCapture)
	if err != nil {
		return err
	}

	// Write summed mem stats for the entire run
	if err := runtimeCollector.WriteStats(); err != nil {
		fmt.Fprintf(os.Stderr, "Warning: failed to write mem stats to %s: %v\n", memPath, err)
	}

	return nil
}

func (r *MeasurementRunner) runQueries(dnsClient client.DNSClient, upstream string,
@@ -0,0 +1,94 @@
package stats

import (
	"encoding/csv"
	"fmt"
	"os"
	"runtime"
	"time"
)

type RuntimeStats struct {
	TotalAlloc   uint64
	Mallocs      uint64
	NumGC        uint32
	AllocDelta   uint64
	MallocsDelta uint64
	GCDelta      uint32
}

type RuntimeCollector struct {
	startStats runtime.MemStats
	memPath    string
}

func NewRuntimeCollector(memPath string) *RuntimeCollector {
	var stats runtime.MemStats
	runtime.ReadMemStats(&stats)

	return &RuntimeCollector{
		startStats: stats,
		memPath:    memPath,
	}
}

func (rc *RuntimeCollector) Collect() RuntimeStats {
	var current runtime.MemStats
	runtime.ReadMemStats(&current)

	return RuntimeStats{
		TotalAlloc:   current.TotalAlloc,
		Mallocs:      current.Mallocs,
		NumGC:        current.NumGC,
		AllocDelta:   current.TotalAlloc - rc.startStats.TotalAlloc,
		MallocsDelta: current.Mallocs - rc.startStats.Mallocs,
		GCDelta:      current.NumGC - rc.startStats.NumGC,
	}
}

func (rc *RuntimeCollector) WriteStats() error {
	stats := rc.Collect()
	timestamp := time.Now().Format(time.RFC3339Nano)

	// Check if file exists
	fileExists := false
	if _, err := os.Stat(rc.memPath); err == nil {
		fileExists = true
	}

	// Open in append mode
	file, err := os.OpenFile(rc.memPath, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644)
	if err != nil {
		return fmt.Errorf("failed to open mem.csv: %w", err)
	}
	defer file.Close()

	writer := csv.NewWriter(file)

	// Write header if new file
	if !fileExists {
		header := []string{
			"timestamp", "total_alloc_bytes", "mallocs", "gc_cycles",
			"alloc_delta", "mallocs_delta", "gc_delta",
		}
		if err := writer.Write(header); err != nil {
			return fmt.Errorf("failed to write mem.csv header: %w", err)
		}
	}

	row := []string{
		timestamp,
		fmt.Sprintf("%d", stats.TotalAlloc),
		fmt.Sprintf("%d", stats.Mallocs),
		fmt.Sprintf("%d", stats.NumGC),
		fmt.Sprintf("%d", stats.AllocDelta),
		fmt.Sprintf("%d", stats.MallocsDelta),
		fmt.Sprintf("%d", stats.GCDelta),
	}
	if err := writer.Write(row); err != nil {
		return fmt.Errorf("failed to write mem.csv row: %w", err)
	}

	writer.Flush()
	return writer.Error()
}
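For reference, a minimal sketch (not part of the diff above) of how the collector is intended to be wired into a run, together with one way the "CPU" half of this commit's title could be covered using the standard runtime/pprof package. The file paths and the runMeasurements placeholder are illustrative assumptions, not code from this commit.

// Illustrative sketch only: assumed paths and a placeholder query loop.
package main

import (
	"log"
	"os"
	"runtime/pprof"

	"github.com/afonsofrancof/sdns-proxy/internal/qol/stats"
)

func main() {
	// CPU profile via runtime/pprof (assumption: not shown in the diff above).
	cpuFile, err := os.Create("results/example/run.cpu.pprof")
	if err != nil {
		log.Fatal(err)
	}
	defer cpuFile.Close()
	if err := pprof.StartCPUProfile(cpuFile); err != nil {
		log.Fatal(err)
	}
	defer pprof.StopCPUProfile()

	// Memory stats: snapshot the runtime counters at start, then append one
	// summary row (header plus deltas since start) to the .mem.csv at the end.
	collector := stats.NewRuntimeCollector("results/example/run.mem.csv")

	runMeasurements() // placeholder for the actual query loop

	if err := collector.WriteStats(); err != nil {
		log.Printf("warning: failed to write mem stats: %v", err)
	}
}

func runMeasurements() {}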
@@ -1,4 +1,3 @@
// ./internal/qol/utils.go
package qol

import (
@@ -8,7 +7,7 @@ import (
	"strings"
)

func GenerateOutputPaths(outputDir, upstream string, dnssec, authDNSSEC, keepAlive bool) (csvPath, pcapPath string) {
func GenerateOutputPaths(outputDir, upstream string, dnssec, authDNSSEC, keepAlive bool) (csvPath, pcapPath, memPath string) {
	proto := DetectProtocol(upstream)
	cleanServer := cleanServerName(upstream)

@@ -32,8 +31,10 @@ func GenerateOutputPaths(outputDir, upstream string, dnssec, authDNSSEC, keepAli
		base = fmt.Sprintf("%s-%s", base, strings.Join(flags, "-"))
	}

	return filepath.Join(subDir, base+".csv"),
		filepath.Join(subDir, base+".pcap")
	csvPath = filepath.Join(subDir, base+".csv")
	pcapPath = filepath.Join(subDir, base+".pcap")
	memPath = filepath.Join(subDir, base+".mem.csv")
	return
}

func cleanServerName(server string) string {
@@ -84,7 +85,6 @@ func DetectProtocol(upstream string) string {
	u, err := url.Parse(upstream)
	if err == nil && u.Scheme != "" {
		scheme := strings.ToLower(u.Scheme)
		// Normalize scheme names
		switch scheme {
		case "udp", "doudp":
			return "doudp"
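For context, a short sketch of what the new three-value GenerateOutputPaths returns. The upstream and flag values below are made-up examples; only the .csv/.pcap/.mem.csv naming pattern comes from the code above.

	// Hypothetical call; subDir and base are derived inside GenerateOutputPaths.
	csvPath, pcapPath, memPath := qol.GenerateOutputPaths(
		"./results",          // outputDir
		"dotcp://8.8.8.8:53", // upstream (example)
		true,                 // dnssec
		false,                // authoritativeDNSSEC
		true,                 // keepAlive
	)
	// csvPath  -> <subDir>/<base>.csv
	// pcapPath -> <subDir>/<base>.pcap
	// memPath  -> <subDir>/<base>.mem.csv   (new: one memory-stats CSV per run)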
@@ -1,187 +0,0 @@
|
||||
#!/bin/bash
|
||||
|
||||
# Exit on error
|
||||
set -e
|
||||
|
||||
# Default values
|
||||
TOOL_PATH="./qol"
|
||||
DOMAINS_FILE="./domains.txt"
|
||||
OUTPUT_DIR="./results"
|
||||
INTERFACE="veth1"
|
||||
TIMEOUT="5s"
|
||||
SLEEP_TIME="1"
|
||||
|
||||
# Parse arguments
|
||||
while [[ $# -gt 0 ]]; do
|
||||
case $1 in
|
||||
-t|--tool-path)
|
||||
TOOL_PATH="$2"
|
||||
shift 2
|
||||
;;
|
||||
-d|--domains-file)
|
||||
DOMAINS_FILE="$2"
|
||||
shift 2
|
||||
;;
|
||||
-o|--output-dir)
|
||||
OUTPUT_DIR="$2"
|
||||
shift 2
|
||||
;;
|
||||
-I|--interface)
|
||||
INTERFACE="$2"
|
||||
shift 2
|
||||
;;
|
||||
-T|--timeout)
|
||||
TIMEOUT="$2"
|
||||
shift 2
|
||||
;;
|
||||
-s|--sleep)
|
||||
SLEEP_TIME="$2"
|
||||
shift 2
|
||||
;;
|
||||
--help)
|
||||
echo "Usage: $0 [OPTIONS]"
|
||||
echo ""
|
||||
echo "Options:"
|
||||
echo " -t, --tool-path PATH Path to qol tool (default: ./qol)"
|
||||
echo " -d, --domains-file PATH Path to domains file (default: ./domains.txt)"
|
||||
echo " -o, --output-dir PATH Output directory (default: ./results)"
|
||||
echo " -I, --interface NAME Network interface (default: veth1)"
|
||||
echo " -T, --timeout DURATION Timeout duration (default: 5s)"
|
||||
echo " -s, --sleep SECONDS Sleep between runs (default: 1)"
|
||||
echo " --help Show this help"
|
||||
exit 0
|
||||
;;
|
||||
*)
|
||||
echo "Unknown option: $1"
|
||||
echo "Use --help for usage information"
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
done
|
||||
|
||||
echo "Configuration:"
|
||||
echo " Tool path: $TOOL_PATH"
|
||||
echo " Domains file: $DOMAINS_FILE"
|
||||
echo " Output dir: $OUTPUT_DIR"
|
||||
echo " Interface: $INTERFACE"
|
||||
echo " Timeout: $TIMEOUT"
|
||||
echo " Sleep time: ${SLEEP_TIME}s"
|
||||
echo ""
|
||||
|
||||
# Connection-based protocols that benefit from keep-alive (TCP-based)
|
||||
CONN_SERVERS=(
|
||||
-s "dotcp://8.8.8.8:53"
|
||||
-s "dotcp://1.1.1.1:53"
|
||||
-s "dotcp://9.9.9.9:53"
|
||||
-s "dotcp://dns.adguard-dns.com:53"
|
||||
#-s "tls://8.8.8.8:853"
|
||||
#-s "tls://1.1.1.1:853"
|
||||
#-s "tls://9.9.9.9:853"
|
||||
#-s "tls://dns.adguard-dns.com:853"
|
||||
#-s "https://dns.google/dns-query"
|
||||
#-s "https://cloudflare-dns.com/dns-query"
|
||||
#-s "https://dns10.quad9.net/dns-query"
|
||||
#-s "https://dns.adguard-dns.com/dns-query"
|
||||
)
|
||||
|
||||
# QUIC-based protocols (have built-in 0-RTT, keep-alive doesn't add value)
|
||||
QUIC_SERVERS=(
|
||||
#-s "doh3://dns.google/dns-query"
|
||||
#-s "doh3://cloudflare-dns.com/dns-query"
|
||||
#-s "doh3://dns.adguard-dns.com/dns-query"
|
||||
#-s "doq://dns.adguard-dns.com:853"
|
||||
)
|
||||
|
||||
# Connectionless protocols (no keep-alive)
|
||||
CONNLESS_SERVERS=(
|
||||
# -s "udp://8.8.8.8:53"
|
||||
# -s "udp://1.1.1.1:53"
|
||||
# -s "udp://9.9.9.9:53"
|
||||
# -s "udp://dns.adguard-dns.com:53"
|
||||
-s "sdns://AQMAAAAAAAAAETk0LjE0MC4xNC4xNDo1NDQzINErR_JS3PLCu_iZEIbq95zkSV2LFsigxDIuUso_OQhzIjIuZG5zY3J5cHQuZGVmYXVsdC5uczEuYWRndWFyZC5jb20"
|
||||
-s "sdns://AQMAAAAAAAAAFDE0OS4xMTIuMTEyLjExMjo4NDQzIGfIR7jIdYzRICRVQ751Z0bfNN8dhMALjEcDaN-CHYY-GTIuZG5zY3J5cHQtY2VydC5xdWFkOS5uZXQ"
|
||||
)
|
||||
|
||||
# Common args
|
||||
COMMON_ARGS=(
|
||||
"$DOMAINS_FILE"
|
||||
--interface "$INTERFACE"
|
||||
--timeout "$TIMEOUT"
|
||||
)
|
||||
|
||||
# Combinations for TCP-based connection protocols
|
||||
CONN_COMBINATIONS=(
|
||||
# DNSSEC off, Keep off
|
||||
""
|
||||
|
||||
# DNSSEC off, Keep on
|
||||
"--keep-alive"
|
||||
|
||||
# DNSSEC on (trust), Keep on
|
||||
"--dnssec --keep-alive"
|
||||
|
||||
# DNSSEC on (auth), Keep on
|
||||
"--dnssec --authoritative-dnssec --keep-alive"
|
||||
)
|
||||
|
||||
# Combinations for QUIC and connectionless protocols (no keep-alive)
|
||||
NO_KEEPALIVE_COMBINATIONS=(
|
||||
# DNSSEC off
|
||||
""
|
||||
|
||||
# DNSSEC on (trust)
|
||||
"--dnssec"
|
||||
|
||||
# DNSSEC on (auth)
|
||||
"--dnssec --authoritative-dnssec"
|
||||
)
|
||||
|
||||
echo "=== Running TCP-based protocols (TLS/HTTPS) ==="
|
||||
for FLAGS in "${CONN_COMBINATIONS[@]}"; do
|
||||
echo "Running: $FLAGS"
|
||||
|
||||
FLAGS_ARRAY=($FLAGS)
|
||||
|
||||
sudo "$TOOL_PATH" run \
|
||||
--output-dir "$OUTPUT_DIR" \
|
||||
"${COMMON_ARGS[@]}" \
|
||||
"${CONN_SERVERS[@]}" \
|
||||
"${FLAGS_ARRAY[@]}" || true
|
||||
|
||||
sleep "$SLEEP_TIME"
|
||||
done
|
||||
|
||||
echo ""
|
||||
echo "=== Running QUIC-based protocols (DoH3/DoQ) ==="
|
||||
for FLAGS in "${NO_KEEPALIVE_COMBINATIONS[@]}"; do
|
||||
echo "Running: $FLAGS"
|
||||
|
||||
FLAGS_ARRAY=($FLAGS)
|
||||
|
||||
sudo "$TOOL_PATH" run \
|
||||
--output-dir "$OUTPUT_DIR" \
|
||||
"${COMMON_ARGS[@]}" \
|
||||
"${QUIC_SERVERS[@]}" \
|
||||
"${FLAGS_ARRAY[@]}" || true
|
||||
|
||||
sleep "$SLEEP_TIME"
|
||||
done
|
||||
|
||||
echo ""
|
||||
echo "=== Running connectionless protocols (UDP) ==="
|
||||
for FLAGS in "${NO_KEEPALIVE_COMBINATIONS[@]}"; do
|
||||
echo "Running: $FLAGS"
|
||||
|
||||
FLAGS_ARRAY=($FLAGS)
|
||||
|
||||
sudo "$TOOL_PATH" run \
|
||||
--output-dir "$OUTPUT_DIR" \
|
||||
"${COMMON_ARGS[@]}" \
|
||||
"${CONNLESS_SERVERS[@]}" \
|
||||
"${FLAGS_ARRAY[@]}" || true
|
||||
|
||||
sleep "$SLEEP_TIME"
|
||||
done
|
||||
|
||||
echo ""
|
||||
echo "All combinations completed!"
|
||||
@@ -1,498 +0,0 @@
|
||||
import pandas as pd
|
||||
import matplotlib.pyplot as plt
|
||||
import seaborn as sns
|
||||
import numpy as np
|
||||
from pathlib import Path
|
||||
from scipy import stats
|
||||
import warnings
|
||||
|
||||
warnings.filterwarnings('ignore')
|
||||
|
||||
# Set style for publication-quality plots
|
||||
sns.set_style("whitegrid")
|
||||
plt.rcParams['figure.dpi'] = 300
|
||||
plt.rcParams['savefig.dpi'] = 300
|
||||
plt.rcParams['font.size'] = 10
|
||||
plt.rcParams['figure.figsize'] = (12, 6)
|
||||
|
||||
class DNSAnalyzer:
|
||||
def __init__(self, results_dir='results'):
|
||||
self.results_dir = Path(results_dir)
|
||||
self.df = None
|
||||
|
||||
def load_all_data(self):
|
||||
"""Load all CSV files from the results directory"""
|
||||
data_frames = []
|
||||
|
||||
providers = ['adguard', 'cloudflare', 'google', 'quad9']
|
||||
|
||||
for provider in providers:
|
||||
provider_path = self.results_dir / provider
|
||||
if not provider_path.exists():
|
||||
continue
|
||||
|
||||
for csv_file in provider_path.glob('*.csv'):
|
||||
try:
|
||||
df = pd.read_csv(csv_file)
|
||||
df['provider'] = provider
|
||||
df['test_config'] = csv_file.stem
|
||||
data_frames.append(df)
|
||||
except Exception as e:
|
||||
print(f"Error loading {csv_file}: {e}")
|
||||
|
||||
self.df = pd.concat(data_frames, ignore_index=True)
|
||||
self._clean_and_enrich_data()
|
||||
print(f"Loaded {len(self.df)} DNS queries across {len(data_frames)} test configurations")
|
||||
|
||||
def _clean_and_enrich_data(self):
|
||||
"""Clean data and add useful columns"""
|
||||
# Remove failed queries
|
||||
self.df = self.df[self.df['error'].isna()]
|
||||
|
||||
# Extract protocol base (remove -auth, -trust suffixes)
|
||||
self.df['protocol_base'] = self.df['protocol'].str.replace('-auth|-trust', '', regex=True)
|
||||
|
||||
# DNSSEC configuration
|
||||
self.df['dnssec_mode'] = 'none'
|
||||
self.df.loc[self.df['auth_dnssec'] == True, 'dnssec_mode'] = 'auth'
|
||||
self.df.loc[(self.df['dnssec'] == True) & (self.df['auth_dnssec'] == False), 'dnssec_mode'] = 'trust'
|
||||
|
||||
# Protocol categories
|
||||
self.df['protocol_category'] = self.df['protocol_base'].map({
|
||||
'udp': 'Plain DNS',
|
||||
'tls': 'DoT',
|
||||
'https': 'DoH',
|
||||
'doh3': 'DoH/3',
|
||||
'doq': 'DoQ'
|
||||
})
|
||||
|
||||
# Connection persistence
|
||||
self.df['persistence'] = self.df['keep_alive'].fillna(False)
|
||||
|
||||
def generate_summary_statistics(self):
|
||||
"""Generate comprehensive summary statistics"""
|
||||
print("\n" + "="*80)
|
||||
print("SUMMARY STATISTICS")
|
||||
print("="*80)
|
||||
|
||||
# Overall statistics
|
||||
print("\n--- Overall Performance ---")
|
||||
print(f"Total queries: {len(self.df)}")
|
||||
print(f"Mean latency: {self.df['duration_ms'].mean():.2f} ms")
|
||||
print(f"Median latency: {self.df['duration_ms'].median():.2f} ms")
|
||||
print(f"95th percentile: {self.df['duration_ms'].quantile(0.95):.2f} ms")
|
||||
print(f"99th percentile: {self.df['duration_ms'].quantile(0.99):.2f} ms")
|
||||
|
||||
# By protocol
|
||||
print("\n--- Performance by Protocol ---")
|
||||
protocol_stats = self.df.groupby('protocol_category')['duration_ms'].agg([
|
||||
('count', 'count'),
|
||||
('mean', 'mean'),
|
||||
('median', 'median'),
|
||||
('std', 'std'),
|
||||
('p95', lambda x: x.quantile(0.95)),
|
||||
('p99', lambda x: x.quantile(0.99))
|
||||
]).round(2)
|
||||
print(protocol_stats)
|
||||
|
||||
# By provider
|
||||
print("\n--- Performance by Provider ---")
|
||||
provider_stats = self.df.groupby('provider')['duration_ms'].agg([
|
||||
('count', 'count'),
|
||||
('mean', 'mean'),
|
||||
('median', 'median'),
|
||||
('std', 'std'),
|
||||
('p95', lambda x: x.quantile(0.95))
|
||||
]).round(2)
|
||||
print(provider_stats)
|
||||
|
||||
# DNSSEC impact
|
||||
print("\n--- DNSSEC Validation Impact ---")
|
||||
dnssec_stats = self.df.groupby('dnssec_mode')['duration_ms'].agg([
|
||||
('count', 'count'),
|
||||
('mean', 'mean'),
|
||||
('median', 'median'),
|
||||
('overhead_vs_none', lambda x: x.mean())
|
||||
]).round(2)
|
||||
|
||||
# Calculate overhead percentage
|
||||
baseline = dnssec_stats.loc['none', 'mean'] if 'none' in dnssec_stats.index else 0
|
||||
if baseline > 0:
|
||||
dnssec_stats['overhead_pct'] = ((dnssec_stats['overhead_vs_none'] - baseline) / baseline * 100).round(1)
|
||||
print(dnssec_stats)
|
||||
|
||||
# Bandwidth analysis
|
||||
print("\n--- Bandwidth Usage ---")
|
||||
bandwidth_stats = self.df.groupby('protocol_category').agg({
|
||||
'request_size_bytes': ['mean', 'median'],
|
||||
'response_size_bytes': ['mean', 'median']
|
||||
}).round(2)
|
||||
print(bandwidth_stats)
|
||||
|
||||
# Persistence impact (where applicable)
|
||||
print("\n--- Connection Persistence Impact ---")
|
||||
persist_protocols = self.df[self.df['protocol_base'].isin(['tls', 'https'])]
|
||||
if len(persist_protocols) > 0:
|
||||
persist_stats = persist_protocols.groupby(['protocol_base', 'persistence'])['duration_ms'].agg([
|
||||
('mean', 'mean'),
|
||||
('median', 'median')
|
||||
]).round(2)
|
||||
print(persist_stats)
|
||||
|
||||
return {
|
||||
'protocol': protocol_stats,
|
||||
'provider': provider_stats,
|
||||
'dnssec': dnssec_stats,
|
||||
'bandwidth': bandwidth_stats
|
||||
}
|
||||
|
||||
def plot_latency_by_protocol(self, output_dir='plots'):
|
||||
"""Violin plot of latency distribution by protocol"""
|
||||
Path(output_dir).mkdir(exist_ok=True)
|
||||
|
||||
plt.figure(figsize=(14, 7))
|
||||
|
||||
# Order protocols logically
|
||||
protocol_order = ['Plain DNS', 'DoT', 'DoH', 'DoH/3', 'DoQ']
|
||||
available_protocols = [p for p in protocol_order if p in self.df['protocol_category'].values]
|
||||
|
||||
sns.violinplot(data=self.df, x='protocol_category', y='duration_ms',
|
||||
order=available_protocols, inner='box', cut=0)
|
||||
|
||||
plt.title('DNS Query Latency Distribution by Protocol', fontsize=14, fontweight='bold')
|
||||
plt.xlabel('Protocol', fontsize=12)
|
||||
plt.ylabel('Response Time (ms)', fontsize=12)
|
||||
plt.xticks(rotation=0)
|
||||
|
||||
# Add mean values as annotations
|
||||
for i, protocol in enumerate(available_protocols):
|
||||
mean_val = self.df[self.df['protocol_category'] == protocol]['duration_ms'].mean()
|
||||
plt.text(i, mean_val, f'{mean_val:.1f}', ha='center', va='bottom', fontweight='bold')
|
||||
|
||||
plt.tight_layout()
|
||||
plt.savefig(f'{output_dir}/latency_by_protocol.png', bbox_inches='tight')
|
||||
plt.close()
|
||||
print(f"✓ Saved: latency_by_protocol.png")
|
||||
|
||||
def plot_provider_comparison(self, output_dir='plots'):
|
||||
"""Box plot comparing providers across protocols"""
|
||||
Path(output_dir).mkdir(exist_ok=True)
|
||||
|
||||
fig, axes = plt.subplots(2, 2, figsize=(16, 12))
|
||||
fig.suptitle('Provider Performance Comparison by Protocol', fontsize=16, fontweight='bold')
|
||||
|
||||
protocols = self.df['protocol_category'].unique()
|
||||
protocols = [p for p in ['Plain DNS', 'DoT', 'DoH', 'DoH/3'] if p in protocols]
|
||||
|
||||
for idx, protocol in enumerate(protocols[:4]):
|
||||
ax = axes[idx // 2, idx % 2]
|
||||
data = self.df[self.df['protocol_category'] == protocol]
|
||||
|
||||
if len(data) > 0:
|
||||
sns.boxplot(data=data, x='provider', y='duration_ms', ax=ax)
|
||||
ax.set_title(f'{protocol}', fontsize=12, fontweight='bold')
|
||||
ax.set_xlabel('Provider', fontsize=10)
|
||||
ax.set_ylabel('Response Time (ms)', fontsize=10)
|
||||
ax.tick_params(axis='x', rotation=45)
|
||||
|
||||
plt.tight_layout()
|
||||
plt.savefig(f'{output_dir}/provider_comparison.png', bbox_inches='tight')
|
||||
plt.close()
|
||||
print(f"✓ Saved: provider_comparison.png")
|
||||
|
||||
def plot_dnssec_impact(self, output_dir='plots'):
|
||||
"""Compare DNSSEC validation methods (trust vs auth)"""
|
||||
Path(output_dir).mkdir(exist_ok=True)
|
||||
|
||||
# Filter for protocols that have DNSSEC variations
|
||||
dnssec_data = self.df[self.df['dnssec_mode'] != 'none'].copy()
|
||||
|
||||
if len(dnssec_data) == 0:
|
||||
print("⚠ No DNSSEC data available")
|
||||
return
|
||||
|
||||
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 6))
|
||||
|
||||
# Plot 1: Overall DNSSEC impact
|
||||
protocol_order = ['Plain DNS', 'DoT', 'DoH', 'DoH/3', 'DoQ']
|
||||
available = [p for p in protocol_order if p in self.df['protocol_category'].values]
|
||||
|
||||
sns.barplot(data=self.df, x='protocol_category', y='duration_ms',
|
||||
hue='dnssec_mode', order=available, ax=ax1, ci=95)
|
||||
ax1.set_title('DNSSEC Validation Overhead by Protocol', fontsize=12, fontweight='bold')
|
||||
ax1.set_xlabel('Protocol', fontsize=10)
|
||||
ax1.set_ylabel('Mean Response Time (ms)', fontsize=10)
|
||||
ax1.legend(title='DNSSEC Mode', labels=['No DNSSEC', 'Auth (Full)', 'Trust (Resolver)'])
|
||||
ax1.tick_params(axis='x', rotation=0)
|
||||
|
||||
# Plot 2: Trust vs Auth comparison
|
||||
comparison_data = dnssec_data.groupby(['protocol_category', 'dnssec_mode'])['duration_ms'].mean().reset_index()
|
||||
pivot_data = comparison_data.pivot(index='protocol_category', columns='dnssec_mode', values='duration_ms')
|
||||
|
||||
if 'auth' in pivot_data.columns and 'trust' in pivot_data.columns:
|
||||
pivot_data['overhead_pct'] = ((pivot_data['auth'] - pivot_data['trust']) / pivot_data['trust'] * 100)
|
||||
pivot_data['overhead_pct'].plot(kind='bar', ax=ax2, color='coral')
|
||||
ax2.set_title('Auth vs Trust: Additional Overhead (%)', fontsize=12, fontweight='bold')
|
||||
ax2.set_xlabel('Protocol', fontsize=10)
|
||||
ax2.set_ylabel('Additional Overhead (%)', fontsize=10)
|
||||
ax2.axhline(y=0, color='black', linestyle='--', linewidth=0.8)
|
||||
ax2.tick_params(axis='x', rotation=45)
|
||||
ax2.grid(axis='y', alpha=0.3)
|
||||
|
||||
plt.tight_layout()
|
||||
plt.savefig(f'{output_dir}/dnssec_impact.png', bbox_inches='tight')
|
||||
plt.close()
|
||||
print(f"✓ Saved: dnssec_impact.png")
|
||||
|
||||
def plot_persistence_impact(self, output_dir='plots'):
|
||||
"""Analyze impact of connection persistence"""
|
||||
Path(output_dir).mkdir(exist_ok=True)
|
||||
|
||||
persist_data = self.df[self.df['protocol_base'].isin(['tls', 'https'])].copy()
|
||||
|
||||
if len(persist_data) == 0:
|
||||
print("⚠ No persistence data available")
|
||||
return
|
||||
|
||||
plt.figure(figsize=(12, 6))
|
||||
|
||||
sns.barplot(data=persist_data, x='protocol_base', y='duration_ms',
|
||||
hue='persistence', ci=95)
|
||||
|
||||
plt.title('Impact of Connection Persistence on Latency', fontsize=14, fontweight='bold')
|
||||
plt.xlabel('Protocol', fontsize=12)
|
||||
plt.ylabel('Mean Response Time (ms)', fontsize=12)
|
||||
plt.legend(title='Keep-Alive', labels=['Disabled', 'Enabled'])
|
||||
|
||||
# Calculate and annotate overhead reduction
|
||||
for protocol in persist_data['protocol_base'].unique():
|
||||
protocol_data = persist_data[persist_data['protocol_base'] == protocol]
|
||||
|
||||
no_persist = protocol_data[protocol_data['persistence'] == False]['duration_ms'].mean()
|
||||
with_persist = protocol_data[protocol_data['persistence'] == True]['duration_ms'].mean()
|
||||
|
||||
if not np.isnan(no_persist) and not np.isnan(with_persist):
|
||||
reduction = ((no_persist - with_persist) / no_persist * 100)
|
||||
print(f"{protocol}: {reduction:.1f}% reduction with persistence")
|
||||
|
||||
plt.tight_layout()
|
||||
plt.savefig(f'{output_dir}/persistence_impact.png', bbox_inches='tight')
|
||||
plt.close()
|
||||
print(f"✓ Saved: persistence_impact.png")
|
||||
|
||||
def plot_bandwidth_overhead(self, output_dir='plots'):
|
||||
"""Visualize bandwidth usage by protocol"""
|
||||
Path(output_dir).mkdir(exist_ok=True)
|
||||
|
||||
bandwidth_data = self.df.groupby('protocol_category').agg({
|
||||
'request_size_bytes': 'mean',
|
||||
'response_size_bytes': 'mean'
|
||||
}).reset_index()
|
||||
|
||||
bandwidth_data['total_bytes'] = (bandwidth_data['request_size_bytes'] +
|
||||
bandwidth_data['response_size_bytes'])
|
||||
|
||||
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 6))
|
||||
|
||||
# Plot 1: Request vs Response sizes
|
||||
x = np.arange(len(bandwidth_data))
|
||||
width = 0.35
|
||||
|
||||
ax1.bar(x - width/2, bandwidth_data['request_size_bytes'], width,
|
||||
label='Request', alpha=0.8)
|
||||
ax1.bar(x + width/2, bandwidth_data['response_size_bytes'], width,
|
||||
label='Response', alpha=0.8)
|
||||
|
||||
ax1.set_xlabel('Protocol', fontsize=12)
|
||||
ax1.set_ylabel('Bytes', fontsize=12)
|
||||
ax1.set_title('Average Request/Response Sizes', fontsize=12, fontweight='bold')
|
||||
ax1.set_xticks(x)
|
||||
ax1.set_xticklabels(bandwidth_data['protocol_category'])
|
||||
ax1.legend()
|
||||
ax1.grid(axis='y', alpha=0.3)
|
||||
|
||||
# Plot 2: Total bandwidth overhead vs UDP baseline
|
||||
udp_total = bandwidth_data[bandwidth_data['protocol_category'] == 'Plain DNS']['total_bytes'].values
|
||||
if len(udp_total) > 0:
|
||||
bandwidth_data['overhead_vs_udp'] = ((bandwidth_data['total_bytes'] - udp_total[0]) / udp_total[0] * 100)
|
||||
|
||||
colors = ['green' if x < 0 else 'red' for x in bandwidth_data['overhead_vs_udp']]
|
||||
ax2.bar(bandwidth_data['protocol_category'], bandwidth_data['overhead_vs_udp'],
|
||||
color=colors, alpha=0.7)
|
||||
ax2.axhline(y=0, color='black', linestyle='--', linewidth=0.8)
|
||||
ax2.set_xlabel('Protocol', fontsize=12)
|
||||
ax2.set_ylabel('Overhead vs Plain DNS (%)', fontsize=12)
|
||||
ax2.set_title('Bandwidth Overhead', fontsize=12, fontweight='bold')
|
||||
ax2.grid(axis='y', alpha=0.3)
|
||||
|
||||
plt.tight_layout()
|
||||
plt.savefig(f'{output_dir}/bandwidth_overhead.png', bbox_inches='tight')
|
||||
plt.close()
|
||||
print(f"✓ Saved: bandwidth_overhead.png")
|
||||
|
||||
def plot_heatmap(self, output_dir='plots'):
|
||||
"""Heatmap of provider-protocol performance"""
|
||||
Path(output_dir).mkdir(exist_ok=True)
|
||||
|
||||
# Create pivot table
|
||||
heatmap_data = self.df.groupby(['provider', 'protocol_category'])['duration_ms'].median().unstack()
|
||||
|
||||
plt.figure(figsize=(12, 8))
|
||||
sns.heatmap(heatmap_data, annot=True, fmt='.1f', cmap='RdYlGn_r',
|
||||
cbar_kws={'label': 'Median Latency (ms)'})
|
||||
|
||||
plt.title('DNS Provider-Protocol Performance Matrix', fontsize=14, fontweight='bold')
|
||||
plt.xlabel('Protocol', fontsize=12)
|
||||
plt.ylabel('Provider', fontsize=12)
|
||||
|
||||
plt.tight_layout()
|
||||
plt.savefig(f'{output_dir}/provider_protocol_heatmap.png', bbox_inches='tight')
|
||||
plt.close()
|
||||
print(f"✓ Saved: provider_protocol_heatmap.png")
|
||||
|
||||
def plot_percentile_comparison(self, output_dir='plots'):
|
||||
"""Plot percentile comparison across protocols"""
|
||||
Path(output_dir).mkdir(exist_ok=True)
|
||||
|
||||
percentiles = [50, 75, 90, 95, 99]
|
||||
protocol_order = ['Plain DNS', 'DoT', 'DoH', 'DoH/3', 'DoQ']
|
||||
available = [p for p in protocol_order if p in self.df['protocol_category'].values]
|
||||
|
||||
percentile_data = []
|
||||
for protocol in available:
|
||||
data = self.df[self.df['protocol_category'] == protocol]['duration_ms']
|
||||
for p in percentiles:
|
||||
percentile_data.append({
|
||||
'protocol': protocol,
|
||||
'percentile': f'P{p}',
|
||||
'latency': np.percentile(data, p)
|
||||
})
|
||||
|
||||
percentile_df = pd.DataFrame(percentile_data)
|
||||
|
||||
plt.figure(figsize=(14, 7))
|
||||
sns.barplot(data=percentile_df, x='protocol', y='latency', hue='percentile', order=available)
|
||||
|
||||
plt.title('Latency Percentiles by Protocol', fontsize=14, fontweight='bold')
|
||||
plt.xlabel('Protocol', fontsize=12)
|
||||
plt.ylabel('Response Time (ms)', fontsize=12)
|
||||
plt.legend(title='Percentile', bbox_to_anchor=(1.05, 1), loc='upper left')
|
||||
|
||||
plt.tight_layout()
|
||||
plt.savefig(f'{output_dir}/percentile_comparison.png', bbox_inches='tight')
|
||||
plt.close()
|
||||
print(f"✓ Saved: percentile_comparison.png")
|
||||
|
||||
def statistical_tests(self):
|
||||
"""Perform statistical significance tests"""
|
||||
print("\n" + "="*80)
|
||||
print("STATISTICAL TESTS")
|
||||
print("="*80)
|
||||
|
||||
# Test 1: Protocol differences (Kruskal-Wallis)
|
||||
protocols = self.df['protocol_category'].unique()
|
||||
if len(protocols) > 2:
|
||||
groups = [self.df[self.df['protocol_category'] == p]['duration_ms'].values
|
||||
for p in protocols]
|
||||
h_stat, p_value = stats.kruskal(*groups)
|
||||
print(f"\n--- Kruskal-Wallis Test (Protocol Differences) ---")
|
||||
print(f"H-statistic: {h_stat:.4f}")
|
||||
print(f"p-value: {p_value:.4e}")
|
||||
print(f"Result: {'Significant' if p_value < 0.05 else 'Not significant'} differences between protocols")
|
||||
|
||||
# Test 2: DNSSEC impact (Mann-Whitney U)
|
||||
if 'none' in self.df['dnssec_mode'].values and 'auth' in self.df['dnssec_mode'].values:
|
||||
none_data = self.df[self.df['dnssec_mode'] == 'none']['duration_ms']
|
||||
auth_data = self.df[self.df['dnssec_mode'] == 'auth']['duration_ms']
|
||||
|
||||
u_stat, p_value = stats.mannwhitneyu(none_data, auth_data, alternative='two-sided')
|
||||
print(f"\n--- Mann-Whitney U Test (No DNSSEC vs Auth) ---")
|
||||
print(f"U-statistic: {u_stat:.4f}")
|
||||
print(f"p-value: {p_value:.4e}")
|
||||
print(f"Result: {'Significant' if p_value < 0.05 else 'Not significant'} difference")
|
||||
|
||||
# Test 3: Trust vs Auth comparison
|
||||
if 'trust' in self.df['dnssec_mode'].values and 'auth' in self.df['dnssec_mode'].values:
|
||||
trust_data = self.df[self.df['dnssec_mode'] == 'trust']['duration_ms']
|
||||
auth_data = self.df[self.df['dnssec_mode'] == 'auth']['duration_ms']
|
||||
|
||||
u_stat, p_value = stats.mannwhitneyu(trust_data, auth_data, alternative='two-sided')
|
||||
print(f"\n--- Mann-Whitney U Test (Trust vs Auth) ---")
|
||||
print(f"U-statistic: {u_stat:.4f}")
|
||||
print(f"p-value: {p_value:.4e}")
|
||||
print(f"Result: Auth is {'significantly' if p_value < 0.05 else 'not significantly'} slower than Trust")
|
||||
|
||||
def generate_latex_table(self, output_dir='plots'):
|
||||
"""Generate LaTeX table for thesis"""
|
||||
Path(output_dir).mkdir(exist_ok=True)
|
||||
|
||||
# Summary table by protocol
|
||||
summary = self.df.groupby('protocol_category')['duration_ms'].agg([
|
||||
('Mean', 'mean'),
|
||||
('Median', 'median'),
|
||||
('Std Dev', 'std'),
|
||||
('P95', lambda x: x.quantile(0.95)),
|
||||
('P99', lambda x: x.quantile(0.99))
|
||||
]).round(2)
|
||||
|
||||
latex_code = summary.to_latex(float_format="%.2f")
|
||||
|
||||
with open(f'{output_dir}/summary_table.tex', 'w') as f:
|
||||
f.write(latex_code)
|
||||
|
||||
print(f"✓ Saved: summary_table.tex")
|
||||
print("\nLaTeX Table Preview:")
|
||||
print(latex_code)
|
||||
|
||||
def run_full_analysis(self):
|
||||
"""Run complete analysis pipeline"""
|
||||
print("="*80)
|
||||
print("DNS QoS Analysis - Starting Full Analysis")
|
||||
print("="*80)
|
||||
|
||||
# Load data
|
||||
print("\n[1/10] Loading data...")
|
||||
self.load_all_data()
|
||||
|
||||
# Generate statistics
|
||||
print("\n[2/10] Generating summary statistics...")
|
||||
self.generate_summary_statistics()
|
||||
|
||||
# Statistical tests
|
||||
print("\n[3/10] Running statistical tests...")
|
||||
self.statistical_tests()
|
||||
|
||||
# Generate plots
|
||||
print("\n[4/10] Creating latency by protocol plot...")
|
||||
self.plot_latency_by_protocol()
|
||||
|
||||
print("\n[5/10] Creating provider comparison plot...")
|
||||
self.plot_provider_comparison()
|
||||
|
||||
print("\n[6/10] Creating DNSSEC impact plot...")
|
||||
self.plot_dnssec_impact()
|
||||
|
||||
print("\n[7/10] Creating persistence impact plot...")
|
||||
self.plot_persistence_impact()
|
||||
|
||||
print("\n[8/10] Creating bandwidth overhead plot...")
|
||||
self.plot_bandwidth_overhead()
|
||||
|
||||
print("\n[9/10] Creating heatmap...")
|
||||
self.plot_heatmap()
|
||||
|
||||
print("\n[10/10] Creating percentile comparison...")
|
||||
self.plot_percentile_comparison()
|
||||
|
||||
# Generate LaTeX table
|
||||
print("\n[Bonus] Generating LaTeX table...")
|
||||
self.generate_latex_table()
|
||||
|
||||
print("\n" + "="*80)
|
||||
print("✓ Analysis Complete! Check the 'plots' directory for all visualizations.")
|
||||
print("="*80)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
analyzer = DNSAnalyzer(results_dir='results')
|
||||
analyzer.run_full_analysis()
|
||||
@@ -1,536 +0,0 @@
|
||||
import pandas as pd
|
||||
import matplotlib.pyplot as plt
|
||||
import seaborn as sns
|
||||
import numpy as np
|
||||
from pathlib import Path
|
||||
import datetime
|
||||
from dateutil import parser as date_parser
|
||||
import dpkt
|
||||
|
||||
# Set style
|
||||
sns.set_style("whitegrid")
|
||||
plt.rcParams['figure.dpi'] = 300
|
||||
plt.rcParams['savefig.dpi'] = 300
|
||||
plt.rcParams['font.size'] = 10
|
||||
|
||||
class FastDNSAnalyzer:
|
||||
def __init__(self, results_dir='results'):
|
||||
self.results_dir = Path(results_dir)
|
||||
self.all_data = []
|
||||
|
||||
def should_include_file(self, filename):
|
||||
"""Filter out DNSSEC and non-persist files"""
|
||||
name = filename.stem
|
||||
if 'auth' in name or 'trust' in name:
|
||||
return False
|
||||
if name in ['tls', 'https']:
|
||||
return False
|
||||
return True
|
||||
|
||||
def parse_rfc3339_nano(self, timestamp_str):
|
||||
"""Parse RFC3339Nano timestamp with timezone"""
|
||||
try:
|
||||
dt = date_parser.parse(timestamp_str)
|
||||
return dt.astimezone(datetime.timezone.utc).timestamp()
|
||||
except Exception as e:
|
||||
print(f" Error parsing timestamp {timestamp_str}: {e}")
|
||||
return None
|
||||
|
||||
def extract_bandwidth_from_pcap_fast(self, pcap_file, csv_data):
|
||||
"""Fast bandwidth extraction using dpkt"""
|
||||
print(f" Analyzing pcap: {pcap_file.name}")
|
||||
|
||||
try:
|
||||
with open(pcap_file, 'rb') as f:
|
||||
pcap = dpkt.pcap.Reader(f)
|
||||
|
||||
# Build query time windows
|
||||
query_windows = []
|
||||
for idx, row in csv_data.iterrows():
|
||||
start_time = self.parse_rfc3339_nano(row['timestamp'])
|
||||
if start_time is None:
|
||||
continue
|
||||
|
||||
duration_seconds = row['duration_ns'] / 1_000_000_000
|
||||
end_time = start_time + duration_seconds
|
||||
|
||||
query_windows.append({
|
||||
'index': idx,
|
||||
'start': start_time,
|
||||
'end': end_time,
|
||||
'bytes_sent': 0,
|
||||
'bytes_received': 0,
|
||||
'packets_sent': 0,
|
||||
'packets_received': 0
|
||||
})
|
||||
|
||||
if not query_windows:
|
||||
print(" ✗ No valid query windows")
|
||||
return None
|
||||
|
||||
# Sort windows for faster matching
|
||||
query_windows.sort(key=lambda x: x['start'])
|
||||
|
||||
# Process packets
|
||||
packet_count = 0
|
||||
matched_count = 0
|
||||
|
||||
for timestamp, buf in pcap:
|
||||
packet_count += 1
|
||||
packet_size = len(buf)
|
||||
|
||||
# Quick parse to determine direction
|
||||
try:
|
||||
eth = dpkt.ethernet.Ethernet(buf)
|
||||
|
||||
# Get IP layer
|
||||
if isinstance(eth.data, dpkt.ip.IP):
|
||||
ip = eth.data
|
||||
elif isinstance(eth.data, dpkt.ip6.IP6):
|
||||
ip = eth.data
|
||||
else:
|
||||
continue
|
||||
|
||||
# Get transport layer
|
||||
if isinstance(ip.data, dpkt.udp.UDP):
|
||||
transport = ip.data
|
||||
src_port = transport.sport
|
||||
dst_port = transport.dport
|
||||
elif isinstance(ip.data, dpkt.tcp.TCP):
|
||||
transport = ip.data
|
||||
src_port = transport.sport
|
||||
dst_port = transport.dport
|
||||
else:
|
||||
continue
|
||||
|
||||
# Determine direction (client port usually higher)
|
||||
is_outbound = src_port > dst_port
|
||||
|
||||
# Binary search for matching window
|
||||
for window in query_windows:
|
||||
if window['start'] <= timestamp <= window['end']:
|
||||
if is_outbound:
|
||||
window['bytes_sent'] += packet_size
|
||||
window['packets_sent'] += 1
|
||||
else:
|
||||
window['bytes_received'] += packet_size
|
||||
window['packets_received'] += 1
|
||||
matched_count += 1
|
||||
break
|
||||
elif timestamp < window['start']:
|
||||
break # No more windows to check
|
||||
|
||||
except Exception:
|
||||
continue
|
||||
|
||||
print(f" ✓ Processed {packet_count} packets, matched {matched_count}")
|
||||
|
||||
# Convert to DataFrame
|
||||
bandwidth_df = pd.DataFrame(query_windows)
|
||||
return bandwidth_df[['index', 'bytes_sent', 'bytes_received',
|
||||
'packets_sent', 'packets_received']]
|
||||
|
||||
except Exception as e:
|
||||
print(f" ✗ Error reading pcap: {e}")
|
||||
return None
|
||||
|
||||
def load_data(self):
|
||||
"""Load all relevant CSV files and extract bandwidth from pcaps"""
|
||||
print("Loading data and analyzing bandwidth...")
|
||||
|
||||
for provider_dir in self.results_dir.iterdir():
|
||||
if not provider_dir.is_dir():
|
||||
continue
|
||||
|
||||
provider = provider_dir.name
|
||||
|
||||
for csv_file in provider_dir.glob('*.csv'):
|
||||
if not self.should_include_file(csv_file):
|
||||
continue
|
||||
|
||||
try:
|
||||
df = pd.read_csv(csv_file)
|
||||
df['provider'] = provider
|
||||
df['test_file'] = csv_file.stem
|
||||
df['csv_path'] = str(csv_file)
|
||||
|
||||
# Find corresponding pcap file
|
||||
pcap_file = csv_file.with_suffix('.pcap')
|
||||
if pcap_file.exists():
|
||||
print(f" Processing: {provider}/{csv_file.name}")
|
||||
bandwidth_data = self.extract_bandwidth_from_pcap_fast(pcap_file, df)
|
||||
|
||||
if bandwidth_data is not None and len(bandwidth_data) > 0:
|
||||
# Merge bandwidth data
|
||||
df = df.reset_index(drop=True)
|
||||
for col in ['bytes_sent', 'bytes_received', 'packets_sent', 'packets_received']:
|
||||
df[col] = 0
|
||||
|
||||
for _, row in bandwidth_data.iterrows():
|
||||
idx = int(row['index'])
|
||||
if idx < len(df):
|
||||
df.at[idx, 'bytes_sent'] = row['bytes_sent']
|
||||
df.at[idx, 'bytes_received'] = row['bytes_received']
|
||||
df.at[idx, 'packets_sent'] = row['packets_sent']
|
||||
df.at[idx, 'packets_received'] = row['packets_received']
|
||||
|
||||
df['total_bytes'] = df['bytes_sent'] + df['bytes_received']
|
||||
|
||||
print(f" ✓ Extracted bandwidth for {len(df)} queries")
|
||||
else:
|
||||
print(f" ⚠ Could not extract bandwidth data")
|
||||
else:
|
||||
print(f" ⚠ No pcap found for {csv_file.name}")
|
||||
|
||||
self.all_data.append(df)
|
||||
|
||||
except Exception as e:
|
||||
print(f" ✗ Error loading {csv_file}: {e}")
|
||||
import traceback
|
||||
traceback.print_exc()
|
||||
|
||||
print(f"\nTotal files loaded: {len(self.all_data)}")
|
||||
|
||||
def create_line_graphs(self, output_dir='output/line_graphs'):
|
||||
"""Create line graphs for latency and bandwidth"""
|
||||
Path(output_dir).mkdir(parents=True, exist_ok=True)
|
||||
|
||||
print("\nGenerating line graphs...")
|
||||
|
||||
for df in self.all_data:
|
||||
provider = df['provider'].iloc[0]
|
||||
test_name = df['test_file'].iloc[0]
|
||||
|
||||
df['query_index'] = range(1, len(df) + 1)
|
||||
|
||||
# Create figure with 2 subplots
|
||||
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(14, 10))
|
||||
|
||||
# Plot 1: Latency
|
||||
ax1.plot(df['query_index'], df['duration_ms'], marker='o',
|
||||
markersize=4, linewidth=1, alpha=0.7, color='steelblue')
|
||||
mean_latency = df['duration_ms'].mean()
|
||||
ax1.axhline(y=mean_latency, color='r', linestyle='--',
|
||||
label=f'Mean: {mean_latency:.2f} ms', linewidth=2)
|
||||
ax1.set_xlabel('Query Number', fontsize=12)
|
||||
ax1.set_ylabel('Latency (ms)', fontsize=12)
|
||||
ax1.set_title('Latency Over Time', fontsize=12, fontweight='bold')
|
||||
ax1.legend()
|
||||
ax1.grid(True, alpha=0.3)
|
||||
|
||||
# Plot 2: Bandwidth
|
||||
if 'total_bytes' in df.columns and df['total_bytes'].sum() > 0:
|
||||
ax2.plot(df['query_index'], df['bytes_sent'], marker='s',
|
||||
markersize=4, linewidth=1, alpha=0.7,
|
||||
color='orange', label='Sent')
|
||||
ax2.plot(df['query_index'], df['bytes_received'], marker='^',
|
||||
markersize=4, linewidth=1, alpha=0.7,
|
||||
color='green', label='Received')
|
||||
|
||||
mean_sent = df['bytes_sent'].mean()
|
||||
mean_received = df['bytes_received'].mean()
|
||||
ax2.axhline(y=mean_sent, color='orange', linestyle='--',
|
||||
linewidth=1.5, alpha=0.5)
|
||||
ax2.axhline(y=mean_received, color='green', linestyle='--',
|
||||
linewidth=1.5, alpha=0.5)
|
||||
|
||||
ax2.set_xlabel('Query Number', fontsize=12)
|
||||
ax2.set_ylabel('Bytes', fontsize=12)
|
||||
ax2.set_title(f'Bandwidth Over Time (Mean: ↑{mean_sent:.0f}B ↓{mean_received:.0f}B)',
|
||||
fontsize=12, fontweight='bold')
|
||||
ax2.legend()
|
||||
ax2.grid(True, alpha=0.3)
|
||||
|
||||
fig.suptitle(f'{provider.upper()} - {test_name}',
|
||||
fontsize=14, fontweight='bold')
|
||||
plt.tight_layout()
|
||||
|
||||
filename = f"{provider}_{test_name}.png"
|
||||
plt.savefig(f'{output_dir}/{filename}', bbox_inches='tight')
|
||||
plt.close()
|
||||
|
||||
print(f" ✓ Created: {filename}")
|
||||
|
||||
def get_protocol_name(self, test_file):
|
||||
"""Extract clean protocol name"""
|
||||
name = test_file.replace('-persist', '')
|
||||
|
||||
protocol_map = {
|
||||
'udp': 'Plain DNS (UDP)',
|
||||
'tls': 'DoT (DNS over TLS)',
|
||||
'https': 'DoH (DNS over HTTPS)',
|
||||
'doh3': 'DoH/3 (DNS over HTTP/3)',
|
||||
'doq': 'DoQ (DNS over QUIC)'
|
||||
}
|
||||
|
||||
return protocol_map.get(name, name.upper())
|
||||
|
||||
def create_resolver_comparison_bars(self, output_dir='output/comparisons'):
|
||||
"""Create bar graphs comparing resolvers for latency and bandwidth"""
|
||||
Path(output_dir).mkdir(parents=True, exist_ok=True)
|
||||
|
||||
print("\nGenerating resolver comparison graphs...")
|
||||
|
||||
combined_df = pd.concat(self.all_data, ignore_index=True)
|
||||
protocols = combined_df['test_file'].unique()
|
||||
|
||||
for protocol in protocols:
|
||||
protocol_data = combined_df[combined_df['test_file'] == protocol]
|
||||
protocol_name = self.get_protocol_name(protocol)
|
||||
|
||||
# Latency stats
|
||||
latency_stats = protocol_data.groupby('provider')['duration_ms'].agg([
|
||||
('mean', 'mean'),
|
||||
('median', 'median'),
|
||||
('std', 'std')
|
||||
]).reset_index()
|
||||
|
||||
# Create latency comparison
|
||||
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 6))
|
||||
fig.suptitle(f'{protocol_name} - Latency Comparison',
|
||||
fontsize=16, fontweight='bold')
|
||||
|
||||
# Mean latency
|
||||
bars1 = ax1.bar(latency_stats['provider'], latency_stats['mean'],
|
||||
color='steelblue', alpha=0.8, edgecolor='black')
|
||||
ax1.errorbar(latency_stats['provider'], latency_stats['mean'],
|
||||
yerr=latency_stats['std'], fmt='none', color='black',
|
||||
capsize=5, alpha=0.6)
|
||||
|
||||
for bar in bars1:
|
||||
height = bar.get_height()
|
||||
ax1.text(bar.get_x() + bar.get_width()/2., height,
|
||||
f'{height:.2f}',
|
||||
ha='center', va='bottom', fontweight='bold')
|
||||
|
||||
ax1.set_xlabel('Resolver', fontsize=12)
|
||||
ax1.set_ylabel('Mean Latency (ms)', fontsize=12)
|
||||
ax1.set_title('Mean Latency', fontsize=12)
|
||||
ax1.grid(axis='y', alpha=0.3)
|
||||
|
||||
# Median latency
|
||||
bars2 = ax2.bar(latency_stats['provider'], latency_stats['median'],
|
||||
color='coral', alpha=0.8, edgecolor='black')
|
||||
|
||||
for bar in bars2:
|
||||
height = bar.get_height()
|
||||
ax2.text(bar.get_x() + bar.get_width()/2., height,
|
||||
f'{height:.2f}',
|
||||
ha='center', va='bottom', fontweight='bold')
|
||||
|
||||
ax2.set_xlabel('Resolver', fontsize=12)
|
||||
ax2.set_ylabel('Median Latency (ms)', fontsize=12)
|
||||
ax2.set_title('Median Latency', fontsize=12)
|
||||
ax2.grid(axis='y', alpha=0.3)
|
||||
|
||||
plt.tight_layout()
|
||||
plt.savefig(f'{output_dir}/latency_{protocol}.png', bbox_inches='tight')
|
||||
plt.close()
|
||||
print(f" ✓ Created: latency_{protocol}.png")
|
||||
|
||||
# Bandwidth comparison
|
||||
if 'total_bytes' in protocol_data.columns and protocol_data['total_bytes'].sum() > 0:
|
||||
bandwidth_stats = protocol_data.groupby('provider').agg({
|
||||
'bytes_sent': 'mean',
|
||||
'bytes_received': 'mean',
|
||||
'total_bytes': 'mean'
|
||||
}).reset_index()
|
||||
|
||||
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 6))
|
||||
fig.suptitle(f'{protocol_name} - Bandwidth Comparison',
|
||||
fontsize=16, fontweight='bold')
|
||||
|
||||
# Sent vs Received
|
||||
x = np.arange(len(bandwidth_stats))
|
||||
width = 0.35
|
||||
|
||||
bars1 = ax1.bar(x - width/2, bandwidth_stats['bytes_sent'], width,
|
||||
label='Sent', color='orange', alpha=0.8, edgecolor='black')
|
||||
bars2 = ax1.bar(x + width/2, bandwidth_stats['bytes_received'], width,
|
||||
label='Received', color='green', alpha=0.8, edgecolor='black')
|
||||
|
||||
ax1.set_xlabel('Resolver', fontsize=12)
|
||||
ax1.set_ylabel('Bytes per Query', fontsize=12)
|
||||
ax1.set_title('Average Bandwidth per Query', fontsize=12)
|
||||
ax1.set_xticks(x)
|
||||
ax1.set_xticklabels(bandwidth_stats['provider'])
|
||||
ax1.legend()
|
||||
ax1.grid(axis='y', alpha=0.3)
|
||||
|
||||
# Total bandwidth
|
||||
bars3 = ax2.bar(bandwidth_stats['provider'], bandwidth_stats['total_bytes'],
|
||||
color='purple', alpha=0.8, edgecolor='black')
|
||||
|
||||
for bar in bars3:
|
||||
height = bar.get_height()
|
||||
ax2.text(bar.get_x() + bar.get_width()/2., height,
|
||||
f'{height:.0f}',
|
||||
ha='center', va='bottom', fontweight='bold')
|
||||
|
||||
ax2.set_xlabel('Resolver', fontsize=12)
|
||||
ax2.set_ylabel('Total Bytes per Query', fontsize=12)
|
||||
ax2.set_title('Total Bandwidth per Query', fontsize=12)
|
||||
ax2.grid(axis='y', alpha=0.3)
|
||||
|
||||
plt.tight_layout()
|
||||
plt.savefig(f'{output_dir}/bandwidth_{protocol}.png', bbox_inches='tight')
|
||||
plt.close()
|
||||
print(f" ✓ Created: bandwidth_{protocol}.png")
|
||||
|
||||
def generate_latex_tables(self, output_dir='output/tables'):
|
||||
"""Generate LaTeX tables with latency and bandwidth statistics"""
|
||||
Path(output_dir).mkdir(parents=True, exist_ok=True)
|
||||
|
||||
print("\nGenerating LaTeX tables...")
|
||||
|
||||
combined_df = pd.concat(self.all_data, ignore_index=True)
|
||||
|
||||
# Generate latency table for each resolver
|
||||
for provider in combined_df['provider'].unique():
|
||||
provider_data = combined_df[combined_df['provider'] == provider]
|
||||
|
||||
stats = provider_data.groupby('test_file')['duration_ms'].agg([
|
||||
('Mean', 'mean'),
|
||||
('Median', 'median'),
|
||||
('Std Dev', 'std'),
|
||||
('P95', lambda x: x.quantile(0.95)),
|
||||
('P99', lambda x: x.quantile(0.99))
|
||||
]).round(2)
|
||||
|
||||
stats.index = stats.index.map(self.get_protocol_name)
|
||||
stats.index.name = 'Protocol'
|
||||
|
||||
latex_code = stats.to_latex(
|
||||
caption=f'{provider.upper()} - Latency Statistics (ms)',
|
||||
label=f'tab:{provider}_latency',
|
||||
float_format="%.2f"
|
||||
)
|
||||
|
||||
with open(f'{output_dir}/{provider}_latency.tex', 'w') as f:
|
||||
f.write(latex_code)
|
||||
|
||||
print(f" ✓ Created: {provider}_latency.tex")
|
||||
|
||||
# Generate bandwidth table for each resolver
|
||||
for provider in combined_df['provider'].unique():
|
||||
provider_data = combined_df[combined_df['provider'] == provider]
|
||||
|
||||
if 'total_bytes' not in provider_data.columns or provider_data['total_bytes'].sum() == 0:
|
||||
continue
|
||||
|
||||
bandwidth_stats = provider_data.groupby('test_file').agg({
|
||||
'bytes_sent': 'mean',
|
||||
'bytes_received': 'mean',
|
||||
'total_bytes': 'mean'
|
||||
}).round(2)
|
||||
|
||||
bandwidth_stats.columns = ['Avg Sent (B)', 'Avg Received (B)', 'Avg Total (B)']
|
||||
bandwidth_stats.index = bandwidth_stats.index.map(self.get_protocol_name)
|
||||
bandwidth_stats.index.name = 'Protocol'
|
||||
|
||||
latex_code = bandwidth_stats.to_latex(
|
||||
caption=f'{provider.upper()} - Bandwidth Statistics',
|
||||
label=f'tab:{provider}_bandwidth',
|
||||
float_format="%.2f"
|
||||
)
|
||||
|
||||
with open(f'{output_dir}/{provider}_bandwidth.tex', 'w') as f:
|
||||
f.write(latex_code)
|
||||
|
||||
print(f" ✓ Created: {provider}_bandwidth.tex")
|
||||
|
||||
# Generate protocol efficiency table
|
||||
print("\nGenerating protocol efficiency table...")
|
||||
|
||||
if 'total_bytes' in combined_df.columns and combined_df['total_bytes'].sum() > 0:
|
||||
protocol_bandwidth = combined_df.groupby('test_file').agg({
|
||||
'bytes_sent': 'mean',
|
||||
'bytes_received': 'mean',
|
||||
'total_bytes': 'mean'
|
||||
}).round(2)
|
||||
|
||||
# Find UDP baseline
|
||||
udp_baseline = None
|
||||
for protocol in protocol_bandwidth.index:
|
||||
if 'udp' in protocol:
|
||||
udp_baseline = protocol_bandwidth.loc[protocol, 'total_bytes']
|
||||
break
|
||||
|
||||
if udp_baseline and udp_baseline > 0:
|
||||
protocol_bandwidth['Overhead vs UDP (%)'] = (
|
||||
(protocol_bandwidth['total_bytes'] - udp_baseline) / udp_baseline * 100
|
||||
).round(1)
|
||||
protocol_bandwidth['Efficiency (%)'] = (
|
||||
100 / (1 + protocol_bandwidth['Overhead vs UDP (%)'] / 100)
|
||||
).round(1)
|
||||
|
||||
protocol_bandwidth.columns = ['Avg Sent (B)', 'Avg Received (B)',
|
||||
'Avg Total (B)', 'Overhead (%)', 'Efficiency (%)']
|
||||
protocol_bandwidth.index = protocol_bandwidth.index.map(self.get_protocol_name)
|
||||
protocol_bandwidth.index.name = 'Protocol'
|
||||
|
||||
latex_code = protocol_bandwidth.to_latex(
|
||||
caption='Protocol Bandwidth Efficiency Comparison',
|
||||
label='tab:protocol_efficiency',
|
||||
float_format="%.2f"
|
||||
)
|
||||
|
||||
with open(f'{output_dir}/protocol_efficiency.tex', 'w') as f:
|
||||
f.write(latex_code)
|
||||
|
||||
print(f" ✓ Created: protocol_efficiency.tex")
|
||||
print("\n--- Protocol Efficiency ---")
|
||||
print(protocol_bandwidth.to_string())
|
||||
|
||||
# Generate combined comparison tables
|
||||
for metric in ['Mean', 'Median', 'P95']:
|
||||
comparison_stats = combined_df.groupby(['provider', 'test_file'])['duration_ms'].agg([
|
||||
('Mean', 'mean'),
|
||||
('Median', 'median'),
|
||||
('P95', lambda x: x.quantile(0.95))
|
||||
]).round(2)
|
||||
|
||||
pivot_table = comparison_stats[metric].unstack(level=0)
|
||||
pivot_table.index = pivot_table.index.map(self.get_protocol_name)
|
||||
pivot_table.index.name = 'Protocol'
|
||||
|
||||
latex_code = pivot_table.to_latex(
|
||||
caption=f'Resolver Latency Comparison - {metric} (ms)',
|
||||
label=f'tab:comparison_{metric.lower()}',
|
||||
float_format="%.2f"
|
||||
)
|
||||
|
||||
with open(f'{output_dir}/comparison_{metric.lower()}.tex', 'w') as f:
|
||||
f.write(latex_code)
|
||||
|
||||
print(f" ✓ Created: comparison_{metric.lower()}.tex")
|
||||
|
||||
def run_analysis(self):
|
||||
"""Run the complete analysis"""
|
||||
print("="*80)
|
||||
print("Fast DNS QoS Analysis with Bandwidth")
|
||||
print("="*80)
|
||||
|
||||
self.load_data()
|
||||
|
||||
if not self.all_data:
|
||||
print("\n⚠ No data loaded.")
|
||||
return
|
||||
|
||||
print("\n" + "="*80)
|
||||
self.create_line_graphs()
|
||||
|
||||
print("\n" + "="*80)
|
||||
self.create_resolver_comparison_bars()
|
||||
|
||||
print("\n" + "="*80)
|
||||
self.generate_latex_tables()
|
||||
|
||||
print("\n" + "="*80)
|
||||
print("✓ Analysis Complete!")
|
||||
print("="*80)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
analyzer = FastDNSAnalyzer(results_dir='results')
|
||||
analyzer.run_analysis()
|
||||
@@ -15,6 +15,15 @@ import dpkt
|
||||
from dateutil import parser as date_parser
|
||||
|
||||
|
||||
BANDWIDTH_COLUMNS = [
|
||||
'bytes_sent',
|
||||
'bytes_received',
|
||||
'packets_sent',
|
||||
'packets_received',
|
||||
'total_bytes',
|
||||
]
|
||||
|
||||
|
||||
class Packet(NamedTuple):
|
||||
"""Lightweight packet representation."""
|
||||
timestamp: float
|
||||
@@ -36,6 +45,36 @@ class QueryWindow:
|
||||
self.pkts_received = 0
|
||||
|
||||
|
||||
def is_already_processed(csv_path: Path) -> bool:
|
||||
"""
|
||||
Check if CSV has already been processed.
|
||||
Returns True if bandwidth columns exist AND at least one row has non-zero data.
|
||||
"""
|
||||
try:
|
||||
with open(csv_path, 'r', encoding='utf-8') as f:
|
||||
reader = csv.DictReader(f)
|
||||
|
||||
# Check if columns exist
|
||||
if not reader.fieldnames:
|
||||
return False
|
||||
|
||||
if not all(col in reader.fieldnames for col in BANDWIDTH_COLUMNS):
|
||||
return False
|
||||
|
||||
# Check if any row has non-zero bandwidth data
|
||||
for row in reader:
|
||||
for col in BANDWIDTH_COLUMNS:
|
||||
val = row.get(col, '').strip()
|
||||
if val and val != '0':
|
||||
return True
|
||||
|
||||
# All rows have zero/empty values - not truly processed
|
||||
return False
|
||||
|
||||
except Exception:
|
||||
return False
|
||||
|
||||
|
||||
def parse_csv_timestamp(ts_str: str) -> float:
|
||||
"""Convert RFC3339Nano timestamp to Unix epoch (seconds)."""
|
||||
dt = date_parser.isoparse(ts_str)
|
||||
@@ -249,24 +288,20 @@ def write_enriched_csv(
|
||||
shutil.copy2(csv_path, backup_path)
|
||||
print(f" Backup: {backup_path.name}")
|
||||
|
||||
# Get fieldnames
|
||||
original_fields = list(queries[0]['data'].keys())
|
||||
new_fields = [
|
||||
'bytes_sent',
|
||||
'bytes_received',
|
||||
'packets_sent',
|
||||
'packets_received',
|
||||
'total_bytes',
|
||||
# Get fieldnames - filter out any existing bandwidth columns to avoid dupes
|
||||
original_fields = [
|
||||
f for f in queries[0]['data'].keys()
|
||||
if f not in BANDWIDTH_COLUMNS
|
||||
]
|
||||
fieldnames = original_fields + new_fields
|
||||
fieldnames = original_fields + BANDWIDTH_COLUMNS
|
||||
|
||||
with open(csv_path, 'w', encoding='utf-8', newline='') as f:
|
||||
writer = csv.DictWriter(f, fieldnames=fieldnames)
|
||||
writer.writeheader()
|
||||
|
||||
for q in queries:
|
||||
row = q['data'].copy()
|
||||
for field in new_fields:
|
||||
row = {k: v for k, v in q['data'].items() if k not in BANDWIDTH_COLUMNS}
|
||||
for field in BANDWIDTH_COLUMNS:
|
||||
row[field] = q[field]
|
||||
writer.writerow(row)
|
||||
|
||||
@@ -281,6 +316,7 @@ def process_provider_directory(provider_path: Path):
|
||||
|
||||
csv_files = sorted(provider_path.glob('*.csv'))
|
||||
processed = 0
|
||||
skipped = 0
|
||||
total_time = 0
|
||||
|
||||
for csv_path in csv_files:
|
||||
@@ -294,6 +330,12 @@ def process_provider_directory(provider_path: Path):
|
||||
print(f"\n ⚠ Skipping {csv_path.name} - no matching PCAP")
|
||||
continue
|
||||
|
||||
# Check if already processed
|
||||
if is_already_processed(csv_path):
|
||||
print(f"\n ⏭ Skipping {csv_path.name} - already processed")
|
||||
skipped += 1
|
||||
continue
|
||||
|
||||
print(f"\n 📁 {csv_path.name}")
|
||||
file_start = time.time()
|
||||
|
||||
@@ -323,7 +365,8 @@ def process_provider_directory(provider_path: Path):
|
||||
print(f" ✓ Completed in {file_time:.2f}s")
|
||||
|
||||
print(f"\n {'='*58}")
|
||||
print(f" {provider_path.name}: {processed} files in {total_time:.2f}s")
|
||||
print(f" {provider_path.name}: {processed} processed, {skipped} skipped")
|
||||
print(f" Time: {total_time:.2f}s")
|
||||
print(f" {'='*58}")
|
||||
|
||||
|
||||
@@ -0,0 +1,207 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Merge all DNS test CSVs into a single unified CSV.
|
||||
Extracts metadata from filenames and directory structure.
|
||||
"""
|
||||
|
||||
import csv
|
||||
import os
|
||||
from pathlib import Path
|
||||
from dateutil import parser as date_parser
|
||||
import argparse
|
||||
|
||||
|
||||
def parse_config(filename: str) -> dict:
|
||||
"""
|
||||
Parse protocol, dnssec_mode, and keep_alive from filename.
|
||||
|
||||
Examples:
|
||||
doh3-auth.csv → protocol=doh3, dnssec=auth, persist=0
|
||||
tls-trust-persist.csv → protocol=tls, dnssec=trust, persist=1
|
||||
https.csv → protocol=https, dnssec=off, persist=0
|
||||
doudp-auth.csv → protocol=doudp, dnssec=auth, persist=0
|
||||
dnscrypt-trust.csv → protocol=dnscrypt, dnssec=trust, persist=0
|
||||
"""
|
||||
base = filename.replace('.csv', '')
|
||||
parts = base.split('-')
|
||||
|
||||
protocol = parts[0]
|
||||
dnssec_mode = 'off'
|
||||
keep_alive = 0
|
||||
|
||||
for part in parts[1:]:
|
||||
if part in ('auth', 'trust'):
|
||||
dnssec_mode = part
|
||||
elif part == 'persist':
|
||||
keep_alive = 1
|
||||
|
||||
return {
|
||||
'protocol': protocol,
|
||||
'dnssec_mode': dnssec_mode,
|
||||
'keep_alive': keep_alive,
|
||||
}
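# Quick self-check of the parser against the filenames listed in the
# docstring above (illustrative only; these names are examples, not real runs):
assert parse_config('doh3-auth.csv') == {'protocol': 'doh3', 'dnssec_mode': 'auth', 'keep_alive': 0}
assert parse_config('tls-trust-persist.csv') == {'protocol': 'tls', 'dnssec_mode': 'trust', 'keep_alive': 1}
assert parse_config('https.csv') == {'protocol': 'https', 'dnssec_mode': 'off', 'keep_alive': 0}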
|
||||
|
||||
|
||||
def parse_timestamp_unix(ts_str: str) -> float:
|
||||
"""Convert RFC3339 timestamp to Unix epoch."""
|
||||
try:
|
||||
dt = date_parser.isoparse(ts_str)
|
||||
return dt.timestamp()
|
||||
except Exception:
|
||||
return 0.0
|
||||
|
||||
|
||||
def ns_to_ms(duration_ns: str) -> float:
|
||||
"""Convert nanoseconds to milliseconds."""
|
||||
try:
|
||||
return float(duration_ns) / 1_000_000
|
||||
except (ValueError, TypeError):
|
||||
return 0.0
|
||||
|
||||
|
||||
def find_csv_files(input_dir: Path) -> list:
|
||||
"""Find all non-backup CSV files."""
|
||||
files = []
|
||||
for csv_path in input_dir.rglob('*.csv'):
|
||||
if '.bak' in csv_path.name:
|
||||
continue
|
||||
files.append(csv_path)
|
||||
return sorted(files)
|
||||
|
||||
|
||||
def merge_all_csvs(input_dir: Path, output_path: Path):
|
||||
"""Merge all CSVs into a single file."""
|
||||
|
||||
csv_files = find_csv_files(input_dir)
|
||||
|
||||
if not csv_files:
|
||||
print("No CSV files found")
|
||||
return
|
||||
|
||||
print(f"Found {len(csv_files)} CSV files")
|
||||
|
||||
# Output columns in desired order
|
||||
output_columns = [
|
||||
'id',
|
||||
'provider',
|
||||
'protocol',
|
||||
'dnssec_mode',
|
||||
'domain',
|
||||
'query_type',
|
||||
'keep_alive',
|
||||
'dns_server',
|
||||
'timestamp',
|
||||
'timestamp_unix',
|
||||
'duration_ns',
|
||||
'duration_ms',
|
||||
'request_size_bytes',
|
||||
'response_size_bytes',
|
||||
'bytes_sent',
|
||||
'bytes_received',
|
||||
'packets_sent',
|
||||
'packets_received',
|
||||
'total_bytes',
|
||||
'response_code',
|
||||
'error',
|
||||
]
|
||||
|
||||
global_id = 0
|
||||
total_rows = 0
|
||||
|
||||
with open(output_path, 'w', newline='', encoding='utf-8') as outfile:
|
||||
writer = csv.DictWriter(outfile, fieldnames=output_columns)
|
||||
writer.writeheader()
|
||||
|
||||
for csv_path in csv_files:
|
||||
# Extract provider from path
|
||||
provider = csv_path.parent.name.lower()
|
||||
|
||||
# Parse config from filename
|
||||
config = parse_config(csv_path.name)
|
||||
|
||||
print(f" {provider}/{csv_path.name} ({config['protocol']}, {config['dnssec_mode']}, persist={config['keep_alive']})")
|
||||
|
||||
file_rows = 0
|
||||
|
||||
with open(csv_path, 'r', newline='', encoding='utf-8') as infile:
|
||||
reader = csv.DictReader(infile)
|
||||
|
||||
for row in reader:
|
||||
global_id += 1
|
||||
file_rows += 1
|
||||
|
||||
# Build output row
|
||||
out_row = {
|
||||
'id': global_id,
|
||||
'provider': provider,
|
||||
'protocol': config['protocol'],
|
||||
'dnssec_mode': config['dnssec_mode'],
|
||||
'keep_alive': config['keep_alive'],
|
||||
'domain': row.get('domain', ''),
|
||||
'query_type': row.get('query_type', ''),
|
||||
'dns_server': row.get('dns_server', ''),
|
||||
'timestamp': row.get('timestamp', ''),
|
||||
'timestamp_unix': parse_timestamp_unix(row.get('timestamp', '')),
|
||||
'duration_ns': row.get('duration_ns', ''),
|
||||
'duration_ms': ns_to_ms(row.get('duration_ns', '')),
|
||||
'request_size_bytes': row.get('request_size_bytes', ''),
|
||||
'response_size_bytes': row.get('response_size_bytes', ''),
|
||||
'bytes_sent': row.get('bytes_sent', ''),
|
||||
'bytes_received': row.get('bytes_received', ''),
|
||||
'packets_sent': row.get('packets_sent', ''),
|
||||
'packets_received': row.get('packets_received', ''),
|
||||
'total_bytes': row.get('total_bytes', ''),
|
||||
'response_code': row.get('response_code', ''),
|
||||
'error': row.get('error', ''),
|
||||
}
|
||||
|
||||
writer.writerow(out_row)
|
||||
|
||||
total_rows += file_rows
|
||||
print(f" → {file_rows:,} rows")
|
||||
|
||||
print(f"\n{'='*60}")
|
||||
print(f"Output: {output_path}")
|
||||
print(f"Total rows: {total_rows:,}")
|
||||
print(f"{'='*60}")
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(
|
||||
description='Merge all DNS test CSVs into a single file'
|
||||
)
|
||||
parser.add_argument(
|
||||
'input_dir',
|
||||
nargs='?',
|
||||
default='.',
|
||||
help='Input directory containing provider folders (default: .)'
|
||||
)
|
||||
parser.add_argument(
|
||||
'-o', '--output',
|
||||
default='dns_results.csv',
|
||||
help='Output CSV path (default: dns_results.csv)'
|
||||
)
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
input_dir = Path(args.input_dir)
|
||||
output_path = Path(args.output)
|
||||
|
||||
if not input_dir.exists():
|
||||
print(f"Error: Input directory not found: {input_dir}")
|
||||
return 1
|
||||
|
||||
print("="*60)
|
||||
print("MERGE ALL DNS CSVs")
|
||||
print("="*60)
|
||||
print(f"Input: {input_dir}")
|
||||
print(f"Output: {output_path}")
|
||||
print()
|
||||
|
||||
merge_all_csvs(input_dir, output_path)
|
||||
|
||||
return 0
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
exit(main())
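As a quick sanity check of the merged output, the unified CSV can be inspected with a few lines of Python. A minimal sketch, assuming the merge was run with the default output name dns_results.csv in the working directory:

import csv
from collections import Counter

with open('dns_results.csv', newline='', encoding='utf-8') as f:
    reader = csv.DictReader(f)
    rows = list(reader)

print(f"columns: {reader.fieldnames}")
print(f"rows: {len(rows):,}")
# rows per provider, to confirm every provider folder was picked up
print(Counter(row['provider'] for row in rows))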
|
||||
@@ -0,0 +1,95 @@
|
||||
package stats
|
||||
|
||||
import (
|
||||
"encoding/csv"
|
||||
"fmt"
|
||||
"os"
|
||||
"runtime"
|
||||
"time"
|
||||
)
|
||||
|
||||
type RuntimeStats struct {
|
||||
TotalAlloc uint64
|
||||
Mallocs uint64
|
||||
NumGC uint32
|
||||
AllocDelta uint64
|
||||
MallocsDelta uint64
|
||||
GCDelta uint32
|
||||
}
|
||||
|
||||
type RuntimeCollector struct {
|
||||
startStats runtime.MemStats
|
||||
memPath string
|
||||
}
|
||||
|
||||
func NewRuntimeCollector(memPath string) *RuntimeCollector {
|
||||
var stats runtime.MemStats
|
||||
runtime.ReadMemStats(&stats)
|
||||
|
||||
return &RuntimeCollector{
|
||||
startStats: stats,
|
||||
memPath: memPath,
|
||||
}
|
||||
}
|
||||
|
||||
func (rc *RuntimeCollector) Collect() RuntimeStats {
|
||||
var current runtime.MemStats
|
||||
runtime.ReadMemStats(¤t)
|
||||
|
||||
return RuntimeStats{
|
||||
TotalAlloc: current.TotalAlloc,
|
||||
Mallocs: current.Mallocs,
|
||||
NumGC: current.NumGC,
|
||||
AllocDelta: current.TotalAlloc - rc.startStats.TotalAlloc,
|
||||
MallocsDelta: current.Mallocs - rc.startStats.Mallocs,
|
||||
GCDelta: current.NumGC - rc.startStats.NumGC,
|
||||
}
|
||||
}
|
||||
|
||||
func (rc *RuntimeCollector) WriteStats() error {
|
||||
stats := rc.Collect()
|
||||
timestamp := time.Now().Format(time.RFC3339Nano)
|
||||
|
||||
// Check if file exists
|
||||
fileExists := false
|
||||
if _, err := os.Stat(rc.memPath); err == nil {
|
||||
fileExists = true
|
||||
}
|
||||
|
||||
// Open in append mode
|
||||
file, err := os.OpenFile(rc.memPath, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to open mem.csv: %w", err)
|
||||
}
|
||||
defer file.Close()
|
||||
|
||||
writer := csv.NewWriter(file)
|
||||
|
||||
// Write header if new file
|
||||
if !fileExists {
|
||||
header := []string{
|
||||
"timestamp", "total_alloc_bytes", "mallocs", "gc_cycles",
|
||||
"alloc_delta", "mallocs_delta", "gc_delta",
|
||||
}
|
||||
if err := writer.Write(header); err != nil {
|
||||
return fmt.Errorf("failed to write mem.csv header: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
// Write data row
|
||||
row := []string{
|
||||
timestamp,
|
||||
fmt.Sprintf("%d", stats.TotalAlloc),
|
||||
fmt.Sprintf("%d", stats.Mallocs),
|
||||
fmt.Sprintf("%d", stats.NumGC),
|
||||
fmt.Sprintf("%d", stats.AllocDelta),
|
||||
fmt.Sprintf("%d", stats.MallocsDelta),
|
||||
fmt.Sprintf("%d", stats.GCDelta),
|
||||
}
|
||||
if err := writer.Write(row); err != nil {
|
||||
return fmt.Errorf("failed to write mem.csv row: %w", err)
|
||||
}
|
||||
|
||||
writer.Flush()
|
||||
return writer.Error()
|
||||
}
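The rows appended by WriteStats can be summarised with a short Python snippet. This is only a sketch: the path below is a placeholder, use whichever memPath was handed to NewRuntimeCollector.

import csv

mem_path = 'mem.csv'  # placeholder: the memPath passed to NewRuntimeCollector
with open(mem_path, newline='') as f:
    for row in csv.DictReader(f):
        # column names match the header written by WriteStats
        mb = int(row['alloc_delta']) / (1024 * 1024)
        print(f"{row['timestamp']}: {mb:.1f} MiB allocated (delta), {row['gc_delta']} GC cycles")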
|
||||
Executable → Regular
@@ -1,369 +0,0 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"encoding/csv"
|
||||
"fmt"
|
||||
"log"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/google/gopacket"
|
||||
"github.com/google/gopacket/layers"
|
||||
"github.com/google/gopacket/pcapgo"
|
||||
)
|
||||
|
||||
type QueryRecord struct {
|
||||
Domain string
|
||||
QueryType string
|
||||
Protocol string
|
||||
DNSSec string
|
||||
AuthDNSSec string
|
||||
KeepAlive string
|
||||
DNSServer string
|
||||
Timestamp string
|
||||
DurationNs int64
|
||||
DurationMs float64
|
||||
RequestSizeBytes int
|
||||
ResponseSizeBytes int
|
||||
ResponseCode string
|
||||
Error string
|
||||
BytesSent int64
|
||||
BytesReceived int64
|
||||
PacketsSent int64
|
||||
PacketsReceived int64
|
||||
TotalBytes int64
|
||||
}
|
||||
|
||||
func parseRFC3339Nano(ts string) (time.Time, error) {
|
||||
return time.Parse(time.RFC3339Nano, ts)
|
||||
}
|
||||
|
||||
func processProviderFolder(providerPath string) error {
|
||||
providerName := filepath.Base(providerPath)
|
||||
fmt.Printf("\n=== Processing provider: %s ===\n", providerName)
|
||||
|
||||
files, err := os.ReadDir(providerPath)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
processed := 0
|
||||
skipped := 0
|
||||
errors := 0
|
||||
|
||||
for _, file := range files {
|
||||
if !strings.HasSuffix(file.Name(), ".csv") {
|
||||
continue
|
||||
}
|
||||
|
||||
csvPath := filepath.Join(providerPath, file.Name())
|
||||
pcapPath := strings.Replace(csvPath, ".csv", ".pcap", 1)
|
||||
|
||||
// Check if PCAP exists
|
||||
if _, err := os.Stat(pcapPath); os.IsNotExist(err) {
|
||||
fmt.Printf(" ⊗ Skipping: %s (no matching PCAP)\n", file.Name())
|
||||
skipped++
|
||||
continue
|
||||
}
|
||||
|
||||
// Check if already processed (has backup)
|
||||
backupPath := csvPath + ".bak"
|
||||
if _, err := os.Stat(backupPath); err == nil {
|
||||
fmt.Printf(" ⊙ Skipping: %s (already processed, backup exists)\n", file.Name())
|
||||
skipped++
|
||||
continue
|
||||
}
|
||||
|
||||
fmt.Printf(" ↻ Processing: %s ... ", file.Name())
|
||||
if err := processPair(csvPath, pcapPath); err != nil {
|
||||
fmt.Printf("ERROR\n")
|
||||
log.Printf(" Error: %v\n", err)
|
||||
errors++
|
||||
} else {
|
||||
fmt.Printf("✓\n")
|
||||
processed++
|
||||
}
|
||||
}
|
||||
|
||||
fmt.Printf(" Summary: %d processed, %d skipped, %d errors\n", processed, skipped, errors)
|
||||
return nil
|
||||
}
|
||||
|
||||
func processPair(csvPath, pcapPath string) error {
|
||||
// Create backup
|
||||
backupPath := csvPath + ".bak"
|
||||
input, err := os.ReadFile(csvPath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("backup read failed: %w", err)
|
||||
}
|
||||
if err := os.WriteFile(backupPath, input, 0644); err != nil {
|
||||
return fmt.Errorf("backup write failed: %w", err)
|
||||
}
|
||||
|
||||
// Read CSV records
|
||||
records, err := readCSV(csvPath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("CSV read failed: %w", err)
|
||||
}
|
||||
|
||||
if len(records) == 0 {
|
||||
return fmt.Errorf("no records in CSV")
|
||||
}
|
||||
|
||||
// Read and parse PCAP
|
||||
packets, err := readPCAPGo(pcapPath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("PCAP read failed: %w", err)
|
||||
}
|
||||
|
||||
// Enrich records with bandwidth data
|
||||
enrichRecords(records, packets)
|
||||
|
||||
// Write enriched CSV
|
||||
if err := writeCSV(csvPath, records); err != nil {
|
||||
return fmt.Errorf("CSV write failed: %w", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func readCSV(path string) ([]*QueryRecord, error) {
|
||||
f, err := os.Open(path)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
r := csv.NewReader(f)
|
||||
rows, err := r.ReadAll()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if len(rows) < 2 {
|
||||
return nil, fmt.Errorf("CSV has no data rows")
|
||||
}
|
||||
|
||||
records := make([]*QueryRecord, 0, len(rows)-1)
|
||||
for i := 1; i < len(rows); i++ {
|
||||
row := rows[i]
|
||||
if len(row) < 14 {
|
||||
log.Printf(" Warning: Skipping malformed row %d", i+1)
|
||||
continue
|
||||
}
|
||||
|
||||
durationNs, _ := strconv.ParseInt(row[8], 10, 64)
|
||||
durationMs, _ := strconv.ParseFloat(row[9], 64)
|
||||
reqSize, _ := strconv.Atoi(row[10])
|
||||
respSize, _ := strconv.Atoi(row[11])
|
||||
|
||||
records = append(records, &QueryRecord{
|
||||
Domain: row[0],
|
||||
QueryType: row[1],
|
||||
Protocol: row[2],
|
||||
DNSSec: row[3],
|
||||
AuthDNSSec: row[4],
|
||||
KeepAlive: row[5],
|
||||
DNSServer: row[6],
|
||||
Timestamp: row[7],
|
||||
DurationNs: durationNs,
|
||||
DurationMs: durationMs,
|
||||
RequestSizeBytes: reqSize,
|
||||
ResponseSizeBytes: respSize,
|
||||
ResponseCode: row[12],
|
||||
Error: row[13],
|
||||
})
|
||||
}
|
||||
|
||||
return records, nil
|
||||
}
|
||||
|
||||
type PacketInfo struct {
|
||||
Timestamp time.Time
|
||||
Size int
|
||||
IsSent bool
|
||||
}
|
||||
|
||||
func readPCAPGo(path string) ([]PacketInfo, error) {
|
||||
f, err := os.Open(path)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
reader, err := pcapgo.NewReader(f)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var packets []PacketInfo
|
||||
packetSource := gopacket.NewPacketSource(reader, reader.LinkType())
|
||||
|
||||
for packet := range packetSource.Packets() {
|
||||
if packet.NetworkLayer() == nil {
|
||||
continue
|
||||
}
|
||||
|
||||
isDNS := false
|
||||
isSent := false
|
||||
|
||||
// Check UDP layer (DNS, DoQ, DoH3)
|
||||
if udpLayer := packet.Layer(layers.LayerTypeUDP); udpLayer != nil {
|
||||
udp := udpLayer.(*layers.UDP)
|
||||
isDNS = udp.SrcPort == 53 || udp.DstPort == 53 ||
|
||||
udp.SrcPort == 853 || udp.DstPort == 853 ||
|
||||
udp.SrcPort == 443 || udp.DstPort == 443
|
||||
isSent = udp.DstPort == 53 || udp.DstPort == 853 || udp.DstPort == 443
|
||||
}
|
||||
|
||||
// Check TCP layer (DoT, DoH)
|
||||
if tcpLayer := packet.Layer(layers.LayerTypeTCP); tcpLayer != nil {
|
||||
tcp := tcpLayer.(*layers.TCP)
|
||||
isDNS = tcp.SrcPort == 53 || tcp.DstPort == 53 ||
|
||||
tcp.SrcPort == 853 || tcp.DstPort == 853 ||
|
||||
tcp.SrcPort == 443 || tcp.DstPort == 443
|
||||
isSent = tcp.DstPort == 53 || tcp.DstPort == 853 || tcp.DstPort == 443
|
||||
}
|
||||
|
||||
if isDNS {
|
||||
packets = append(packets, PacketInfo{
|
||||
Timestamp: packet.Metadata().Timestamp,
|
||||
Size: len(packet.Data()),
|
||||
IsSent: isSent,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
return packets, nil
|
||||
}
|
||||
|
||||
func enrichRecords(records []*QueryRecord, packets []PacketInfo) {
|
||||
for _, rec := range records {
|
||||
ts, err := parseRFC3339Nano(rec.Timestamp)
|
||||
if err != nil {
|
||||
log.Printf(" Warning: Failed to parse timestamp: %s", rec.Timestamp)
|
||||
continue
|
||||
}
|
||||
|
||||
// Define time window for this query
|
||||
windowStart := ts
|
||||
windowEnd := ts.Add(time.Duration(rec.DurationNs))
|
||||
|
||||
var sent, recv, pktSent, pktRecv int64
|
||||
|
||||
// Match packets within the time window
|
||||
for _, pkt := range packets {
|
||||
if (pkt.Timestamp.Equal(windowStart) || pkt.Timestamp.After(windowStart)) &&
|
||||
pkt.Timestamp.Before(windowEnd) {
|
||||
if pkt.IsSent {
|
||||
sent += int64(pkt.Size)
|
||||
pktSent++
|
||||
} else {
|
||||
recv += int64(pkt.Size)
|
||||
pktRecv++
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
rec.BytesSent = sent
|
||||
rec.BytesReceived = recv
|
||||
rec.PacketsSent = pktSent
|
||||
rec.PacketsReceived = pktRecv
|
||||
rec.TotalBytes = sent + recv
|
||||
}
|
||||
}
|
||||
|
||||
func writeCSV(path string, records []*QueryRecord) error {
|
||||
f, err := os.Create(path)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
w := csv.NewWriter(f)
|
||||
defer w.Flush()
|
||||
|
||||
// Write header
|
||||
header := []string{
|
||||
"domain", "query_type", "protocol", "dnssec", "auth_dnssec",
|
||||
"keep_alive", "dns_server", "timestamp", "duration_ns", "duration_ms",
|
||||
"request_size_bytes", "response_size_bytes", "response_code", "error",
|
||||
"bytes_sent", "bytes_received", "packets_sent", "packets_received", "total_bytes",
|
||||
}
|
||||
if err := w.Write(header); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Write data rows
|
||||
for _, rec := range records {
|
||||
row := []string{
|
||||
rec.Domain,
|
||||
rec.QueryType,
|
||||
rec.Protocol,
|
||||
rec.DNSSec,
|
||||
rec.AuthDNSSec,
|
||||
rec.KeepAlive,
|
||||
rec.DNSServer,
|
||||
rec.Timestamp,
|
||||
strconv.FormatInt(rec.DurationNs, 10),
|
||||
strconv.FormatFloat(rec.DurationMs, 'f', -1, 64),
|
||||
strconv.Itoa(rec.RequestSizeBytes),
|
||||
strconv.Itoa(rec.ResponseSizeBytes),
|
||||
rec.ResponseCode,
|
||||
rec.Error,
|
||||
strconv.FormatInt(rec.BytesSent, 10),
|
||||
strconv.FormatInt(rec.BytesReceived, 10),
|
||||
strconv.FormatInt(rec.PacketsSent, 10),
|
||||
strconv.FormatInt(rec.PacketsReceived, 10),
|
||||
strconv.FormatInt(rec.TotalBytes, 10),
|
||||
}
|
||||
if err := w.Write(row); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func main() {
|
||||
resultsDir := "results"
|
||||
providers := []string{"adguard", "cloudflare", "google", "quad9"}
|
||||
|
||||
fmt.Println("╔═══════════════════════════════════════════════╗")
|
||||
fmt.Println("║ DNS PCAP Preprocessor v1.0 ║")
|
||||
fmt.Println("║ Enriching ALL CSVs with bandwidth metrics ║")
|
||||
fmt.Println("╚═══════════════════════════════════════════════╝")
|
||||
|
||||
totalProcessed := 0
|
||||
totalSkipped := 0
|
||||
totalErrors := 0
|
||||
|
||||
for _, provider := range providers {
|
||||
providerPath := filepath.Join(resultsDir, provider)
|
||||
if _, err := os.Stat(providerPath); os.IsNotExist(err) {
|
||||
fmt.Printf("\n⚠ Provider folder not found: %s\n", provider)
|
||||
continue
|
||||
}
|
||||
|
||||
if err := processProviderFolder(providerPath); err != nil {
|
||||
log.Printf("Error processing %s: %v\n", provider, err)
|
||||
totalErrors++
|
||||
}
|
||||
}
|
||||
|
||||
fmt.Println("\n╔═══════════════════════════════════════════════╗")
|
||||
fmt.Println("║ Preprocessing Complete! ║")
|
||||
fmt.Println("╚═══════════════════════════════════════════════╝")
|
||||
fmt.Printf("\nAll CSV files now have 5 additional columns:\n")
|
||||
fmt.Printf(" • bytes_sent - Total bytes sent to DNS server\n")
|
||||
fmt.Printf(" • bytes_received - Total bytes received from DNS server\n")
|
||||
fmt.Printf(" • packets_sent - Number of packets sent\n")
|
||||
fmt.Printf(" • packets_received - Number of packets received\n")
|
||||
fmt.Printf(" • total_bytes - Sum of sent + received bytes\n")
|
||||
fmt.Printf("\n📁 Backups saved as: *.csv.bak\n")
|
||||
fmt.Printf("\n💡 Tip: The analysis script will filter which files to visualize,\n")
|
||||
fmt.Printf(" but all files now have complete bandwidth metrics!\n")
|
||||
}
|
||||
@@ -1,367 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Advanced PCAP filter for DNS traffic (with IPv6 support).
|
||||
|
||||
Filters out:
|
||||
- Local network traffic except test machine (IPv4: 10.0.0.50; IPv6: specific addresses)
|
||||
- AdGuard DNS servers (for non-AdGuard captures)
|
||||
- Non-DNS traffic based on protocol-specific ports
|
||||
"""
|
||||
|
||||
import os
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
import argparse
|
||||
|
||||
# Test machine IPs (IPv4 and IPv6 addresses of the capture host)
|
||||
TEST_IPV4 = '10.0.0.50'
|
||||
TEST_IPV6_GLOBAL = '2001:818:e73e:ba00:5506:dfd4:ed8b:96e'
|
||||
TEST_IPV6_LINKLOCAL = 'fe80::fe98:c62e:4463:9a2d'
|
||||
|
||||
# Port mappings
|
||||
PORT_MAP = {
|
||||
'udp': [53], # DNS-over-UDP
|
||||
'tls': [53, 853], # DNS-over-TLS
|
||||
'https': [53, 443], # DNS-over-HTTPS (DoH)
|
||||
'doq': [53, 784, 8853], # DNS-over-QUIC
|
||||
'doh3': [53, 443] # DNS-over-HTTP/3
|
||||
}
|
||||
|
||||
# AdGuard DNS IPs to filter out (for non-AdGuard captures)
|
||||
ADGUARD_IPS = [
|
||||
'94.140.14.14',
|
||||
'94.140.15.15',
|
||||
'2a10:50c0::ad1:ff',
|
||||
'2a10:50c0::ad2:ff'
|
||||
]
|
||||
|
||||
def parse_filename(filename):
|
||||
"""Extract protocol from filename"""
|
||||
base = filename.replace('.pcap', '').replace('.csv', '')
|
||||
parts = base.split('-')
|
||||
|
||||
if len(parts) < 1: # Minimum: protocol
|
||||
return None
|
||||
|
||||
protocol = parts[0].lower()
|
||||
return protocol
|
||||
|
||||
def extract_resolver_from_path(pcap_path):
|
||||
"""Extract resolver name from directory structure"""
|
||||
parts = Path(pcap_path).parts
|
||||
|
||||
for part in parts:
|
||||
if part.lower() in ['cloudflare', 'google', 'quad9', 'adguard']:
|
||||
return part.lower()
|
||||
|
||||
return None
|
||||
|
||||
def build_filter_expression(protocol, resolver):
|
||||
"""
|
||||
Build tshark filter expression.
|
||||
|
||||
Strategy:
|
||||
1. Only protocol-specific DNS ports
|
||||
2. Keep only traffic involving the test machine (IPv4/IPv6)
|
||||
3. Exclude AdGuard IPs for non-AdGuard captures
|
||||
"""
|
||||
|
||||
# Get ports for this protocol
|
||||
ports = PORT_MAP.get(protocol, [53, 443, 853, 784, 8853])
|
||||
|
||||
# Build port filter (UDP or TCP on these ports)
|
||||
port_conditions = []
|
||||
for port in ports:
|
||||
port_conditions.append(f'(udp.port == {port} or tcp.port == {port})')
|
||||
|
||||
port_filter = ' or '.join(port_conditions)
|
||||
|
||||
# Build test machine filter (keep if src or dst is test machine IP)
|
||||
machine_conditions = [f'(ip.addr == {TEST_IPV4})']
|
||||
if TEST_IPV6_GLOBAL:
|
||||
machine_conditions.append(f'(ipv6.addr == {TEST_IPV6_GLOBAL})')
|
||||
if TEST_IPV6_LINKLOCAL:
|
||||
machine_conditions.append(f'(ipv6.addr == {TEST_IPV6_LINKLOCAL})')
|
||||
|
||||
machine_filter = ' or '.join(machine_conditions)
|
||||
|
||||
# Build AdGuard exclusion filter
|
||||
adguard_exclusions = []
|
||||
if resolver != 'adguard':
|
||||
for ip in ADGUARD_IPS:
|
||||
if ':' in ip: # IPv6
|
||||
adguard_exclusions.append(f'!(ipv6.addr == {ip})')
|
||||
else: # IPv4
|
||||
adguard_exclusions.append(f'!(ip.addr == {ip})')
|
||||
|
||||
# Combine all filters
|
||||
filters = [f'({port_filter})', f'({machine_filter})']
|
||||
|
||||
if adguard_exclusions:
|
||||
adguard_filter = ' and '.join(adguard_exclusions)
|
||||
filters.append(f'({adguard_filter})')
|
||||
|
||||
final_filter = ' and '.join(filters)
|
||||
|
||||
return final_filter
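# Illustrative check (hedged): for a DoUDP capture against a non-AdGuard
# resolver, the generated display filter should keep only port-53 traffic
# involving the test machine and exclude the AdGuard addresses.
_example_filter = build_filter_expression('udp', 'cloudflare')
assert 'udp.port == 53' in _example_filter
assert f'ip.addr == {TEST_IPV4}' in _example_filter
assert '94.140.14.14' in _example_filter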
|
||||
|
||||
def filter_pcap(input_path, output_path, filter_expr, verbose=False):
|
||||
"""Apply filter to PCAP file using tshark"""
|
||||
|
||||
cmd = [
|
||||
'tshark',
|
||||
'-r', input_path,
|
||||
'-Y', filter_expr,
|
||||
'-w', output_path,
|
||||
'-F', 'pcap'
|
||||
]
|
||||
|
||||
try:
|
||||
if verbose:
|
||||
print(f" Filter: {filter_expr}")
|
||||
|
||||
result = subprocess.run(
|
||||
cmd,
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=300
|
||||
)
|
||||
|
||||
if result.returncode != 0:
|
||||
print(f" ✗ Error: {result.stderr.strip()}")
|
||||
return False
|
||||
|
||||
if not os.path.exists(output_path):
|
||||
print(f" ✗ Output file not created")
|
||||
return False
|
||||
|
||||
output_size = os.path.getsize(output_path)
|
||||
if output_size < 24:
|
||||
print(f" ⚠ Warning: Output is empty")
|
||||
|
||||
return True
|
||||
|
||||
except subprocess.TimeoutExpired:
|
||||
print(f" ✗ Timeout (>5 minutes)")
|
||||
return False
|
||||
except Exception as e:
|
||||
print(f" ✗ Exception: {e}")
|
||||
return False
|
||||
|
||||
def find_pcap_files(root_dir):
|
||||
"""Recursively find all PCAP files"""
|
||||
pcap_files = []
|
||||
for root, dirs, files in os.walk(root_dir):
|
||||
for file in files:
|
||||
if file.endswith('.pcap'):
|
||||
full_path = os.path.join(root, file)
|
||||
pcap_files.append(full_path)
|
||||
return sorted(pcap_files)
|
||||
|
||||
def format_bytes(bytes_val):
|
||||
"""Format bytes as human readable"""
|
||||
for unit in ['B', 'KB', 'MB', 'GB']:
|
||||
if bytes_val < 1024.0:
|
||||
return f"{bytes_val:.1f} {unit}"
|
||||
bytes_val /= 1024.0
|
||||
return f"{bytes_val:.1f} TB"
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(
|
||||
description='Advanced PCAP filter for DNS traffic (IPv4/IPv6)',
|
||||
formatter_class=argparse.RawDescriptionHelpFormatter,
|
||||
epilog='''
|
||||
Filtering rules:
|
||||
1. Only include traffic on protocol-specific DNS ports
|
||||
2. Keep only packets involving the test machine (10.0.0.50 or its IPv6 addresses)
|
||||
3. Exclude AdGuard IPs for non-AdGuard captures
|
||||
|
||||
Protocol-specific ports:
|
||||
udp: 53
|
||||
tls: 53, 853
|
||||
https: 53, 443
|
||||
doq: 53, 784, 8853
|
||||
doh3: 53, 443
|
||||
|
||||
Examples:
|
||||
# Dry run
|
||||
%(prog)s ./results --dry-run
|
||||
|
||||
# Filter with verbose output
|
||||
%(prog)s ./results --verbose
|
||||
|
||||
# Custom output directory
|
||||
%(prog)s ./results --output ./cleaned
|
||||
'''
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
'input_dir',
|
||||
help='Input directory containing PCAP files'
|
||||
)
|
||||
parser.add_argument(
|
||||
'-o', '--output',
|
||||
default='./results_filtered',
|
||||
help='Output directory (default: ./results_filtered)'
|
||||
)
|
||||
parser.add_argument(
|
||||
'--dry-run',
|
||||
action='store_true',
|
||||
help='Show what would be done without filtering'
|
||||
)
|
||||
parser.add_argument(
|
||||
'--limit',
|
||||
type=int,
|
||||
help='Only process first N files (for testing)'
|
||||
)
|
||||
parser.add_argument(
|
||||
'-v', '--verbose',
|
||||
action='store_true',
|
||||
help='Verbose output (show filter expressions)'
|
||||
)
|
||||
parser.add_argument(
|
||||
'--overwrite',
|
||||
action='store_true',
|
||||
help='Overwrite existing filtered files'
|
||||
)
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
# Check for tshark
|
||||
try:
|
||||
result = subprocess.run(
|
||||
['tshark', '-v'],
|
||||
capture_output=True,
|
||||
check=True
|
||||
)
|
||||
if args.verbose:
|
||||
version = result.stdout.decode().split('\n')[0]
|
||||
print(f"Using: {version}\n")
|
||||
except (subprocess.CalledProcessError, FileNotFoundError):
|
||||
print("Error: tshark not found. Install Wireshark/tshark:")
|
||||
print(" Ubuntu/Debian: sudo apt-get install tshark")
|
||||
print(" macOS: brew install wireshark")
|
||||
return 1
|
||||
|
||||
print("=" * 80)
|
||||
print("ADVANCED DNS PCAP FILTER (IPv4/IPv6)")
|
||||
print("=" * 80)
|
||||
print("Filters:")
|
||||
print(" 1. Protocol-specific DNS ports only")
|
||||
print(" 2. Keep only traffic involving test machine (10.0.0.50 / IPv6 addresses)")
|
||||
print(" 3. Exclude AdGuard IPs (for non-AdGuard captures)")
|
||||
print(f"\nInput: {args.input_dir}")
|
||||
print(f"Output: {args.output}")
|
||||
|
||||
# Find PCAP files
|
||||
print(f"\nScanning for PCAP files...")
|
||||
pcap_files = find_pcap_files(args.input_dir)
|
||||
|
||||
if not pcap_files:
|
||||
print(f"No PCAP files found in {args.input_dir}")
|
||||
return 1
|
||||
|
||||
print(f"Found {len(pcap_files)} PCAP files")
|
||||
|
||||
total_input_size = sum(os.path.getsize(f) for f in pcap_files)
|
||||
print(f"Total size: {format_bytes(total_input_size)}")
|
||||
|
||||
if args.limit:
|
||||
pcap_files = pcap_files[:args.limit]
|
||||
print(f"Limiting to first {args.limit} files")
|
||||
|
||||
if args.dry_run:
|
||||
print("\n*** DRY RUN MODE ***\n")
|
||||
else:
|
||||
print()
|
||||
|
||||
# Process files
|
||||
success_count = 0
|
||||
skip_count = 0
|
||||
fail_count = 0
|
||||
total_output_size = 0
|
||||
|
||||
for i, input_path in enumerate(pcap_files, 1):
|
||||
# Extract info from path
|
||||
filename = Path(input_path).name
|
||||
protocol = parse_filename(filename)
|
||||
resolver = extract_resolver_from_path(input_path)
|
||||
|
||||
if not protocol:
|
||||
print(f"[{i}/{len(pcap_files)}] {filename}")
|
||||
print(f" ⚠ Could not parse protocol, skipping")
|
||||
skip_count += 1
|
||||
continue
|
||||
|
||||
# Create output path
|
||||
rel_path = os.path.relpath(input_path, args.input_dir)
|
||||
output_path = os.path.join(args.output, rel_path)
|
||||
|
||||
input_size = os.path.getsize(input_path)
|
||||
|
||||
print(f"[{i}/{len(pcap_files)}] {rel_path}")
|
||||
print(f" Protocol: {protocol.upper()}")
|
||||
print(f" Resolver: {resolver or 'unknown'}")
|
||||
print(f" Size: {format_bytes(input_size)}")
|
||||
|
||||
# Check if already filtered
|
||||
if os.path.exists(output_path) and not args.overwrite:
|
||||
output_size = os.path.getsize(output_path)
|
||||
reduction = ((input_size - output_size) / input_size * 100) if input_size > 0 else 0
|
||||
print(f" ⊙ Already filtered: {format_bytes(output_size)} "
|
||||
f"({reduction:.1f}% reduction)")
|
||||
skip_count += 1
|
||||
total_output_size += output_size
|
||||
continue
|
||||
|
||||
# Build filter
|
||||
filter_expr = build_filter_expression(protocol, resolver)
|
||||
|
||||
if args.dry_run:
|
||||
print(f" → Would filter")
|
||||
if args.verbose:
|
||||
print(f" Filter: {filter_expr}")
|
||||
continue
|
||||
|
||||
# Create output directory
|
||||
os.makedirs(os.path.dirname(output_path), exist_ok=True)
|
||||
|
||||
# Filter
|
||||
success = filter_pcap(input_path, output_path, filter_expr, args.verbose)
|
||||
|
||||
if success:
|
||||
output_size = os.path.getsize(output_path)
|
||||
reduction = ((input_size - output_size) / input_size * 100) if input_size > 0 else 0
|
||||
print(f" ✓ Filtered: {format_bytes(output_size)} "
|
||||
f"({reduction:.1f}% reduction)")
|
||||
success_count += 1
|
||||
total_output_size += output_size
|
||||
else:
|
||||
fail_count += 1
|
||||
|
||||
# Summary
|
||||
print("\n" + "=" * 80)
|
||||
print("SUMMARY")
|
||||
print("=" * 80)
|
||||
|
||||
if args.dry_run:
|
||||
print(f"Would process: {len(pcap_files)} files")
|
||||
else:
|
||||
print(f"Successful: {success_count}")
|
||||
print(f"Skipped: {skip_count} (already filtered or unparseable)")
|
||||
print(f"Failed: {fail_count}")
|
||||
print(f"Total: {len(pcap_files)}")
|
||||
|
||||
if success_count > 0 or skip_count > 0:
|
||||
print(f"\nInput size: {format_bytes(total_input_size)}")
|
||||
print(f"Output size: {format_bytes(total_output_size)}")
|
||||
if total_input_size > 0:
|
||||
reduction = ((total_input_size - total_output_size) /
|
||||
total_input_size * 100)
|
||||
print(f"Reduction: {reduction:.1f}%")
|
||||
print(f"\nOutput directory: {args.output}")
|
||||
|
||||
return 0 if fail_count == 0 else 1
|
||||
|
||||
if __name__ == "__main__":
|
||||
exit(main())
|
||||
@@ -1,426 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Convert DNS CSV files to SQLite database.
|
||||
Creates a single normalized table with unified DNSSEC handling.
|
||||
"""
|
||||
|
||||
import sqlite3
|
||||
import csv
|
||||
from pathlib import Path
|
||||
from dateutil import parser as date_parser
|
||||
|
||||
|
||||
def create_database_schema(conn: sqlite3.Connection):
|
||||
"""Create the database schema with indexes."""
|
||||
cursor = conn.cursor()
|
||||
|
||||
# Main queries table
|
||||
cursor.execute("""
|
||||
CREATE TABLE IF NOT EXISTS dns_queries (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
|
||||
-- Metadata
|
||||
provider TEXT NOT NULL,
|
||||
protocol TEXT NOT NULL,
|
||||
dnssec_mode TEXT NOT NULL CHECK(dnssec_mode IN ('off', 'auth', 'trust')),
|
||||
|
||||
-- Query details
|
||||
domain TEXT NOT NULL,
|
||||
query_type TEXT NOT NULL,
|
||||
keep_alive BOOLEAN NOT NULL,
|
||||
dns_server TEXT NOT NULL,
|
||||
|
||||
-- Timing
|
||||
timestamp TEXT NOT NULL,
|
||||
timestamp_unix REAL NOT NULL,
|
||||
duration_ns INTEGER NOT NULL,
|
||||
duration_ms REAL NOT NULL,
|
||||
|
||||
-- Size metrics
|
||||
request_size_bytes INTEGER,
|
||||
response_size_bytes INTEGER,
|
||||
|
||||
-- Network metrics (from PCAP)
|
||||
bytes_sent INTEGER DEFAULT 0,
|
||||
bytes_received INTEGER DEFAULT 0,
|
||||
packets_sent INTEGER DEFAULT 0,
|
||||
packets_received INTEGER DEFAULT 0,
|
||||
total_bytes INTEGER DEFAULT 0,
|
||||
|
||||
-- Response
|
||||
response_code TEXT,
|
||||
error TEXT
|
||||
)
|
||||
""")
|
||||
|
||||
# Create indexes for common queries
|
||||
cursor.execute("""
|
||||
CREATE INDEX IF NOT EXISTS idx_provider
|
||||
ON dns_queries(provider)
|
||||
""")
|
||||
|
||||
cursor.execute("""
|
||||
CREATE INDEX IF NOT EXISTS idx_protocol
|
||||
ON dns_queries(protocol)
|
||||
""")
|
||||
|
||||
cursor.execute("""
|
||||
CREATE INDEX IF NOT EXISTS idx_dnssec_mode
|
||||
ON dns_queries(dnssec_mode)
|
||||
""")
|
||||
|
||||
cursor.execute("""
|
||||
CREATE INDEX IF NOT EXISTS idx_keep_alive
|
||||
ON dns_queries(keep_alive)
|
||||
""")
|
||||
|
||||
cursor.execute("""
|
||||
CREATE INDEX IF NOT EXISTS idx_provider_protocol_dnssec
|
||||
ON dns_queries(provider, protocol, dnssec_mode)
|
||||
""")
|
||||
|
||||
cursor.execute("""
|
||||
CREATE INDEX IF NOT EXISTS idx_timestamp
|
||||
ON dns_queries(timestamp_unix)
|
||||
""")
|
||||
|
||||
cursor.execute("""
|
||||
CREATE INDEX IF NOT EXISTS idx_domain
|
||||
ON dns_queries(domain)
|
||||
""")
|
||||
|
||||
conn.commit()
|
||||
|
||||
|
||||
def parse_protocol_and_dnssec(filename: str) -> tuple[str, str, bool]:
|
||||
"""
|
||||
Extract base protocol, DNSSEC mode, and keep_alive from filename.
|
||||
Returns (base_protocol, dnssec_mode, keep_alive)
|
||||
|
||||
Examples:
|
||||
'udp.csv' -> ('udp', 'off', False)
|
||||
'udp-auth.csv' -> ('udp', 'auth', False)
|
||||
'tls.csv' -> ('tls', 'off', False)
|
||||
'tls-persist.csv' -> ('tls', 'off', True)
|
||||
'https-persist.csv' -> ('https', 'off', True)
|
||||
'https-auth-persist.csv' -> ('https', 'auth', True)
|
||||
'https-trust-persist.csv' -> ('https', 'trust', True)
|
||||
'doh3-auth.csv' -> ('doh3', 'auth', False)
|
||||
'doq.csv' -> ('doq', 'off', False)
|
||||
"""
|
||||
name = filename.replace('.csv', '')
|
||||
|
||||
# Check for persist suffix (keep_alive)
|
||||
keep_alive = False
|
||||
if name.endswith('-persist'):
|
||||
keep_alive = True
|
||||
name = name.replace('-persist', '')
|
||||
|
||||
# Check for DNSSEC suffix
|
||||
dnssec_mode = 'off'
|
||||
if name.endswith('-auth'):
|
||||
dnssec_mode = 'auth'
|
||||
name = name.replace('-auth', '')
|
||||
elif name.endswith('-trust'):
|
||||
dnssec_mode = 'trust'
|
||||
name = name.replace('-trust', '')
|
||||
|
||||
# For UDP, DoH3, and DoQ, keep_alive doesn't apply (connectionless)
|
||||
if name in ['udp', 'doh3', 'doq']:
|
||||
keep_alive = False
|
||||
|
||||
return (name, dnssec_mode, keep_alive)
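# Quick self-check against the examples in the docstring above (illustrative):
assert parse_protocol_and_dnssec('udp-auth.csv') == ('udp', 'auth', False)
assert parse_protocol_and_dnssec('https-trust-persist.csv') == ('https', 'trust', True)
assert parse_protocol_and_dnssec('doq.csv') == ('doq', 'off', False)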
|
||||
|
||||
|
||||
def str_to_bool(value: str) -> bool:
|
||||
"""Convert string boolean to Python bool."""
|
||||
return value.lower() in ('true', '1', 'yes')
|
||||
|
||||
|
||||
def import_csv_to_db(
|
||||
csv_path: Path,
|
||||
provider: str,
|
||||
conn: sqlite3.Connection
|
||||
) -> int:
|
||||
"""Import a CSV file into the database."""
|
||||
protocol, dnssec_mode, keep_alive_from_filename = parse_protocol_and_dnssec(csv_path.name)
|
||||
|
||||
cursor = conn.cursor()
|
||||
rows_imported = 0
|
||||
|
||||
with open(csv_path, 'r', encoding='utf-8') as f:
|
||||
reader = csv.DictReader(f)
|
||||
|
||||
for row in reader:
|
||||
try:
|
||||
# Parse timestamp to Unix epoch
|
||||
dt = date_parser.isoparse(row['timestamp'])
|
||||
timestamp_unix = dt.timestamp()
|
||||
|
||||
# Use keep_alive from filename (more reliable than CSV)
|
||||
keep_alive = keep_alive_from_filename
|
||||
|
||||
# Handle optional fields (may not exist in older CSVs)
|
||||
bytes_sent = int(row.get('bytes_sent', 0) or 0)
|
||||
bytes_received = int(row.get('bytes_received', 0) or 0)
|
||||
packets_sent = int(row.get('packets_sent', 0) or 0)
|
||||
packets_received = int(row.get('packets_received', 0) or 0)
|
||||
total_bytes = int(row.get('total_bytes', 0) or 0)
|
||||
|
||||
cursor.execute("""
|
||||
INSERT INTO dns_queries (
|
||||
provider, protocol, dnssec_mode,
|
||||
domain, query_type, keep_alive,
|
||||
dns_server, timestamp, timestamp_unix,
|
||||
duration_ns, duration_ms,
|
||||
request_size_bytes, response_size_bytes,
|
||||
bytes_sent, bytes_received, packets_sent, packets_received, total_bytes,
|
||||
response_code, error
|
||||
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
||||
""", (
|
||||
provider,
|
||||
protocol,
|
||||
dnssec_mode,
|
||||
row['domain'],
|
||||
row['query_type'],
|
||||
keep_alive,
|
||||
row['dns_server'],
|
||||
row['timestamp'],
|
||||
timestamp_unix,
|
||||
int(row['duration_ns']),
|
||||
float(row['duration_ms']),
|
||||
int(row.get('request_size_bytes') or 0),
|
||||
int(row.get('response_size_bytes') or 0),
|
||||
bytes_sent,
|
||||
bytes_received,
|
||||
packets_sent,
|
||||
packets_received,
|
||||
total_bytes,
|
||||
row.get('response_code', ''),
|
||||
row.get('error', '')
|
||||
))
|
||||
|
||||
rows_imported += 1
|
||||
|
||||
except Exception as e:
|
||||
print(f" Warning: Skipping row - {e}")
|
||||
continue
|
||||
|
||||
conn.commit()
|
||||
return rows_imported
|
||||
|
||||
|
||||
def main():
|
||||
"""Main import pipeline."""
|
||||
print("\n" + "="*60)
|
||||
print("CSV to SQLite Database Converter")
|
||||
print("="*60)
|
||||
|
||||
results_dir = Path('results')
|
||||
db_path = Path('dns.db')
|
||||
|
||||
if not results_dir.exists():
|
||||
print(f"\n❌ Error: '{results_dir}' directory not found")
|
||||
return
|
||||
|
||||
# Remove existing database
|
||||
if db_path.exists():
|
||||
print(f"\n⚠ Removing existing database: {db_path}")
|
||||
db_path.unlink()
|
||||
|
||||
# Create database and schema
|
||||
print(f"\n📊 Creating database: {db_path}")
|
||||
conn = sqlite3.connect(db_path)
|
||||
create_database_schema(conn)
|
||||
print("✓ Schema created")
|
||||
|
||||
# Import CSVs
|
||||
providers = ['adguard', 'cloudflare', 'google', 'quad9']
|
||||
total_rows = 0
|
||||
total_files = 0
|
||||
|
||||
for provider in providers:
|
||||
provider_path = results_dir / provider
|
||||
|
||||
if not provider_path.exists():
|
||||
print(f"\n⚠ Skipping {provider} - directory not found")
|
||||
continue
|
||||
|
||||
print(f"\n{'='*60}")
|
||||
print(f"Importing: {provider.upper()}")
|
||||
print(f"{'='*60}")
|
||||
|
||||
csv_files = sorted(provider_path.glob('*.csv'))
|
||||
provider_rows = 0
|
||||
provider_files = 0
|
||||
|
||||
for csv_path in csv_files:
|
||||
# Skip backup files
|
||||
if '.bak' in csv_path.name:
|
||||
continue
|
||||
|
||||
protocol, dnssec, keep_alive = parse_protocol_and_dnssec(csv_path.name)
|
||||
ka_str = "persistent" if keep_alive else "non-persist"
|
||||
print(f" 📄 {csv_path.name:30} → {protocol:8} (DNSSEC: {dnssec:5}, {ka_str})")
|
||||
|
||||
rows = import_csv_to_db(csv_path, provider, conn)
|
||||
print(f" ✓ Imported {rows:,} rows")
|
||||
|
||||
provider_rows += rows
|
||||
provider_files += 1
|
||||
|
||||
print(f"\n Total: {provider_files} files, {provider_rows:,} rows")
|
||||
total_rows += provider_rows
|
||||
total_files += provider_files
|
||||
|
||||
# Create summary
|
||||
print(f"\n{'='*60}")
|
||||
print("Database Summary")
|
||||
print(f"{'='*60}")
|
||||
|
||||
cursor = conn.cursor()
|
||||
|
||||
# Total counts
|
||||
cursor.execute("SELECT COUNT(*) FROM dns_queries")
|
||||
total_queries = cursor.fetchone()[0]
|
||||
|
||||
cursor.execute("SELECT COUNT(DISTINCT provider) FROM dns_queries")
|
||||
unique_providers = cursor.fetchone()[0]
|
||||
|
||||
cursor.execute("SELECT COUNT(DISTINCT protocol) FROM dns_queries")
|
||||
unique_protocols = cursor.fetchone()[0]
|
||||
|
||||
cursor.execute("SELECT COUNT(DISTINCT domain) FROM dns_queries")
|
||||
unique_domains = cursor.fetchone()[0]
|
||||
|
||||
print(f"\nTotal queries: {total_queries:,}")
|
||||
print(f"Providers: {unique_providers}")
|
||||
print(f"Protocols: {unique_protocols}")
|
||||
print(f"Unique domains: {unique_domains}")
|
||||
|
||||
# Show breakdown by provider, protocol, DNSSEC, and keep_alive
|
||||
print(f"\nBreakdown by Provider, Protocol, DNSSEC & Keep-Alive:")
|
||||
print(f"{'-'*80}")
|
||||
|
||||
cursor.execute("""
|
||||
SELECT provider, protocol, dnssec_mode, keep_alive, COUNT(*) as count
|
||||
FROM dns_queries
|
||||
GROUP BY provider, protocol, dnssec_mode, keep_alive
|
||||
ORDER BY provider, protocol, dnssec_mode, keep_alive
|
||||
""")
|
||||
|
||||
current_provider = None
|
||||
for provider, protocol, dnssec, keep_alive, count in cursor.fetchall():
|
||||
if current_provider != provider:
|
||||
if current_provider is not None:
|
||||
print()
|
||||
current_provider = provider
|
||||
|
||||
ka_str = "✓" if keep_alive else "✗"
|
||||
print(f" {provider:12} | {protocol:8} | {dnssec:5} | KA:{ka_str} | {count:6,} queries")
|
||||
|
||||
# Protocol distribution
|
||||
print(f"\n{'-'*80}")
|
||||
print("Protocol Distribution:")
|
||||
print(f"{'-'*80}")
|
||||
|
||||
cursor.execute("""
|
||||
SELECT protocol, COUNT(*) as count
|
||||
FROM dns_queries
|
||||
GROUP BY protocol
|
||||
ORDER BY protocol
|
||||
""")
|
||||
|
||||
for protocol, count in cursor.fetchall():
|
||||
pct = (count / total_queries) * 100
|
||||
print(f" {protocol:8} | {count:8,} queries ({pct:5.1f}%)")
|
||||
|
||||
# DNSSEC mode distribution
|
||||
print(f"\n{'-'*80}")
|
||||
print("DNSSEC Mode Distribution:")
|
||||
print(f"{'-'*80}")
|
||||
|
||||
cursor.execute("""
|
||||
SELECT dnssec_mode, COUNT(*) as count
|
||||
FROM dns_queries
|
||||
GROUP BY dnssec_mode
|
||||
ORDER BY dnssec_mode
|
||||
""")
|
||||
|
||||
for dnssec_mode, count in cursor.fetchall():
|
||||
pct = (count / total_queries) * 100
|
||||
print(f" {dnssec_mode:5} | {count:8,} queries ({pct:5.1f}%)")
|
||||
|
||||
# Keep-Alive distribution
|
||||
print(f"\n{'-'*80}")
|
||||
print("Keep-Alive Distribution:")
|
||||
print(f"{'-'*80}")
|
||||
|
||||
cursor.execute("""
|
||||
SELECT keep_alive, COUNT(*) as count
|
||||
FROM dns_queries
|
||||
GROUP BY keep_alive
|
||||
""")
|
||||
|
||||
for keep_alive, count in cursor.fetchall():
|
||||
ka_label = "Persistent" if keep_alive else "Non-persistent"
|
||||
pct = (count / total_queries) * 100
|
||||
print(f" {ka_label:15} | {count:8,} queries ({pct:5.1f}%)")
|
||||
|
||||
conn.close()
|
||||
|
||||
print(f"\n{'='*60}")
|
||||
print(f"✓ Database created successfully: {db_path}")
|
||||
print(f" Total: {total_files} files, {total_rows:,} rows")
|
||||
print(f"{'='*60}\n")
|
||||
|
||||
# Print usage examples
|
||||
print("\n📖 Usage Examples for Metabase:")
|
||||
print(f"{'-'*60}")
|
||||
|
||||
print("\n1. Compare protocols (DNSSEC off, persistent only):")
|
||||
print(""" SELECT provider, protocol,
|
||||
AVG(duration_ms) as avg_latency,
|
||||
AVG(total_bytes) as avg_bytes
|
||||
FROM dns_queries
|
||||
WHERE dnssec_mode = 'off' AND keep_alive = 1
|
||||
GROUP BY provider, protocol;""")
|
||||
|
||||
print("\n2. DNSSEC impact on UDP:")
|
||||
print(""" SELECT provider, dnssec_mode,
|
||||
AVG(duration_ms) as avg_latency
|
||||
FROM dns_queries
|
||||
WHERE protocol = 'udp'
|
||||
GROUP BY provider, dnssec_mode;""")
|
||||
|
||||
print("\n3. Keep-alive impact on TLS:")
|
||||
print(""" SELECT provider, keep_alive,
|
||||
AVG(duration_ms) as avg_latency,
|
||||
AVG(total_bytes) as avg_bytes
|
||||
FROM dns_queries
|
||||
WHERE protocol = 'tls' AND dnssec_mode = 'off'
|
||||
GROUP BY provider, keep_alive;""")
|
||||
|
||||
print("\n4. Time series for line graphs:")
|
||||
print(""" SELECT timestamp_unix, duration_ms, total_bytes
|
||||
FROM dns_queries
|
||||
WHERE provider = 'cloudflare'
|
||||
AND protocol = 'https'
|
||||
AND dnssec_mode = 'off'
|
||||
AND keep_alive = 1
|
||||
ORDER BY timestamp_unix;""")
|
||||
|
||||
print("\n5. Overall comparison table:")
|
||||
print(""" SELECT protocol, dnssec_mode, keep_alive,
|
||||
COUNT(*) as queries,
|
||||
AVG(duration_ms) as avg_latency,
|
||||
AVG(total_bytes) as avg_bytes
|
||||
FROM dns_queries
|
||||
GROUP BY protocol, dnssec_mode, keep_alive
|
||||
ORDER BY protocol, dnssec_mode, keep_alive;""")
|
||||
|
||||
print(f"\n{'-'*60}\n")
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
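The same comparisons can be run straight from Python instead of Metabase. A minimal sketch, assuming dns.db (the path created by this script) sits in the working directory:

import sqlite3

conn = sqlite3.connect('dns.db')
cur = conn.execute("""
    SELECT provider, protocol,
           AVG(duration_ms) AS avg_latency,
           AVG(total_bytes) AS avg_bytes
    FROM dns_queries
    WHERE dnssec_mode = 'off' AND keep_alive = 1
    GROUP BY provider, protocol
""")
for provider, protocol, avg_latency, avg_bytes in cur:
    print(f"{provider:12} {protocol:8} {avg_latency:8.2f} ms {avg_bytes:10.0f} B")
conn.close()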
|
||||
@@ -1,274 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Merge DNS test files by configuration.
|
||||
|
||||
- Merges CSVs of same config (adds 'run_id' column for traceability)
|
||||
- Optionally merges PCAPs using mergecap
|
||||
- Flattens date structure
|
||||
"""
|
||||
|
||||
import os
|
||||
import csv
|
||||
import subprocess
|
||||
import shutil
|
||||
from pathlib import Path
|
||||
import argparse
|
||||
from collections import defaultdict
|
||||
|
||||
def parse_filename(filename):
|
||||
"""
|
||||
Extract config key from filename.
|
||||
Format: protocol[-flags]-timestamp.{csv,pcap}
|
||||
Config key: protocol[-flags] (ignores timestamp)
|
||||
"""
|
||||
base = filename.replace('.csv', '').replace('.pcap', '')
|
||||
parts = base.split('-')
|
||||
|
||||
if len(parts) < 2:
|
||||
return None
|
||||
|
||||
# Config is everything except timestamp
|
||||
config = '-'.join(parts[:-1])
|
||||
timestamp = parts[-1]
|
||||
|
||||
return config, timestamp
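# Illustrative (the timestamp below is made up, not from a real run): a file
# named 'tls-trust-20250101T120000.csv' groups under config 'tls-trust', with
# the trailing timestamp reused later as its run_id.
assert parse_filename('tls-trust-20250101T120000.csv') == ('tls-trust', '20250101T120000')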
|
||||
|
||||
def extract_resolver_from_path(file_path):
|
||||
"""Extract resolver name from path"""
|
||||
parts = Path(file_path).parts
|
||||
for part in parts:
|
||||
if part.lower() in ['cloudflare', 'google', 'quad9', 'adguard']:
|
||||
return part.lower()
|
||||
return None
|
||||
|
||||
def find_files(root_dir, extension):
|
||||
"""Find all files with given extension"""
|
||||
files = []
|
||||
for root, dirs, filenames in os.walk(root_dir):
|
||||
for filename in filenames:
|
||||
if filename.endswith(extension):
|
||||
full_path = os.path.join(root, filename)
|
||||
files.append(full_path)
|
||||
return sorted(files)
|
||||
|
||||
def merge_csvs(csv_files, output_path, fieldnames):
|
||||
"""Merge multiple CSVs into one, adding 'run_id' column"""
|
||||
with open(output_path, 'w', newline='') as outfile:
|
||||
writer = csv.DictWriter(outfile, fieldnames=fieldnames + ['run_id'])
|
||||
writer.writeheader()
|
||||
|
||||
for csv_path in csv_files:
|
||||
# Use timestamp as run_id
|
||||
filename = Path(csv_path).name
|
||||
_, timestamp = parse_filename(filename)
|
||||
run_id = timestamp # Or add date if needed
|
||||
|
||||
with open(csv_path, 'r', newline='') as infile:
|
||||
reader = csv.DictReader(infile)
|
||||
for row in reader:
|
||||
row['run_id'] = run_id
|
||||
writer.writerow(row)
|
||||
|
||||
def merge_pcaps(pcap_files, output_path):
|
||||
"""Merge PCAP files using mergecap"""
|
||||
cmd = ['mergecap', '-w', output_path] + pcap_files
|
||||
try:
|
||||
subprocess.run(cmd, capture_output=True, check=True)
|
||||
return True
|
||||
except subprocess.CalledProcessError as e:
|
||||
print(f" ✗ mergecap error: {e.stderr.decode()}")
|
||||
return False
|
||||
except FileNotFoundError:
|
||||
print("Error: mergecap not found. Install Wireshark:")
|
||||
print(" Ubuntu: sudo apt install wireshark-common")
|
||||
print(" macOS: brew install wireshark")
|
||||
return False
|
||||
|
||||
def format_bytes(bytes_val):
|
||||
"""Format bytes as human readable"""
|
||||
for unit in ['B', 'KB', 'MB', 'GB']:
|
||||
if bytes_val < 1024.0:
|
||||
return f"{bytes_val:.1f} {unit}"
|
||||
bytes_val /= 1024.0
|
||||
return f"{bytes_val:.1f} TB"
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(
|
||||
description='Merge DNS test files by configuration',
|
||||
formatter_class=argparse.RawDescriptionHelpFormatter,
|
||||
epilog='''
|
||||
Merges files of same config across dates/timestamps.
|
||||
Output: ./results_merged/[resolver]/[config].csv (merged)
|
||||
./results_merged/[resolver]/[config].pcap (merged, if --merge-pcaps)
|
||||
|
||||
Examples:
|
||||
# Dry run to preview
|
||||
%(prog)s ./results --dry-run
|
||||
|
||||
# Merge CSVs only (recommended)
|
||||
%(prog)s ./results
|
||||
|
||||
# Merge CSVs and PCAPs
|
||||
%(prog)s ./results --merge-pcaps
|
||||
|
||||
# Custom output directory
|
||||
%(prog)s ./results --output ./merged_data
|
||||
'''
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
'input_dir',
|
||||
help='Input directory (e.g., ./results)'
|
||||
)
|
||||
parser.add_argument(
|
||||
'--output',
|
||||
default='./results_merged',
|
||||
help='Output directory (default: ./results_merged)'
|
||||
)
|
||||
parser.add_argument(
|
||||
'--merge-pcaps',
|
||||
action='store_true',
|
||||
help='Merge PCAP files (requires mergecap from Wireshark)'
|
||||
)
|
||||
parser.add_argument(
|
||||
'--dry-run',
|
||||
action='store_true',
|
||||
help='Show what would be done without merging'
|
||||
)
|
||||
parser.add_argument(
|
||||
'-y', '--yes',
|
||||
action='store_true',
|
||||
help='Skip confirmation prompt'
|
||||
)
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
if not os.path.isdir(args.input_dir):
|
||||
print(f"Error: Input directory not found: {args.input_dir}")
|
||||
return 1
|
||||
|
||||
# Find all files
|
||||
print("=" * 80)
|
||||
print("MERGE DNS TEST FILES")
|
||||
print("=" * 80)
|
||||
print(f"Input: {args.input_dir}")
|
||||
print(f"Output: {args.output}")
|
||||
print(f"Merge PCAPs: {'Yes' if args.merge_pcaps else 'No'}")
|
||||
|
||||
csv_files = find_files(args.input_dir, '.csv')
|
||||
pcap_files = find_files(args.input_dir, '.pcap') if args.merge_pcaps else []
|
||||
|
||||
if not csv_files and not pcap_files:
|
||||
print("\nNo CSV/PCAP files found")
|
||||
return 1
|
||||
|
||||
print(f"\nFound {len(csv_files)} CSV files")
|
||||
if args.merge_pcaps:
|
||||
print(f"Found {len(pcap_files)} PCAP files")
|
||||
|
||||
# Group files by resolver and config
|
||||
csv_groups = defaultdict(list)
|
||||
pcap_groups = defaultdict(list)
|
||||
|
||||
for csv_path in csv_files:
|
||||
config, _ = parse_filename(Path(csv_path).name)
|
||||
resolver = extract_resolver_from_path(csv_path)
|
||||
if config and resolver:
|
||||
key = (resolver, config)
|
||||
csv_groups[key].append(csv_path)
|
||||
|
||||
for pcap_path in pcap_files:
|
||||
config, _ = parse_filename(Path(pcap_path).name)
|
||||
resolver = extract_resolver_from_path(pcap_path)
|
||||
if config and resolver:
|
||||
key = (resolver, config)
|
||||
pcap_groups[key].append(pcap_path)
|
||||
|
||||
# Summary
|
||||
print("\nConfigs to merge:")
|
||||
print("-" * 80)
|
||||
for (resolver, config), files in sorted(csv_groups.items()):
|
||||
print(f" {resolver}/{config}: {len(files)} runs")
|
||||
|
||||
total_runs = sum(len(files) for files in csv_groups.values())
|
||||
print(f"\nTotal configs: {len(csv_groups)}")
|
||||
print(f"Total runs: {total_runs}")
|
||||
|
||||
if args.dry_run:
|
||||
print("\n*** DRY RUN MODE ***\n")
|
||||
for (resolver, config) in sorted(csv_groups.keys()):
|
||||
print(f"Would merge: {resolver}/{config} ({len(csv_groups[(resolver, config)])} CSVs)")
|
||||
if args.merge_pcaps and (resolver, config) in pcap_groups:
|
||||
print(f"Would merge: {resolver}/{config} ({len(pcap_groups[(resolver, config)])} PCAPs)")
|
||||
return 0
|
||||
|
||||
# Confirmation
|
||||
if not args.yes:
|
||||
response = input(f"\nMerge all into {args.output}? [y/N] ")
|
||||
if response.lower() not in ['y', 'yes']:
|
||||
print("Cancelled")
|
||||
return 0
|
||||
|
||||
# Merge
|
||||
print("\n" + "=" * 80)
|
||||
print("MERGING FILES")
|
||||
print("=" * 80)
|
||||
|
||||
success_count = 0
|
||||
fail_count = 0
|
||||
total_queries = 0
|
||||
total_size = 0
|
||||
|
||||
# Get standard CSV fieldnames (from first file)
|
||||
first_csv = next(iter(csv_files))
|
||||
with open(first_csv, 'r') as f:
|
||||
reader = csv.DictReader(f)
|
||||
fieldnames = reader.fieldnames
|
||||
|
||||
for (resolver, config), files in sorted(csv_groups.items()):
|
||||
print(f"\n{resolver}/{config} ({len(files)} runs)")
|
||||
|
||||
# Merge CSVs
|
||||
output_csv = os.path.join(args.output, resolver, f"{config}.csv")
|
||||
os.makedirs(os.path.dirname(output_csv), exist_ok=True)
|
||||
|
||||
merge_csvs(files, output_csv, fieldnames)
|
||||
|
||||
# Count queries in merged file
|
||||
with open(output_csv, 'r') as f:
|
||||
query_count = sum(1 for _ in csv.reader(f)) - 1 # Minus header
|
||||
|
||||
print(f" ✓ Merged CSV: {query_count:,} queries")
|
||||
total_queries += query_count
|
||||
success_count += 1
|
||||
|
||||
# Merge PCAPs if requested
|
||||
if args.merge_pcaps and (resolver, config) in pcap_groups:
|
||||
output_pcap = os.path.join(args.output, resolver, f"{config}.pcap")
|
||||
pcap_list = pcap_groups[(resolver, config)]
|
||||
|
||||
if merge_pcaps(pcap_list, output_pcap):
|
||||
merged_size = os.path.getsize(output_pcap)
|
||||
orig_size = sum(os.path.getsize(p) for p in pcap_list)
|
||||
print(f" ✓ Merged PCAP: {format_bytes(merged_size)} "
|
||||
f"(from {format_bytes(orig_size)})")
|
||||
total_size += merged_size
|
||||
else:
|
||||
print(f" ✗ PCAP merge failed")
|
||||
fail_count += 1
|
||||
|
||||
# Final summary
|
||||
print("\n" + "=" * 80)
|
||||
print("COMPLETE")
|
||||
print("=" * 80)
|
||||
print(f"Successful configs: {success_count}")
|
||||
print(f"Failed: {fail_count}")
|
||||
print(f"Total queries: {total_queries:,}")
|
||||
if args.merge_pcaps:
|
||||
print(f"Total PCAP size: {format_bytes(total_size)}")
|
||||
print(f"\nMerged files in: {args.output}")
|
||||
|
||||
return 0 if fail_count == 0 else 1
|
||||
|
||||
if __name__ == "__main__":
|
||||
exit(main())
|
||||