feat(dns): add dnscrypt and dns over tcp
This commit is contained in:
369
scripts/tools/add_extra_metrics_to_csv.go
Normal file
369
scripts/tools/add_extra_metrics_to_csv.go
Normal file
@@ -0,0 +1,369 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"encoding/csv"
|
||||
"fmt"
|
||||
"log"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/google/gopacket"
|
||||
"github.com/google/gopacket/layers"
|
||||
"github.com/google/gopacket/pcapgo"
|
||||
)
|
||||
|
||||
// QueryRecord is one row of a per-query DNS measurement CSV, optionally
// enriched with bandwidth metrics recovered from a matching PCAP capture.
type QueryRecord struct {
	Domain     string // queried domain name
	QueryType  string // DNS record type (e.g. A, AAAA)
	Protocol   string // transport protocol label (udp, tls, https, doq, ...)
	DNSSec     string
	AuthDNSSec string
	KeepAlive  string // whether the connection was reused, as stored in the CSV
	DNSServer  string // resolver address the query was sent to
	Timestamp  string // RFC3339Nano start time of the query

	DurationNs        int64   // query duration in nanoseconds
	DurationMs        float64 // query duration in milliseconds
	RequestSizeBytes  int     // DNS payload size of the request
	ResponseSizeBytes int     // DNS payload size of the response
	ResponseCode      string
	Error             string

	// The fields below are computed from the PCAP capture by enrichRecords.
	BytesSent       int64
	BytesReceived   int64
	PacketsSent     int64
	PacketsReceived int64
	TotalBytes      int64 // BytesSent + BytesReceived
}
|
||||
|
||||
// parseRFC3339Nano converts an RFC3339 timestamp string with nanosecond
// precision into a time.Time value.
func parseRFC3339Nano(ts string) (time.Time, error) {
	parsed, err := time.Parse(time.RFC3339Nano, ts)
	return parsed, err
}
|
||||
|
||||
func processProviderFolder(providerPath string) error {
|
||||
providerName := filepath.Base(providerPath)
|
||||
fmt.Printf("\n=== Processing provider: %s ===\n", providerName)
|
||||
|
||||
files, err := os.ReadDir(providerPath)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
processed := 0
|
||||
skipped := 0
|
||||
errors := 0
|
||||
|
||||
for _, file := range files {
|
||||
if !strings.HasSuffix(file.Name(), ".csv") {
|
||||
continue
|
||||
}
|
||||
|
||||
csvPath := filepath.Join(providerPath, file.Name())
|
||||
pcapPath := strings.Replace(csvPath, ".csv", ".pcap", 1)
|
||||
|
||||
// Check if PCAP exists
|
||||
if _, err := os.Stat(pcapPath); os.IsNotExist(err) {
|
||||
fmt.Printf(" ⊗ Skipping: %s (no matching PCAP)\n", file.Name())
|
||||
skipped++
|
||||
continue
|
||||
}
|
||||
|
||||
// Check if already processed (has backup)
|
||||
backupPath := csvPath + ".bak"
|
||||
if _, err := os.Stat(backupPath); err == nil {
|
||||
fmt.Printf(" ⊙ Skipping: %s (already processed, backup exists)\n", file.Name())
|
||||
skipped++
|
||||
continue
|
||||
}
|
||||
|
||||
fmt.Printf(" ↻ Processing: %s ... ", file.Name())
|
||||
if err := processPair(csvPath, pcapPath); err != nil {
|
||||
fmt.Printf("ERROR\n")
|
||||
log.Printf(" Error: %v\n", err)
|
||||
errors++
|
||||
} else {
|
||||
fmt.Printf("✓\n")
|
||||
processed++
|
||||
}
|
||||
}
|
||||
|
||||
fmt.Printf(" Summary: %d processed, %d skipped, %d errors\n", processed, skipped, errors)
|
||||
return nil
|
||||
}
|
||||
|
||||
func processPair(csvPath, pcapPath string) error {
|
||||
// Create backup
|
||||
backupPath := csvPath + ".bak"
|
||||
input, err := os.ReadFile(csvPath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("backup read failed: %w", err)
|
||||
}
|
||||
if err := os.WriteFile(backupPath, input, 0644); err != nil {
|
||||
return fmt.Errorf("backup write failed: %w", err)
|
||||
}
|
||||
|
||||
// Read CSV records
|
||||
records, err := readCSV(csvPath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("CSV read failed: %w", err)
|
||||
}
|
||||
|
||||
if len(records) == 0 {
|
||||
return fmt.Errorf("no records in CSV")
|
||||
}
|
||||
|
||||
// Read and parse PCAP
|
||||
packets, err := readPCAPGo(pcapPath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("PCAP read failed: %w", err)
|
||||
}
|
||||
|
||||
// Enrich records with bandwidth data
|
||||
enrichRecords(records, packets)
|
||||
|
||||
// Write enriched CSV
|
||||
if err := writeCSV(csvPath, records); err != nil {
|
||||
return fmt.Errorf("CSV write failed: %w", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func readCSV(path string) ([]*QueryRecord, error) {
|
||||
f, err := os.Open(path)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
r := csv.NewReader(f)
|
||||
rows, err := r.ReadAll()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if len(rows) < 2 {
|
||||
return nil, fmt.Errorf("CSV has no data rows")
|
||||
}
|
||||
|
||||
records := make([]*QueryRecord, 0, len(rows)-1)
|
||||
for i := 1; i < len(rows); i++ {
|
||||
row := rows[i]
|
||||
if len(row) < 14 {
|
||||
log.Printf(" Warning: Skipping malformed row %d", i+1)
|
||||
continue
|
||||
}
|
||||
|
||||
durationNs, _ := strconv.ParseInt(row[8], 10, 64)
|
||||
durationMs, _ := strconv.ParseFloat(row[9], 64)
|
||||
reqSize, _ := strconv.Atoi(row[10])
|
||||
respSize, _ := strconv.Atoi(row[11])
|
||||
|
||||
records = append(records, &QueryRecord{
|
||||
Domain: row[0],
|
||||
QueryType: row[1],
|
||||
Protocol: row[2],
|
||||
DNSSec: row[3],
|
||||
AuthDNSSec: row[4],
|
||||
KeepAlive: row[5],
|
||||
DNSServer: row[6],
|
||||
Timestamp: row[7],
|
||||
DurationNs: durationNs,
|
||||
DurationMs: durationMs,
|
||||
RequestSizeBytes: reqSize,
|
||||
ResponseSizeBytes: respSize,
|
||||
ResponseCode: row[12],
|
||||
Error: row[13],
|
||||
})
|
||||
}
|
||||
|
||||
return records, nil
|
||||
}
|
||||
|
||||
// PacketInfo is a minimal summary of one captured DNS packet: when it
// was seen, how large it was on the wire, and its direction.
type PacketInfo struct {
	Timestamp time.Time // capture time of the packet
	Size      int       // full frame length in bytes
	IsSent    bool      // true when destined for a DNS port (client -> server)
}
|
||||
|
||||
func readPCAPGo(path string) ([]PacketInfo, error) {
|
||||
f, err := os.Open(path)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
reader, err := pcapgo.NewReader(f)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var packets []PacketInfo
|
||||
packetSource := gopacket.NewPacketSource(reader, reader.LinkType())
|
||||
|
||||
for packet := range packetSource.Packets() {
|
||||
if packet.NetworkLayer() == nil {
|
||||
continue
|
||||
}
|
||||
|
||||
isDNS := false
|
||||
isSent := false
|
||||
|
||||
// Check UDP layer (DNS, DoQ, DoH3)
|
||||
if udpLayer := packet.Layer(layers.LayerTypeUDP); udpLayer != nil {
|
||||
udp := udpLayer.(*layers.UDP)
|
||||
isDNS = udp.SrcPort == 53 || udp.DstPort == 53 ||
|
||||
udp.SrcPort == 853 || udp.DstPort == 853 ||
|
||||
udp.SrcPort == 443 || udp.DstPort == 443
|
||||
isSent = udp.DstPort == 53 || udp.DstPort == 853 || udp.DstPort == 443
|
||||
}
|
||||
|
||||
// Check TCP layer (DoT, DoH)
|
||||
if tcpLayer := packet.Layer(layers.LayerTypeTCP); tcpLayer != nil {
|
||||
tcp := tcpLayer.(*layers.TCP)
|
||||
isDNS = tcp.SrcPort == 53 || tcp.DstPort == 53 ||
|
||||
tcp.SrcPort == 853 || tcp.DstPort == 853 ||
|
||||
tcp.SrcPort == 443 || tcp.DstPort == 443
|
||||
isSent = tcp.DstPort == 53 || tcp.DstPort == 853 || tcp.DstPort == 443
|
||||
}
|
||||
|
||||
if isDNS {
|
||||
packets = append(packets, PacketInfo{
|
||||
Timestamp: packet.Metadata().Timestamp,
|
||||
Size: len(packet.Data()),
|
||||
IsSent: isSent,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
return packets, nil
|
||||
}
|
||||
|
||||
func enrichRecords(records []*QueryRecord, packets []PacketInfo) {
|
||||
for _, rec := range records {
|
||||
ts, err := parseRFC3339Nano(rec.Timestamp)
|
||||
if err != nil {
|
||||
log.Printf(" Warning: Failed to parse timestamp: %s", rec.Timestamp)
|
||||
continue
|
||||
}
|
||||
|
||||
// Define time window for this query
|
||||
windowStart := ts
|
||||
windowEnd := ts.Add(time.Duration(rec.DurationNs))
|
||||
|
||||
var sent, recv, pktSent, pktRecv int64
|
||||
|
||||
// Match packets within the time window
|
||||
for _, pkt := range packets {
|
||||
if (pkt.Timestamp.Equal(windowStart) || pkt.Timestamp.After(windowStart)) &&
|
||||
pkt.Timestamp.Before(windowEnd) {
|
||||
if pkt.IsSent {
|
||||
sent += int64(pkt.Size)
|
||||
pktSent++
|
||||
} else {
|
||||
recv += int64(pkt.Size)
|
||||
pktRecv++
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
rec.BytesSent = sent
|
||||
rec.BytesReceived = recv
|
||||
rec.PacketsSent = pktSent
|
||||
rec.PacketsReceived = pktRecv
|
||||
rec.TotalBytes = sent + recv
|
||||
}
|
||||
}
|
||||
|
||||
func writeCSV(path string, records []*QueryRecord) error {
|
||||
f, err := os.Create(path)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
w := csv.NewWriter(f)
|
||||
defer w.Flush()
|
||||
|
||||
// Write header
|
||||
header := []string{
|
||||
"domain", "query_type", "protocol", "dnssec", "auth_dnssec",
|
||||
"keep_alive", "dns_server", "timestamp", "duration_ns", "duration_ms",
|
||||
"request_size_bytes", "response_size_bytes", "response_code", "error",
|
||||
"bytes_sent", "bytes_received", "packets_sent", "packets_received", "total_bytes",
|
||||
}
|
||||
if err := w.Write(header); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Write data rows
|
||||
for _, rec := range records {
|
||||
row := []string{
|
||||
rec.Domain,
|
||||
rec.QueryType,
|
||||
rec.Protocol,
|
||||
rec.DNSSec,
|
||||
rec.AuthDNSSec,
|
||||
rec.KeepAlive,
|
||||
rec.DNSServer,
|
||||
rec.Timestamp,
|
||||
strconv.FormatInt(rec.DurationNs, 10),
|
||||
strconv.FormatFloat(rec.DurationMs, 'f', -1, 64),
|
||||
strconv.Itoa(rec.RequestSizeBytes),
|
||||
strconv.Itoa(rec.ResponseSizeBytes),
|
||||
rec.ResponseCode,
|
||||
rec.Error,
|
||||
strconv.FormatInt(rec.BytesSent, 10),
|
||||
strconv.FormatInt(rec.BytesReceived, 10),
|
||||
strconv.FormatInt(rec.PacketsSent, 10),
|
||||
strconv.FormatInt(rec.PacketsReceived, 10),
|
||||
strconv.FormatInt(rec.TotalBytes, 10),
|
||||
}
|
||||
if err := w.Write(row); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func main() {
|
||||
resultsDir := "results"
|
||||
providers := []string{"adguard", "cloudflare", "google", "quad9"}
|
||||
|
||||
fmt.Println("╔═══════════════════════════════════════════════╗")
|
||||
fmt.Println("║ DNS PCAP Preprocessor v1.0 ║")
|
||||
fmt.Println("║ Enriching ALL CSVs with bandwidth metrics ║")
|
||||
fmt.Println("╚═══════════════════════════════════════════════╝")
|
||||
|
||||
totalProcessed := 0
|
||||
totalSkipped := 0
|
||||
totalErrors := 0
|
||||
|
||||
for _, provider := range providers {
|
||||
providerPath := filepath.Join(resultsDir, provider)
|
||||
if _, err := os.Stat(providerPath); os.IsNotExist(err) {
|
||||
fmt.Printf("\n⚠ Provider folder not found: %s\n", provider)
|
||||
continue
|
||||
}
|
||||
|
||||
if err := processProviderFolder(providerPath); err != nil {
|
||||
log.Printf("Error processing %s: %v\n", provider, err)
|
||||
totalErrors++
|
||||
}
|
||||
}
|
||||
|
||||
fmt.Println("\n╔═══════════════════════════════════════════════╗")
|
||||
fmt.Println("║ Preprocessing Complete! ║")
|
||||
fmt.Println("╚═══════════════════════════════════════════════╝")
|
||||
fmt.Printf("\nAll CSV files now have 5 additional columns:\n")
|
||||
fmt.Printf(" • bytes_sent - Total bytes sent to DNS server\n")
|
||||
fmt.Printf(" • bytes_received - Total bytes received from DNS server\n")
|
||||
fmt.Printf(" • packets_sent - Number of packets sent\n")
|
||||
fmt.Printf(" • packets_received - Number of packets received\n")
|
||||
fmt.Printf(" • total_bytes - Sum of sent + received bytes\n")
|
||||
fmt.Printf("\n📁 Backups saved as: *.csv.bak\n")
|
||||
fmt.Printf("\n💡 Tip: The analysis script will filter which files to visualize,\n")
|
||||
fmt.Printf(" but all files now have complete bandwidth metrics!\n")
|
||||
}
|
||||
@@ -1,250 +1,362 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Add network metrics from PCAP files to DNS CSV files.
|
||||
Adds: raw_bytes_total, raw_packet_count, overhead_bytes, efficiency_percent
|
||||
Fast PCAP Preprocessor for DNS QoS Analysis
|
||||
Loads PCAP into memory first, then uses binary search for matching.
|
||||
Uses LAN IP to determine direction (LAN = sent, non-LAN = received).
|
||||
"""
|
||||
|
||||
import csv
|
||||
import os
|
||||
import argparse
|
||||
import re
|
||||
import shutil
|
||||
from pathlib import Path
|
||||
from datetime import datetime, timezone
|
||||
from scapy.all import rdpcap
|
||||
from typing import Dict, List, NamedTuple
|
||||
import time
|
||||
|
||||
def parse_timestamp(ts_str):
    """Parse an RFC3339Nano timestamp string with a numeric UTC offset.

    Returns a (datetime, nanoseconds) pair: the datetime is truncated to
    microsecond precision (datetime's limit), while the second value
    carries the full 9-digit fractional-second count.
    """
    m = re.match(
        r'(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2})\.(\d+)([\+\-]\d{2}:\d{2})',
        ts_str
    )
    if m is None:
        raise ValueError(f"Invalid timestamp format: {ts_str}")

    base, frac, offset = m.groups()
    # datetime resolves only microseconds; pad/truncate the fraction to 6 digits.
    dt = datetime.fromisoformat(f"{base}.{frac[:6].ljust(6, '0')}{offset}")
    # Keep the full nanosecond count separately (right-padded to 9 digits).
    return dt, int(frac.ljust(9, '0'))
|
||||
import dpkt
|
||||
from dateutil import parser as date_parser
|
||||
|
||||
def read_pcap(pcap_path):
|
||||
"""Read PCAP and return list of (timestamp_epoch, size)."""
|
||||
|
||||
class Packet(NamedTuple):
    """Lightweight packet representation."""
    # Capture time as Unix epoch seconds.
    timestamp: float
    # Frame length in bytes.
    size: int
    is_outbound: bool  # True if from LAN, False if from internet
|
||||
|
||||
|
||||
class QueryWindow:
    """Per-query byte/packet accumulator over a [start, end] capture window.

    Declares __slots__ so thousands of instances stay cheap.
    """
    __slots__ = ['index', 'start', 'end', 'sent', 'received', 'pkts_sent', 'pkts_received']

    def __init__(self, index: int, start: float, end: float):
        self.index = index
        self.start = start
        self.end = end
        # All counters begin at zero; they are filled during matching.
        self.sent = self.received = 0
        self.pkts_sent = self.pkts_received = 0
||||
|
||||
|
||||
def parse_csv_timestamp(ts_str: str) -> float:
    """Convert an RFC3339Nano timestamp string to Unix epoch seconds."""
    # dateutil tolerates the 9-digit fractional second that older
    # datetime.fromisoformat versions reject.
    return date_parser.isoparse(ts_str).timestamp()
|
||||
|
||||
|
||||
def is_lan_ip(ip_bytes: bytes) -> bool:
    """Return True when a 4-byte IPv4 address is private or loopback.

    Covers 10.0.0.0/8, 172.16.0.0/12, 192.168.0.0/16 and 127.0.0.0/8;
    anything else — including non-4-byte input — is treated as public.
    """
    if len(ip_bytes) != 4:
        return False

    octet1, octet2 = ip_bytes[0], ip_bytes[1]

    return (
        octet1 in (10, 127)                        # 10.0.0.0/8, 127.0.0.0/8
        or (octet1 == 172 and 16 <= octet2 <= 31)  # 172.16.0.0/12
        or (octet1 == 192 and octet2 == 168)       # 192.168.0.0/16
    )
|
||||
|
||||
|
||||
def load_pcap_into_memory(pcap_path: Path) -> List[Packet]:
|
||||
"""Load all packets from PCAP into memory with minimal data."""
|
||||
packets = []
|
||||
|
||||
print(f" Loading PCAP into memory...")
|
||||
start_time = time.time()
|
||||
|
||||
try:
|
||||
pkts = rdpcap(str(pcap_path))
|
||||
for pkt in pkts:
|
||||
timestamp = float(pkt.time)
|
||||
length = len(pkt)
|
||||
packets.append((timestamp, length))
|
||||
with open(pcap_path, 'rb') as f:
|
||||
try:
|
||||
pcap = dpkt.pcap.Reader(f)
|
||||
except:
|
||||
# Try pcapng format
|
||||
f.seek(0)
|
||||
pcap = dpkt.pcapng.Reader(f)
|
||||
|
||||
for ts, buf in pcap:
|
||||
try:
|
||||
packet_time = float(ts)
|
||||
packet_size = len(buf)
|
||||
|
||||
# Parse to get source IP
|
||||
eth = dpkt.ethernet.Ethernet(buf)
|
||||
|
||||
# Default to outbound if we can't determine
|
||||
is_outbound = True
|
||||
|
||||
if isinstance(eth.data, dpkt.ip.IP):
|
||||
ip = eth.data
|
||||
src_ip = ip.src
|
||||
is_outbound = is_lan_ip(src_ip)
|
||||
|
||||
packets.append(Packet(
|
||||
timestamp=packet_time,
|
||||
size=packet_size,
|
||||
is_outbound=is_outbound
|
||||
))
|
||||
|
||||
except (dpkt.dpkt.NeedData, dpkt.dpkt.UnpackError, AttributeError):
|
||||
continue
|
||||
|
||||
except Exception as e:
|
||||
print(f" ❌ Error reading PCAP: {e}")
|
||||
print(f" Error reading PCAP: {e}")
|
||||
return []
|
||||
|
||||
elapsed = time.time() - start_time
|
||||
print(f" Loaded {len(packets):,} packets in {elapsed:.2f}s")
|
||||
|
||||
# Sort by timestamp for binary search
|
||||
packets.sort(key=lambda p: p.timestamp)
|
||||
|
||||
return packets
|
||||
|
||||
def find_packets_in_window(packets, start_ts, start_nanos, duration_ns):
    """Sum sizes and count of packets inside a query's exact time window.

    Args:
        packets: list of (epoch_seconds, size_bytes) tuples.
        start_ts: query start as a microsecond-precision datetime.
        start_nanos: full 9-digit fractional-second part of the start time.
        duration_ns: query duration in nanoseconds.

    Returns:
        (total_bytes, packet_count) for packets with start <= ts <= end.
    """
    # start_ts already carries the first 6 fractional digits (microseconds),
    # so only the sub-microsecond remainder of the nanosecond field is added.
    # BUG FIX: the previous code added (start_nanos % 1_000_000) / 1e9,
    # double-counting the sub-millisecond microseconds and shifting the
    # window forward by up to ~1 ms.
    start_epoch = start_ts.timestamp() + (start_nanos % 1_000) / 1_000_000_000
    end_epoch = start_epoch + duration_ns / 1_000_000_000

    total_bytes = 0
    packet_count = 0

    for pkt_ts, pkt_len in packets:
        if start_epoch <= pkt_ts <= end_epoch:
            total_bytes += pkt_len
            packet_count += 1

    return total_bytes, packet_count
|
||||
|
||||
def enhance_csv(csv_path, pcap_path, output_path, debug=False):
|
||||
"""Add PCAP metrics to CSV."""
|
||||
if not os.path.exists(pcap_path):
|
||||
print(f"⚠️ PCAP not found: {pcap_path}")
|
||||
return False
|
||||
|
||||
print(f"Processing: {os.path.basename(csv_path)}")
|
||||
|
||||
# Read PCAP
|
||||
packets = read_pcap(pcap_path)
|
||||
print(f" Loaded {len(packets)} packets")
|
||||
|
||||
def find_packets_in_window(
|
||||
packets: List[Packet],
|
||||
start_time: float,
|
||||
end_time: float,
|
||||
left_hint: int = 0
|
||||
) -> tuple[List[Packet], int]:
|
||||
"""
|
||||
Binary search to find all packets within time window.
|
||||
Returns (matching_packets, left_index_hint_for_next_search).
|
||||
"""
|
||||
if not packets:
|
||||
print(" ❌ No packets found")
|
||||
return False
|
||||
return [], 0
|
||||
|
||||
if packets and debug:
|
||||
first_pcap = packets[0][0]
|
||||
last_pcap = packets[-1][0]
|
||||
print(f" First PCAP packet: {first_pcap:.6f}")
|
||||
print(f" Last PCAP packet: {last_pcap:.6f}")
|
||||
print(f" PCAP duration: {(last_pcap - first_pcap):.3f}s")
|
||||
# Binary search for first packet >= start_time
|
||||
left, right = left_hint, len(packets) - 1
|
||||
first_idx = len(packets)
|
||||
|
||||
# Read CSV
|
||||
with open(csv_path, 'r', newline='') as f:
|
||||
reader = csv.DictReader(f)
|
||||
fieldnames = list(reader.fieldnames) + [
|
||||
'raw_bytes_total',
|
||||
'raw_packet_count',
|
||||
'overhead_bytes',
|
||||
'efficiency_percent'
|
||||
]
|
||||
rows = list(reader)
|
||||
while left <= right:
|
||||
mid = (left + right) // 2
|
||||
if packets[mid].timestamp >= start_time:
|
||||
first_idx = mid
|
||||
right = mid - 1
|
||||
else:
|
||||
left = mid + 1
|
||||
|
||||
if rows and debug:
|
||||
try:
|
||||
first_ts, _ = parse_timestamp(rows[0]['timestamp'])
|
||||
last_ts, _ = parse_timestamp(rows[-1]['timestamp'])
|
||||
print(f" First CSV query: {first_ts.timestamp():.6f}")
|
||||
print(f" Last CSV query: {last_ts.timestamp():.6f}")
|
||||
offset = packets[0][0] - first_ts.timestamp()
|
||||
print(f" Time offset (PCAP - CSV): {offset:.3f}s")
|
||||
except:
|
||||
pass
|
||||
# No packets in range
|
||||
if first_idx >= len(packets) or packets[first_idx].timestamp > end_time:
|
||||
return [], first_idx
|
||||
|
||||
# Enhance rows
|
||||
enhanced = []
|
||||
matched = 0
|
||||
# Collect all packets in window
|
||||
matching = []
|
||||
idx = first_idx
|
||||
while idx < len(packets) and packets[idx].timestamp <= end_time:
|
||||
matching.append(packets[idx])
|
||||
idx += 1
|
||||
|
||||
for i, row in enumerate(rows):
|
||||
try:
|
||||
timestamp, nanos = parse_timestamp(row['timestamp'])
|
||||
duration_ns = int(row['duration_ns'])
|
||||
|
||||
raw_bytes, packet_count = find_packets_in_window(
|
||||
packets, timestamp, nanos, duration_ns
|
||||
)
|
||||
|
||||
useful_bytes = (
|
||||
int(row['request_size_bytes']) +
|
||||
int(row['response_size_bytes'])
|
||||
)
|
||||
overhead = raw_bytes - useful_bytes
|
||||
efficiency = (
|
||||
(useful_bytes / raw_bytes * 100)
|
||||
if raw_bytes > 0 else 0
|
||||
)
|
||||
|
||||
row['raw_bytes_total'] = raw_bytes
|
||||
row['raw_packet_count'] = packet_count
|
||||
row['overhead_bytes'] = overhead
|
||||
row['efficiency_percent'] = f"{efficiency:.2f}"
|
||||
|
||||
if raw_bytes > 0:
|
||||
matched += 1
|
||||
|
||||
# Debug first few queries
|
||||
if debug and i < 3:
|
||||
print(f" Query {i}: {row['domain']}")
|
||||
print(f" Duration: {duration_ns / 1e6:.3f}ms")
|
||||
print(f" Matched packets: {packet_count}")
|
||||
print(f" Raw bytes: {raw_bytes}")
|
||||
print(f" Useful bytes: {useful_bytes}")
|
||||
print(f" Efficiency: {efficiency:.2f}%")
|
||||
|
||||
except (ValueError, KeyError) as e:
|
||||
if debug:
|
||||
print(f" Error processing row {i}: {e}")
|
||||
row['raw_bytes_total'] = 0
|
||||
row['raw_packet_count'] = 0
|
||||
row['overhead_bytes'] = 0
|
||||
row['efficiency_percent'] = "0.00"
|
||||
|
||||
enhanced.append(row)
|
||||
|
||||
print(f" Matched: {matched}/{len(rows)} queries")
|
||||
|
||||
if matched == 0:
|
||||
print(" ⚠️ WARNING: No queries matched any packets!")
|
||||
print(" This might indicate timestamp misalignment.")
|
||||
|
||||
# Write output
|
||||
os.makedirs(os.path.dirname(output_path), exist_ok=True)
|
||||
with open(output_path, 'w', newline='') as f:
|
||||
writer = csv.DictWriter(f, fieldnames=fieldnames, extrasaction='ignore')
|
||||
writer.writeheader()
|
||||
writer.writerows(enhanced)
|
||||
|
||||
print(f" ✓ Saved: {output_path}")
|
||||
return True
|
||||
return matching, first_idx
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(
|
||||
description='Add PCAP network metrics to DNS CSV files'
|
||||
)
|
||||
parser.add_argument('input_dir', help='Input directory (e.g., results)')
|
||||
parser.add_argument(
|
||||
'--output',
|
||||
default='./results_enriched',
|
||||
help='Output directory (default: ./results_enriched)'
|
||||
)
|
||||
parser.add_argument(
|
||||
'--dry-run',
|
||||
action='store_true',
|
||||
help='Preview files without processing'
|
||||
)
|
||||
parser.add_argument(
|
||||
'--debug',
|
||||
action='store_true',
|
||||
help='Show detailed timing information'
|
||||
)
|
||||
|
||||
def load_csv_queries(csv_path: Path) -> List[Dict]:
    """Read a measurement CSV and build query-window dicts.

    Each entry keeps the raw CSV row under 'data' plus the epoch start
    and end of the query; rows that fail to parse are reported and dropped.
    """
    queries = []
    with open(csv_path, 'r', encoding='utf-8') as f:
        for row in csv.DictReader(f):
            try:
                start = parse_csv_timestamp(row['timestamp'])
                end = start + float(row['duration_ns']) / 1e9
            except Exception as e:
                print(f" Warning: Skipping row - {e}")
                continue
            queries.append({
                'data': row,
                'start_time': start,
                'end_time': end,
            })
    return queries
|
||||
|
||||
|
||||
def match_packets_to_queries(
|
||||
packets: List[Packet],
|
||||
queries: List[Dict]
|
||||
) -> List[Dict]:
|
||||
"""Match packets to query windows using binary search."""
|
||||
if not queries or not packets:
|
||||
return queries
|
||||
|
||||
args = parser.parse_args()
|
||||
print(f" Matching packets to queries...")
|
||||
start_time = time.time()
|
||||
|
||||
print("=" * 60)
|
||||
print("ENHANCE DNS CSVs WITH PCAP METRICS")
|
||||
print("=" * 60)
|
||||
print(f"Input: {args.input_dir}")
|
||||
print(f"Output: {args.output}")
|
||||
if args.debug:
|
||||
print("Debug: ENABLED")
|
||||
print()
|
||||
# Initialize metrics
|
||||
for q in queries:
|
||||
q['bytes_sent'] = 0
|
||||
q['bytes_received'] = 0
|
||||
q['packets_sent'] = 0
|
||||
q['packets_received'] = 0
|
||||
q['total_bytes'] = 0
|
||||
|
||||
# Find CSV files
|
||||
csv_files = list(Path(args.input_dir).rglob('*.csv'))
|
||||
# Sort queries by start time for sequential processing
|
||||
queries_sorted = sorted(enumerate(queries), key=lambda x: x[1]['start_time'])
|
||||
|
||||
if not csv_files:
|
||||
print("❌ No CSV files found")
|
||||
return 1
|
||||
matched_packets = 0
|
||||
left_hint = 0 # Optimization: start next search from here
|
||||
|
||||
print(f"Found {len(csv_files)} CSV files\n")
|
||||
for original_idx, q in queries_sorted:
|
||||
matching, left_hint = find_packets_in_window(
|
||||
packets,
|
||||
q['start_time'],
|
||||
q['end_time'],
|
||||
left_hint
|
||||
)
|
||||
|
||||
for pkt in matching:
|
||||
matched_packets += 1
|
||||
if pkt.is_outbound:
|
||||
q['bytes_sent'] += pkt.size
|
||||
q['packets_sent'] += 1
|
||||
else:
|
||||
q['bytes_received'] += pkt.size
|
||||
q['packets_received'] += 1
|
||||
|
||||
q['total_bytes'] = q['bytes_sent'] + q['bytes_received']
|
||||
|
||||
if args.dry_run:
|
||||
print("DRY RUN - would process:")
|
||||
for csv_path in csv_files:
|
||||
pcap_path = csv_path.with_suffix('.pcap')
|
||||
print(f" {csv_path.relative_to(args.input_dir)}")
|
||||
print(f" PCAP: {'✓' if pcap_path.exists() else '✗'}")
|
||||
return 0
|
||||
elapsed = time.time() - start_time
|
||||
print(f" Matched {matched_packets:,} packets in {elapsed:.2f}s")
|
||||
|
||||
# Process files
|
||||
success = 0
|
||||
failed = 0
|
||||
# Statistics
|
||||
total_sent = sum(q['bytes_sent'] for q in queries)
|
||||
total_recv = sum(q['bytes_received'] for q in queries)
|
||||
queries_with_data = sum(1 for q in queries if q['total_bytes'] > 0)
|
||||
print(f" Total: {total_sent:,} bytes sent, {total_recv:,} bytes received")
|
||||
print(f" Queries with data: {queries_with_data}/{len(queries)}")
|
||||
|
||||
return queries
|
||||
|
||||
|
||||
def write_enriched_csv(
    csv_path: Path, queries: List[Dict], backup: bool = True
):
    """Write enriched CSV with bandwidth columns.

    Rewrites csv_path in place, appending the five bandwidth columns to
    the original header. When backup is True and the file exists, the
    original is copied to *.csv.bak first (an existing backup is never
    overwritten).
    """
    # ROBUSTNESS FIX: the original indexed queries[0] unconditionally and
    # crashed with IndexError when no query rows survived parsing.
    if not queries:
        print(f" ⚠ Nothing to write for {csv_path.name} (no queries)")
        return

    if backup and csv_path.exists():
        backup_path = csv_path.with_suffix('.csv.bak')
        if not backup_path.exists():  # Don't overwrite existing backup
            shutil.copy2(csv_path, backup_path)
            print(f" Backup: {backup_path.name}")

    # Original columns come from the first row; the bandwidth columns
    # are appended in a fixed order.
    new_fields = [
        'bytes_sent',
        'bytes_received',
        'packets_sent',
        'packets_received',
        'total_bytes',
    ]
    fieldnames = list(queries[0]['data'].keys()) + new_fields

    with open(csv_path, 'w', encoding='utf-8', newline='') as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()

        for q in queries:
            row = q['data'].copy()
            for field in new_fields:
                row[field] = q[field]
            writer.writerow(row)

    print(f" Written: {csv_path.name}")
|
||||
|
||||
|
||||
def process_provider_directory(provider_path: Path):
|
||||
"""Process all CSV/PCAP pairs in a provider directory."""
|
||||
print(f"\n{'='*60}")
|
||||
print(f"Processing: {provider_path.name.upper()}")
|
||||
print(f"{'='*60}")
|
||||
|
||||
csv_files = sorted(provider_path.glob('*.csv'))
|
||||
processed = 0
|
||||
total_time = 0
|
||||
|
||||
for csv_path in csv_files:
|
||||
pcap_path = csv_path.with_suffix('.pcap')
|
||||
rel_path = csv_path.relative_to(args.input_dir)
|
||||
output_path = Path(args.output) / rel_path
|
||||
# Skip backup files
|
||||
if '.bak' in csv_path.name:
|
||||
continue
|
||||
|
||||
if enhance_csv(str(csv_path), str(pcap_path), str(output_path),
|
||||
args.debug):
|
||||
success += 1
|
||||
else:
|
||||
failed += 1
|
||||
print()
|
||||
pcap_path = csv_path.with_suffix('.pcap')
|
||||
|
||||
if not pcap_path.exists():
|
||||
print(f"\n ⚠ Skipping {csv_path.name} - no matching PCAP")
|
||||
continue
|
||||
|
||||
print(f"\n 📁 {csv_path.name}")
|
||||
file_start = time.time()
|
||||
|
||||
# Load PCAP into memory first
|
||||
packets = load_pcap_into_memory(pcap_path)
|
||||
if not packets:
|
||||
print(f" ⚠ No packets found in PCAP")
|
||||
continue
|
||||
|
||||
# Load CSV queries
|
||||
queries = load_csv_queries(csv_path)
|
||||
if not queries:
|
||||
print(f" ⚠ No valid queries found")
|
||||
continue
|
||||
|
||||
print(f" Loaded {len(queries):,} queries")
|
||||
|
||||
# Match packets to queries
|
||||
enriched_queries = match_packets_to_queries(packets, queries)
|
||||
|
||||
# Write enriched CSV
|
||||
write_enriched_csv(csv_path, enriched_queries)
|
||||
|
||||
file_time = time.time() - file_start
|
||||
total_time += file_time
|
||||
processed += 1
|
||||
print(f" ✓ Completed in {file_time:.2f}s")
|
||||
|
||||
# Summary
|
||||
print("=" * 60)
|
||||
print(f"✓ Success: {success}")
|
||||
print(f"✗ Failed: {failed}")
|
||||
print(f"Total: {len(csv_files)}")
|
||||
print(f"\nOutput: {args.output}")
|
||||
|
||||
return 0 if failed == 0 else 1
|
||||
print(f"\n {'='*58}")
|
||||
print(f" {provider_path.name}: {processed} files in {total_time:.2f}s")
|
||||
print(f" {'='*58}")
|
||||
|
||||
if __name__ == "__main__":
|
||||
exit(main())
|
||||
|
||||
def main():
    """Main preprocessing pipeline."""
    overall_start = time.time()

    banner = "=" * 60
    print("\n" + banner)
    print("DNS PCAP PREPROCESSOR - Memory-Optimized Edition")
    print(banner)

    results_dir = Path('results')
    if not results_dir.exists():
        print(f"\n❌ Error: '{results_dir}' directory not found")
        return

    # Walk each known provider folder; warn about missing ones.
    for provider in ['adguard', 'cloudflare', 'google', 'quad9']:
        provider_path = results_dir / provider
        if not provider_path.exists():
            print(f"\n⚠ Warning: Provider directory not found: {provider}")
            continue
        process_provider_directory(provider_path)

    overall_time = time.time() - overall_start

    print("\n" + banner)
    print(f"✓ PREPROCESSING COMPLETE")
    print(f" Total time: {overall_time:.2f}s ({overall_time/60:.1f} minutes)")
    print(banner + "\n")
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
|
||||
426
scripts/tools/csvs_to_sqlite.py
Normal file
426
scripts/tools/csvs_to_sqlite.py
Normal file
@@ -0,0 +1,426 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Convert DNS CSV files to SQLite database.
|
||||
Creates a single normalized table with unified DNSSEC handling.
|
||||
"""
|
||||
|
||||
import sqlite3
|
||||
import csv
|
||||
from pathlib import Path
|
||||
from dateutil import parser as date_parser
|
||||
|
||||
|
||||
def create_database_schema(conn: sqlite3.Connection):
    """Create the dns_queries table and its lookup indexes (idempotent)."""
    cursor = conn.cursor()

    # One denormalized row per DNS query measurement.
    cursor.execute("""
        CREATE TABLE IF NOT EXISTS dns_queries (
            id INTEGER PRIMARY KEY AUTOINCREMENT,

            -- Metadata
            provider TEXT NOT NULL,
            protocol TEXT NOT NULL,
            dnssec_mode TEXT NOT NULL CHECK(dnssec_mode IN ('off', 'auth', 'trust')),

            -- Query details
            domain TEXT NOT NULL,
            query_type TEXT NOT NULL,
            keep_alive BOOLEAN NOT NULL,
            dns_server TEXT NOT NULL,

            -- Timing
            timestamp TEXT NOT NULL,
            timestamp_unix REAL NOT NULL,
            duration_ns INTEGER NOT NULL,
            duration_ms REAL NOT NULL,

            -- Size metrics
            request_size_bytes INTEGER,
            response_size_bytes INTEGER,

            -- Network metrics (from PCAP)
            bytes_sent INTEGER DEFAULT 0,
            bytes_received INTEGER DEFAULT 0,
            packets_sent INTEGER DEFAULT 0,
            packets_received INTEGER DEFAULT 0,
            total_bytes INTEGER DEFAULT 0,

            -- Response
            response_code TEXT,
            error TEXT
        )
    """)

    # Secondary indexes for the common filter columns.
    index_columns = {
        'idx_provider': 'provider',
        'idx_protocol': 'protocol',
        'idx_dnssec_mode': 'dnssec_mode',
        'idx_keep_alive': 'keep_alive',
        'idx_provider_protocol_dnssec': 'provider, protocol, dnssec_mode',
        'idx_timestamp': 'timestamp_unix',
        'idx_domain': 'domain',
    }
    for index_name, columns in index_columns.items():
        cursor.execute(
            f"CREATE INDEX IF NOT EXISTS {index_name} "
            f"ON dns_queries({columns})"
        )

    conn.commit()
|
||||
|
||||
|
||||
def parse_protocol_and_dnssec(filename: str) -> tuple[str, str, bool]:
    """
    Extract base protocol, DNSSEC mode, and keep_alive from filename.
    Returns (base_protocol, dnssec_mode, keep_alive)

    Examples:
        'udp.csv' -> ('udp', 'off', False)
        'udp-auth.csv' -> ('udp', 'auth', False)
        'tls.csv' -> ('tls', 'off', False)
        'tls-persist.csv' -> ('tls', 'off', True)
        'https-persist.csv' -> ('https', 'off', True)
        'https-auth-persist.csv' -> ('https', 'auth', True)
        'https-trust-persist.csv' -> ('https', 'trust', True)
        'doh3-auth.csv' -> ('doh3', 'auth', False)
        'doq.csv' -> ('doq', 'off', False)
    """
    # removesuffix strips only the trailing occurrence; str.replace would
    # also mangle a matching substring earlier in the name.
    name = filename.removesuffix('.csv')

    # '-persist' suffix marks a keep-alive (persistent-connection) run.
    keep_alive = name.endswith('-persist')
    if keep_alive:
        name = name.removesuffix('-persist')

    # DNSSEC validation mode suffix: '-auth' or '-trust'; default is 'off'.
    dnssec_mode = 'off'
    if name.endswith('-auth'):
        dnssec_mode = 'auth'
        name = name.removesuffix('-auth')
    elif name.endswith('-trust'):
        dnssec_mode = 'trust'
        name = name.removesuffix('-trust')

    # For UDP, DoH3, and DoQ, keep_alive doesn't apply (connectionless)
    if name in ('udp', 'doh3', 'doq'):
        keep_alive = False

    return (name, dnssec_mode, keep_alive)
|
||||
|
||||
|
||||
def str_to_bool(value: str) -> bool:
    """Convert string boolean to Python bool."""
    # Case-insensitive membership test against the accepted truthy spellings.
    truthy = {'true', '1', 'yes'}
    return value.lower() in truthy
|
||||
|
||||
|
||||
def import_csv_to_db(
    csv_path: Path,
    provider: str,
    conn: sqlite3.Connection
) -> int:
    """Import a CSV file into the database."""
    # Protocol, DNSSEC mode, and keep-alive are encoded in the filename;
    # the filename is more reliable than the CSV's own keep_alive column.
    protocol, dnssec_mode, keep_alive_flag = parse_protocol_and_dnssec(csv_path.name)

    insert_sql = """
        INSERT INTO dns_queries (
            provider, protocol, dnssec_mode,
            domain, query_type, keep_alive,
            dns_server, timestamp, timestamp_unix,
            duration_ns, duration_ms,
            request_size_bytes, response_size_bytes,
            bytes_sent, bytes_received, packets_sent, packets_received, total_bytes,
            response_code, error
        ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
    """

    def optional_int(row: dict, key: str) -> int:
        # Network-metric columns may be missing or empty in older CSVs.
        return int(row.get(key, 0) or 0)

    cursor = conn.cursor()
    imported = 0

    with open(csv_path, 'r', encoding='utf-8') as handle:
        for row in csv.DictReader(handle):
            try:
                # Parse RFC 3339 timestamp to Unix epoch seconds.
                epoch = date_parser.isoparse(row['timestamp']).timestamp()

                cursor.execute(insert_sql, (
                    provider,
                    protocol,
                    dnssec_mode,
                    row['domain'],
                    row['query_type'],
                    keep_alive_flag,
                    row['dns_server'],
                    row['timestamp'],
                    epoch,
                    int(row['duration_ns']),
                    float(row['duration_ms']),
                    int(row.get('request_size_bytes') or 0),
                    int(row.get('response_size_bytes') or 0),
                    optional_int(row, 'bytes_sent'),
                    optional_int(row, 'bytes_received'),
                    optional_int(row, 'packets_sent'),
                    optional_int(row, 'packets_received'),
                    optional_int(row, 'total_bytes'),
                    row.get('response_code', ''),
                    row.get('error', '')
                ))

                imported += 1

            except Exception as e:
                # Best-effort import: report and skip malformed rows.
                print(f" Warning: Skipping row - {e}")
                continue

    conn.commit()
    return imported
|
||||
|
||||
|
||||
def main():
    """Main import pipeline.

    Walks results/<provider>/*.csv, imports every CSV into a freshly
    created SQLite database (dns.db), then prints summary statistics and
    example SQL queries for downstream analysis (e.g. Metabase).
    """
    print("\n" + "="*60)
    print("CSV to SQLite Database Converter")
    print("="*60)

    results_dir = Path('results')
    db_path = Path('dns.db')

    if not results_dir.exists():
        print(f"\n❌ Error: '{results_dir}' directory not found")
        return

    # Remove existing database — every run rebuilds the database from scratch.
    if db_path.exists():
        print(f"\n⚠ Removing existing database: {db_path}")
        db_path.unlink()

    # Create database and schema
    print(f"\n📊 Creating database: {db_path}")
    conn = sqlite3.connect(db_path)
    create_database_schema(conn)
    print("✓ Schema created")

    # Import CSVs — only these four provider directories are scanned.
    providers = ['adguard', 'cloudflare', 'google', 'quad9']
    total_rows = 0
    total_files = 0

    for provider in providers:
        provider_path = results_dir / provider

        if not provider_path.exists():
            print(f"\n⚠ Skipping {provider} - directory not found")
            continue

        print(f"\n{'='*60}")
        print(f"Importing: {provider.upper()}")
        print(f"{'='*60}")

        csv_files = sorted(provider_path.glob('*.csv'))
        provider_rows = 0
        provider_files = 0

        for csv_path in csv_files:
            # Skip backup files left behind by earlier processing runs.
            if '.bak' in csv_path.name:
                continue

            # Protocol/DNSSEC/keep-alive are encoded in the filename.
            protocol, dnssec, keep_alive = parse_protocol_and_dnssec(csv_path.name)
            ka_str = "persistent" if keep_alive else "non-persist"
            print(f" 📄 {csv_path.name:30} → {protocol:8} (DNSSEC: {dnssec:5}, {ka_str})")

            rows = import_csv_to_db(csv_path, provider, conn)
            print(f" ✓ Imported {rows:,} rows")

            provider_rows += rows
            provider_files += 1

        print(f"\n Total: {provider_files} files, {provider_rows:,} rows")
        total_rows += provider_rows
        total_files += provider_files

    # Create summary
    print(f"\n{'='*60}")
    print("Database Summary")
    print(f"{'='*60}")

    cursor = conn.cursor()

    # Total counts
    cursor.execute("SELECT COUNT(*) FROM dns_queries")
    total_queries = cursor.fetchone()[0]

    cursor.execute("SELECT COUNT(DISTINCT provider) FROM dns_queries")
    unique_providers = cursor.fetchone()[0]

    cursor.execute("SELECT COUNT(DISTINCT protocol) FROM dns_queries")
    unique_protocols = cursor.fetchone()[0]

    cursor.execute("SELECT COUNT(DISTINCT domain) FROM dns_queries")
    unique_domains = cursor.fetchone()[0]

    print(f"\nTotal queries: {total_queries:,}")
    print(f"Providers: {unique_providers}")
    print(f"Protocols: {unique_protocols}")
    print(f"Unique domains: {unique_domains}")

    # Show breakdown by provider, protocol, DNSSEC, and keep_alive
    print(f"\nBreakdown by Provider, Protocol, DNSSEC & Keep-Alive:")
    print(f"{'-'*80}")

    cursor.execute("""
        SELECT provider, protocol, dnssec_mode, keep_alive, COUNT(*) as count
        FROM dns_queries
        GROUP BY provider, protocol, dnssec_mode, keep_alive
        ORDER BY provider, protocol, dnssec_mode, keep_alive
    """)

    current_provider = None
    for provider, protocol, dnssec, keep_alive, count in cursor.fetchall():
        # Blank line between provider groups for readability.
        if current_provider != provider:
            if current_provider is not None:
                print()
            current_provider = provider

        ka_str = "✓" if keep_alive else "✗"
        print(f" {provider:12} | {protocol:8} | {dnssec:5} | KA:{ka_str} | {count:6,} queries")

    # Protocol distribution
    print(f"\n{'-'*80}")
    print("Protocol Distribution:")
    print(f"{'-'*80}")

    cursor.execute("""
        SELECT protocol, COUNT(*) as count
        FROM dns_queries
        GROUP BY protocol
        ORDER BY protocol
    """)

    for protocol, count in cursor.fetchall():
        # NOTE(review): division assumes total_queries > 0; an empty import
        # would raise ZeroDivisionError here — confirm that's acceptable.
        pct = (count / total_queries) * 100
        print(f" {protocol:8} | {count:8,} queries ({pct:5.1f}%)")

    # DNSSEC mode distribution
    print(f"\n{'-'*80}")
    print("DNSSEC Mode Distribution:")
    print(f"{'-'*80}")

    cursor.execute("""
        SELECT dnssec_mode, COUNT(*) as count
        FROM dns_queries
        GROUP BY dnssec_mode
        ORDER BY dnssec_mode
    """)

    for dnssec_mode, count in cursor.fetchall():
        pct = (count / total_queries) * 100
        print(f" {dnssec_mode:5} | {count:8,} queries ({pct:5.1f}%)")

    # Keep-Alive distribution
    print(f"\n{'-'*80}")
    print("Keep-Alive Distribution:")
    print(f"{'-'*80}")

    cursor.execute("""
        SELECT keep_alive, COUNT(*) as count
        FROM dns_queries
        GROUP BY keep_alive
    """)

    for keep_alive, count in cursor.fetchall():
        ka_label = "Persistent" if keep_alive else "Non-persistent"
        pct = (count / total_queries) * 100
        print(f" {ka_label:15} | {count:8,} queries ({pct:5.1f}%)")

    conn.close()

    print(f"\n{'='*60}")
    print(f"✓ Database created successfully: {db_path}")
    print(f" Total: {total_files} files, {total_rows:,} rows")
    print(f"{'='*60}\n")

    # Print usage examples — ready-made SQL for dashboarding tools.
    print("\n📖 Usage Examples for Metabase:")
    print(f"{'-'*60}")

    print("\n1. Compare protocols (DNSSEC off, persistent only):")
    print(""" SELECT provider, protocol,
 AVG(duration_ms) as avg_latency,
 AVG(total_bytes) as avg_bytes
 FROM dns_queries
 WHERE dnssec_mode = 'off' AND keep_alive = 1
 GROUP BY provider, protocol;""")

    print("\n2. DNSSEC impact on UDP:")
    print(""" SELECT provider, dnssec_mode,
 AVG(duration_ms) as avg_latency
 FROM dns_queries
 WHERE protocol = 'udp'
 GROUP BY provider, dnssec_mode;""")

    print("\n3. Keep-alive impact on TLS:")
    print(""" SELECT provider, keep_alive,
 AVG(duration_ms) as avg_latency,
 AVG(total_bytes) as avg_bytes
 FROM dns_queries
 WHERE protocol = 'tls' AND dnssec_mode = 'off'
 GROUP BY provider, keep_alive;""")

    print("\n4. Time series for line graphs:")
    print(""" SELECT timestamp_unix, duration_ms, total_bytes
 FROM dns_queries
 WHERE provider = 'cloudflare'
 AND protocol = 'https'
 AND dnssec_mode = 'off'
 AND keep_alive = 1
 ORDER BY timestamp_unix;""")

    print("\n5. Overall comparison table:")
    print(""" SELECT protocol, dnssec_mode, keep_alive,
 COUNT(*) as queries,
 AVG(duration_ms) as avg_latency,
 AVG(total_bytes) as avg_bytes
 FROM dns_queries
 GROUP BY protocol, dnssec_mode, keep_alive
 ORDER BY protocol, dnssec_mode, keep_alive;""")

    print(f"\n{'-'*60}\n")
|
||||
|
||||
|
||||
# Entry point: run the full CSV → SQLite import when executed as a script.
if __name__ == '__main__':
    main()
|
||||
Reference in New Issue
Block a user