feat(scripts): add scripts to process data
This commit is contained in:
289
scripts/analysis/analyze_dns_metrics.py
Normal file
289
scripts/analysis/analyze_dns_metrics.py
Normal file
@@ -0,0 +1,289 @@
|
||||
import csv
|
||||
import os
|
||||
import statistics
|
||||
from collections import defaultdict
|
||||
from pathlib import Path
|
||||
|
||||
def map_server_to_resolver(server):
|
||||
"""Map server address/domain to resolver name"""
|
||||
server_lower = server.lower()
|
||||
|
||||
if '1.1.1.1' in server_lower or 'cloudflare' in server_lower:
|
||||
return 'Cloudflare'
|
||||
elif '8.8.8.8' in server_lower or 'google' in server_lower:
|
||||
return 'Google'
|
||||
elif '9.9.9.9' in server_lower or 'quad9' in server_lower:
|
||||
return 'Quad9'
|
||||
elif 'adguard' in server_lower:
|
||||
return 'AdGuard'
|
||||
else:
|
||||
return server # Fallback to original server name
|
||||
|
||||
def extract_from_new_format(filename):
|
||||
"""Parse new filename format: protocol[-flags]-timestamp.csv"""
|
||||
base = filename.replace('.csv', '')
|
||||
parts = base.split('-')
|
||||
|
||||
if len(parts) < 2:
|
||||
return None, None, None, None
|
||||
|
||||
protocol = parts[0]
|
||||
timestamp = parts[-1]
|
||||
|
||||
# Flags are everything between protocol and timestamp
|
||||
flags_str = '-'.join(parts[1:-1])
|
||||
|
||||
# Determine DNSSEC status
|
||||
if 'auth' in flags_str:
|
||||
dnssec_status = 'auth' # Authoritative DNSSEC
|
||||
elif 'trust' in flags_str:
|
||||
dnssec_status = 'trust' # Trust-based DNSSEC
|
||||
else:
|
||||
dnssec_status = 'off'
|
||||
|
||||
keepalive_status = 'on' if 'persist' in flags_str else 'off'
|
||||
|
||||
return protocol, dnssec_status, keepalive_status, flags_str
|
||||
|
||||
def extract_server_info_from_csv(row):
|
||||
"""Extract DNSSEC info from CSV row data"""
|
||||
dnssec = row.get('dnssec', 'false').lower() == 'true'
|
||||
auth_dnssec = row.get('auth_dnssec', 'false').lower() == 'true'
|
||||
keepalive = row.get('keep_alive', 'false').lower() == 'true'
|
||||
|
||||
if dnssec:
|
||||
if auth_dnssec:
|
||||
dnssec_status = 'auth'
|
||||
else:
|
||||
dnssec_status = 'trust'
|
||||
else:
|
||||
dnssec_status = 'off'
|
||||
|
||||
keepalive_status = 'on' if keepalive else 'off'
|
||||
|
||||
return dnssec_status, keepalive_status
|
||||
|
||||
def extract_server_info(file_path, row):
|
||||
"""Extract info using directory structure, filename, and CSV data"""
|
||||
path = Path(file_path)
|
||||
|
||||
# First try to get DNSSEC info from CSV row (most accurate)
|
||||
try:
|
||||
csv_dnssec_status, csv_keepalive_status = extract_server_info_from_csv(row)
|
||||
protocol = row.get('protocol', '').lower()
|
||||
|
||||
# Get server from directory structure
|
||||
parts = path.parts
|
||||
if len(parts) >= 4:
|
||||
potential_date = parts[-2]
|
||||
# Check if it's a date like YYYY-MM-DD
|
||||
if len(potential_date) == 10 and potential_date[4] == '-' and potential_date[7] == '-' and potential_date.replace('-', '').isdigit():
|
||||
server = parts[-3] # resolver folder (e.g., cloudflare)
|
||||
return protocol, server, csv_dnssec_status, csv_keepalive_status
|
||||
|
||||
# Fallback to DNS server field
|
||||
server = row.get('dns_server', '')
|
||||
return protocol, server, csv_dnssec_status, csv_keepalive_status
|
||||
|
||||
except (KeyError, ValueError):
|
||||
pass
|
||||
|
||||
# Fallback to filename parsing
|
||||
filename = path.name
|
||||
protocol, dnssec_status, keepalive_status, flags = extract_from_new_format(filename)
|
||||
|
||||
if protocol:
|
||||
# Get server from directory structure
|
||||
parts = path.parts
|
||||
if len(parts) >= 4:
|
||||
potential_date = parts[-2]
|
||||
if len(potential_date) == 10 and potential_date[4] == '-' and potential_date[7] == '-' and potential_date.replace('-', '').isdigit():
|
||||
server = parts[-3]
|
||||
return protocol, server, dnssec_status, keepalive_status
|
||||
|
||||
# Fallback to DNS server field
|
||||
server = row.get('dns_server', '')
|
||||
return protocol, server, dnssec_status, keepalive_status
|
||||
|
||||
return None, None, None, None
|
||||
|
||||
def get_dnssec_display_name(dnssec_status):
|
||||
"""Convert DNSSEC status to display name"""
|
||||
if dnssec_status == 'auth':
|
||||
return 'DNSSEC (Authoritative)'
|
||||
elif dnssec_status == 'trust':
|
||||
return 'DNSSEC (Trust-based)'
|
||||
else:
|
||||
return 'No DNSSEC'
|
||||
|
||||
def analyze_dns_data(root_directory, output_file):
|
||||
"""Analyze DNS data and generate metrics"""
|
||||
|
||||
# Dictionary to store measurements: {(resolver, protocol, dnssec, keepalive): [durations]}
|
||||
measurements = defaultdict(list)
|
||||
|
||||
# Walk through all directories
|
||||
for root, dirs, files in os.walk(root_directory):
|
||||
for file in files:
|
||||
if file.endswith('.csv'):
|
||||
file_path = os.path.join(root, file)
|
||||
print(f"Processing: {file_path}")
|
||||
|
||||
try:
|
||||
with open(file_path, 'r', newline='') as csvfile:
|
||||
reader = csv.DictReader(csvfile)
|
||||
|
||||
for row_num, row in enumerate(reader, 2): # Start at 2 since header is row 1
|
||||
try:
|
||||
protocol, server, dnssec_status, keepalive_status = extract_server_info(file_path, row)
|
||||
|
||||
if protocol and server:
|
||||
resolver = map_server_to_resolver(server)
|
||||
duration_ms = float(row.get('duration_ms', 0))
|
||||
|
||||
# Only include successful queries
|
||||
if row.get('response_code', '') in ['NOERROR', '']:
|
||||
key = (resolver, protocol, dnssec_status, keepalive_status)
|
||||
measurements[key].append(duration_ms)
|
||||
|
||||
except (ValueError, TypeError) as e:
|
||||
print(f"Data parse error in {file_path} row {row_num}: {e}")
|
||||
continue
|
||||
|
||||
except Exception as e:
|
||||
print(f"Error processing file {file_path}: {e}")
|
||||
continue
|
||||
|
||||
# Calculate statistics grouped by resolver first, then by configuration
|
||||
resolver_results = defaultdict(lambda: defaultdict(lambda: defaultdict(list)))
|
||||
|
||||
for (resolver, protocol, dnssec, keepalive), durations in measurements.items():
|
||||
if durations:
|
||||
stats = {
|
||||
'protocol': protocol.upper(),
|
||||
'dnssec': dnssec,
|
||||
'keepalive': keepalive,
|
||||
'total_queries': len(durations),
|
||||
'avg_latency_ms': round(statistics.mean(durations), 3),
|
||||
'median_latency_ms': round(statistics.median(durations), 3),
|
||||
'min_latency_ms': round(min(durations), 3),
|
||||
'max_latency_ms': round(max(durations), 3),
|
||||
'std_dev_ms': round(statistics.stdev(durations) if len(durations) > 1 else 0, 3),
|
||||
'p95_latency_ms': round(statistics.quantiles(durations, n=20)[18], 3) if len(durations) >= 20 else round(max(durations), 3),
|
||||
'p99_latency_ms': round(statistics.quantiles(durations, n=100)[98], 3) if len(durations) >= 100 else round(max(durations), 3)
|
||||
}
|
||||
# Group by resolver -> dnssec -> keepalive -> protocol
|
||||
resolver_results[resolver][dnssec][keepalive].append(stats)
|
||||
|
||||
# Sort each configuration's results by average latency
|
||||
for resolver in resolver_results:
|
||||
for dnssec in resolver_results[resolver]:
|
||||
for keepalive in resolver_results[resolver][dnssec]:
|
||||
resolver_results[resolver][dnssec][keepalive].sort(key=lambda x: x['avg_latency_ms'])
|
||||
|
||||
# Write to CSV with all data
|
||||
all_results = []
|
||||
for resolver in resolver_results:
|
||||
for dnssec in resolver_results[resolver]:
|
||||
for keepalive in resolver_results[resolver][dnssec]:
|
||||
for result in resolver_results[resolver][dnssec][keepalive]:
|
||||
result['resolver'] = resolver
|
||||
all_results.append(result)
|
||||
|
||||
with open(output_file, 'w', newline='') as csvfile:
|
||||
fieldnames = [
|
||||
'resolver', 'protocol', 'dnssec', 'keepalive', 'total_queries',
|
||||
'avg_latency_ms', 'median_latency_ms', 'min_latency_ms',
|
||||
'max_latency_ms', 'std_dev_ms', 'p95_latency_ms', 'p99_latency_ms'
|
||||
]
|
||||
|
||||
writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
|
||||
writer.writeheader()
|
||||
writer.writerows(all_results)
|
||||
|
||||
print(f"\nAnalysis complete! Full results written to {output_file}")
|
||||
print(f"Total measurements: {sum(len(durations) for durations in measurements.values())}")
|
||||
|
||||
def print_configuration_table(resolver, dnssec_status, keepalive_status, results):
|
||||
"""Print a formatted table for a specific configuration"""
|
||||
ka_indicator = "PERSISTENT" if keepalive_status == 'on' else "NEW CONN"
|
||||
dnssec_display = get_dnssec_display_name(dnssec_status)
|
||||
|
||||
print(f"\n {dnssec_display} - {ka_indicator}")
|
||||
print(" " + "-" * 90)
|
||||
print(f" {'Protocol':<12} {'Queries':<8} {'Avg(ms)':<10} {'Median(ms)':<12} {'Min(ms)':<10} {'Max(ms)':<10} {'P95(ms)':<10}")
|
||||
print(" " + "-" * 90)
|
||||
|
||||
for result in results:
|
||||
print(f" {result['protocol']:<12} {result['total_queries']:<8} "
|
||||
f"{result['avg_latency_ms']:<10} {result['median_latency_ms']:<12} "
|
||||
f"{result['min_latency_ms']:<10} {result['max_latency_ms']:<10} "
|
||||
f"{result['p95_latency_ms']:<10}")
|
||||
|
||||
# Print results grouped by resolver first
|
||||
print(f"\n{'=' * 100}")
|
||||
print("DNS RESOLVER PERFORMANCE COMPARISON")
|
||||
print(f"{'=' * 100}")
|
||||
|
||||
for resolver in sorted(resolver_results.keys()):
|
||||
print(f"\n{resolver} DNS Resolver")
|
||||
print("=" * 100)
|
||||
|
||||
# Order configurations logically
|
||||
config_order = [
|
||||
('off', 'off'), # No DNSSEC, New connections
|
||||
('off', 'on'), # No DNSSEC, Persistent
|
||||
('trust', 'off'), # Trust DNSSEC, New connections
|
||||
('trust', 'on'), # Trust DNSSEC, Persistent
|
||||
('auth', 'off'), # Auth DNSSEC, New connections
|
||||
('auth', 'on'), # Auth DNSSEC, Persistent
|
||||
]
|
||||
|
||||
for dnssec_status, keepalive_status in config_order:
|
||||
if dnssec_status in resolver_results[resolver] and keepalive_status in resolver_results[resolver][dnssec_status]:
|
||||
results = resolver_results[resolver][dnssec_status][keepalive_status]
|
||||
if results: # Only print if there are results
|
||||
print_configuration_table(resolver, dnssec_status, keepalive_status, results)
|
||||
|
||||
# Summary comparison across resolvers
|
||||
print(f"\n{'=' * 100}")
|
||||
print("CROSS-RESOLVER PROTOCOL COMPARISON")
|
||||
print(f"{'=' * 100}")
|
||||
|
||||
# Group by protocol and configuration for cross-resolver comparison
|
||||
protocol_comparison = defaultdict(lambda: defaultdict(list))
|
||||
|
||||
for resolver in resolver_results:
|
||||
for dnssec in resolver_results[resolver]:
|
||||
for keepalive in resolver_results[resolver][dnssec]:
|
||||
for result in resolver_results[resolver][dnssec][keepalive]:
|
||||
config_key = f"{get_dnssec_display_name(dnssec)} - {'PERSISTENT' if keepalive == 'on' else 'NEW CONN'}"
|
||||
protocol_comparison[result['protocol']][config_key].append({
|
||||
'resolver': resolver,
|
||||
'avg_latency_ms': result['avg_latency_ms'],
|
||||
'total_queries': result['total_queries']
|
||||
})
|
||||
|
||||
for protocol in sorted(protocol_comparison.keys()):
|
||||
print(f"\n{protocol} Protocol Comparison")
|
||||
print("-" * 100)
|
||||
|
||||
for config in sorted(protocol_comparison[protocol].keys()):
|
||||
resolvers_data = protocol_comparison[protocol][config]
|
||||
if resolvers_data:
|
||||
print(f"\n {config}")
|
||||
print(" " + "-" * 60)
|
||||
print(f" {'Resolver':<15} {'Avg Latency (ms)':<20} {'Queries':<10}")
|
||||
print(" " + "-" * 60)
|
||||
|
||||
# Sort by average latency
|
||||
resolvers_data.sort(key=lambda x: x['avg_latency_ms'])
|
||||
|
||||
for data in resolvers_data:
|
||||
print(f" {data['resolver']:<15} {data['avg_latency_ms']:<20} {data['total_queries']:<10}")
|
||||
|
||||
if __name__ == "__main__":
|
||||
root_dir = "."
|
||||
output_file = "dns_metrics.csv"
|
||||
|
||||
analyze_dns_data(root_dir, output_file)
|
||||
284
scripts/tools/add_extra_metrics_to_csv.py
Normal file
284
scripts/tools/add_extra_metrics_to_csv.py
Normal file
@@ -0,0 +1,284 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Add network metrics from PCAP files to DNS CSV files.
|
||||
Adds: pcap_network_bytes_in, pcap_network_bytes_out, pcap_overhead_bytes
|
||||
"""
|
||||
|
||||
import csv
|
||||
import os
|
||||
import argparse
|
||||
from pathlib import Path
|
||||
from datetime import datetime, timezone
|
||||
import dpkt
|
||||
import socket
|
||||
|
||||
# Test machine IPs
|
||||
TEST_IPS = {
|
||||
'10.0.0.50',
|
||||
'2001:818:e73e:ba00:5506:dfd4:ed8b:96e',
|
||||
'fe80::fe98:c62e:4463:9a2d'
|
||||
}
|
||||
|
||||
|
||||
def inet_to_str(inet):
|
||||
"""Convert inet bytes to IP string"""
|
||||
try:
|
||||
return socket.inet_ntop(socket.AF_INET, inet)
|
||||
except ValueError:
|
||||
try:
|
||||
return socket.inet_ntop(socket.AF_INET6, inet)
|
||||
except ValueError:
|
||||
return None
|
||||
|
||||
|
||||
def read_pcap(pcap_path):
|
||||
"""Read PCAP and return list of (timestamp_ns, size, src_ip, dst_ip)"""
|
||||
packets = []
|
||||
|
||||
with open(pcap_path, 'rb') as f:
|
||||
try:
|
||||
pcap = dpkt.pcap.Reader(f)
|
||||
except:
|
||||
f.seek(0)
|
||||
pcap = dpkt.pcapng.Reader(f)
|
||||
|
||||
for ts, buf in pcap:
|
||||
try:
|
||||
# Convert PCAP timestamp (float seconds) to nanoseconds
|
||||
timestamp_ns = int(ts * 1_000_000_000)
|
||||
size = len(buf)
|
||||
eth = dpkt.ethernet.Ethernet(buf)
|
||||
|
||||
src_ip = dst_ip = None
|
||||
|
||||
if isinstance(eth.data, dpkt.ip.IP):
|
||||
src_ip = inet_to_str(eth.data.src)
|
||||
dst_ip = inet_to_str(eth.data.dst)
|
||||
elif isinstance(eth.data, dpkt.ip6.IP6):
|
||||
src_ip = inet_to_str(eth.data.src)
|
||||
dst_ip = inet_to_str(eth.data.dst)
|
||||
|
||||
if src_ip and dst_ip:
|
||||
packets.append((timestamp_ns, size, src_ip, dst_ip))
|
||||
|
||||
except (dpkt.dpkt.NeedData, dpkt.dpkt.UnpackError):
|
||||
continue
|
||||
|
||||
return packets
|
||||
|
||||
|
||||
def find_packets_in_window(packets, start_ns, duration_ns):
|
||||
"""Find packets within exact time window (nanosecond precision)"""
|
||||
end_ns = start_ns + duration_ns
|
||||
|
||||
matching = []
|
||||
for timestamp_ns, size, src_ip, dst_ip in packets:
|
||||
if start_ns <= timestamp_ns <= end_ns:
|
||||
matching.append((size, src_ip, dst_ip))
|
||||
|
||||
return matching
|
||||
|
||||
|
||||
def calculate_metrics(packets):
|
||||
"""Calculate network metrics from packets"""
|
||||
bytes_in = 0
|
||||
bytes_out = 0
|
||||
|
||||
for size, src_ip, dst_ip in packets:
|
||||
if dst_ip in TEST_IPS:
|
||||
bytes_in += size
|
||||
elif src_ip in TEST_IPS:
|
||||
bytes_out += size
|
||||
|
||||
return {
|
||||
'pcap_network_bytes_in': bytes_in,
|
||||
'pcap_network_bytes_out': bytes_out,
|
||||
'pcap_overhead_bytes': bytes_in + bytes_out
|
||||
}
|
||||
|
||||
|
||||
def parse_timestamp_to_ns(ts_str):
|
||||
"""Parse ISO timestamp to nanoseconds since epoch"""
|
||||
try:
|
||||
dt = datetime.fromisoformat(ts_str.replace('Z', '+00:00'))
|
||||
if dt.tzinfo is not None:
|
||||
dt = dt.astimezone(timezone.utc)
|
||||
# Convert to nanoseconds since epoch
|
||||
return int(dt.timestamp() * 1_000_000_000)
|
||||
except ValueError:
|
||||
return None
|
||||
|
||||
|
||||
def enhance_csv(csv_path, pcap_path, output_path, debug=False):
|
||||
"""Add PCAP metrics to CSV"""
|
||||
if not os.path.exists(pcap_path):
|
||||
print(f"⚠️ PCAP not found: {pcap_path}")
|
||||
return False
|
||||
|
||||
print(f"Processing: {os.path.basename(csv_path)}")
|
||||
|
||||
# Read PCAP
|
||||
try:
|
||||
packets = read_pcap(pcap_path)
|
||||
print(f" Loaded {len(packets)} packets")
|
||||
|
||||
if packets and debug:
|
||||
first_pcap_ns = packets[0][0]
|
||||
last_pcap_ns = packets[-1][0]
|
||||
print(f" First PCAP packet: {first_pcap_ns} ns")
|
||||
print(f" Last PCAP packet: {last_pcap_ns} ns")
|
||||
print(f" PCAP duration: {(last_pcap_ns - first_pcap_ns) / 1e9:.3f}s")
|
||||
|
||||
except Exception as e:
|
||||
print(f" ❌ Error reading PCAP: {e}")
|
||||
return False
|
||||
|
||||
if not packets:
|
||||
print(" ❌ No packets found")
|
||||
return False
|
||||
|
||||
# Read CSV
|
||||
with open(csv_path, 'r', newline='') as f:
|
||||
reader = csv.DictReader(f)
|
||||
fieldnames = list(reader.fieldnames) + [
|
||||
'pcap_network_bytes_in',
|
||||
'pcap_network_bytes_out',
|
||||
'pcap_overhead_bytes'
|
||||
]
|
||||
rows = list(reader)
|
||||
|
||||
if rows and debug:
|
||||
first_csv_ns = parse_timestamp_to_ns(rows[0]['timestamp'])
|
||||
last_csv_ns = parse_timestamp_to_ns(rows[-1]['timestamp'])
|
||||
if first_csv_ns and last_csv_ns:
|
||||
print(f" First CSV query: {first_csv_ns} ns")
|
||||
print(f" Last CSV query: {last_csv_ns} ns")
|
||||
print(f" CSV duration: {(last_csv_ns - first_csv_ns) / 1e9:.3f}s")
|
||||
|
||||
# Check alignment
|
||||
offset_ns = packets[0][0] - first_csv_ns
|
||||
print(f" Time offset (PCAP - CSV): {offset_ns / 1e9:.3f}s")
|
||||
|
||||
# Enhance rows
|
||||
enhanced = []
|
||||
matched = 0
|
||||
|
||||
for i, row in enumerate(rows):
|
||||
ts_ns = parse_timestamp_to_ns(row['timestamp'])
|
||||
if not ts_ns:
|
||||
continue
|
||||
|
||||
duration_ns = int(row.get('duration_ns', 0))
|
||||
|
||||
matching_packets = find_packets_in_window(packets, ts_ns, duration_ns)
|
||||
|
||||
metrics = calculate_metrics(matching_packets)
|
||||
row.update(metrics)
|
||||
enhanced.append(row)
|
||||
|
||||
if metrics['pcap_overhead_bytes'] > 0:
|
||||
matched += 1
|
||||
|
||||
# Debug first few queries
|
||||
if debug and i < 3:
|
||||
print(f" Query {i}: {row['domain']}")
|
||||
print(f" Start: {ts_ns} ns")
|
||||
print(f" Duration: {duration_ns} ns ({duration_ns / 1e6:.3f}ms)")
|
||||
print(f" End: {ts_ns + duration_ns} ns")
|
||||
print(f" Matched packets: {len(matching_packets)}")
|
||||
print(f" Bytes: {metrics['pcap_overhead_bytes']}")
|
||||
|
||||
print(f" Matched: {matched}/{len(rows)} queries")
|
||||
|
||||
if matched == 0:
|
||||
print(" ⚠️ WARNING: No queries matched any packets!")
|
||||
print(" This might indicate timestamp misalignment.")
|
||||
|
||||
# Write output
|
||||
os.makedirs(os.path.dirname(output_path), exist_ok=True)
|
||||
with open(output_path, 'w', newline='') as f:
|
||||
writer = csv.DictWriter(f, fieldnames=fieldnames, extrasaction='ignore')
|
||||
writer.writeheader()
|
||||
writer.writerows(enhanced)
|
||||
|
||||
print(f" ✓ Saved: {output_path}")
|
||||
return True
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(
|
||||
description='Add PCAP network metrics to DNS CSV files'
|
||||
)
|
||||
parser.add_argument('input_dir', help='Input directory (e.g., results_merged)')
|
||||
parser.add_argument(
|
||||
'--output',
|
||||
default='./results_enhanced',
|
||||
help='Output directory (default: ./results_enhanced)'
|
||||
)
|
||||
parser.add_argument(
|
||||
'--dry-run',
|
||||
action='store_true',
|
||||
help='Preview files without processing'
|
||||
)
|
||||
parser.add_argument(
|
||||
'--debug',
|
||||
action='store_true',
|
||||
help='Show detailed timing information'
|
||||
)
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
print("=" * 60)
|
||||
print("ENHANCE DNS CSVs WITH PCAP METRICS")
|
||||
print("=" * 60)
|
||||
print(f"Input: {args.input_dir}")
|
||||
print(f"Output: {args.output}")
|
||||
if args.debug:
|
||||
print("Debug: ENABLED")
|
||||
print()
|
||||
|
||||
# Find CSV files
|
||||
csv_files = list(Path(args.input_dir).rglob('*.csv'))
|
||||
|
||||
if not csv_files:
|
||||
print("❌ No CSV files found")
|
||||
return 1
|
||||
|
||||
print(f"Found {len(csv_files)} CSV files\n")
|
||||
|
||||
if args.dry_run:
|
||||
print("DRY RUN - would process:")
|
||||
for csv_path in csv_files:
|
||||
pcap_path = csv_path.with_suffix('.pcap')
|
||||
print(f" {csv_path.relative_to(args.input_dir)}")
|
||||
print(f" PCAP: {'✓' if pcap_path.exists() else '✗'}")
|
||||
return 0
|
||||
|
||||
# Process files
|
||||
success = 0
|
||||
failed = 0
|
||||
|
||||
for csv_path in csv_files:
|
||||
pcap_path = csv_path.with_suffix('.pcap')
|
||||
rel_path = csv_path.relative_to(args.input_dir)
|
||||
output_path = Path(args.output) / rel_path
|
||||
|
||||
if enhance_csv(str(csv_path), str(pcap_path), str(output_path),
|
||||
args.debug):
|
||||
success += 1
|
||||
else:
|
||||
failed += 1
|
||||
print()
|
||||
|
||||
# Summary
|
||||
print("=" * 60)
|
||||
print(f"✓ Success: {success}")
|
||||
print(f"✗ Failed: {failed}")
|
||||
print(f"Total: {len(csv_files)}")
|
||||
print(f"\nOutput: {args.output}")
|
||||
|
||||
return 0 if failed == 0 else 1
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
exit(main())
|
||||
367
scripts/tools/clean_pcaps.py
Normal file
367
scripts/tools/clean_pcaps.py
Normal file
@@ -0,0 +1,367 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Advanced PCAP filter for DNS traffic (with IPv6 support).
|
||||
|
||||
Filters out:
|
||||
- Local network traffic except test machine (IPv4: 10.0.0.50; IPv6: specific addresses)
|
||||
- AdGuard DNS servers (for non-AdGuard captures)
|
||||
- Non-DNS traffic based on protocol-specific ports
|
||||
"""
|
||||
|
||||
import os
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
import argparse
|
||||
|
||||
# Test machine IPs (IPv4 and IPv6 from your provided info)
|
||||
TEST_IPV4 = '10.0.0.50'
|
||||
TEST_IPV6_GLOBAL = '2001:818:e73e:ba00:5506:dfd4:ed8b:96e'
|
||||
TEST_IPV6_LINKLOCAL = 'fe80::fe98:c62e:4463:9a2d'
|
||||
|
||||
# Port mappings
|
||||
PORT_MAP = {
|
||||
'udp': [53], # DNS-over-UDP
|
||||
'tls': [53, 853], # DNS-over-TLS
|
||||
'https': [53, 443], # DNS-over-HTTPS (DoH)
|
||||
'doq': [53, 784, 8853], # DNS-over-QUIC
|
||||
'doh3': [53, 443] # DNS-over-HTTP/3
|
||||
}
|
||||
|
||||
# AdGuard DNS IPs to filter out (for non-AdGuard captures)
|
||||
ADGUARD_IPS = [
|
||||
'94.140.14.14',
|
||||
'94.140.15.15',
|
||||
'2a10:50c0::ad1:ff',
|
||||
'2a10:50c0::ad2:ff'
|
||||
]
|
||||
|
||||
def parse_filename(filename):
|
||||
"""Extract protocol from filename"""
|
||||
base = filename.replace('.pcap', '').replace('.csv', '')
|
||||
parts = base.split('-')
|
||||
|
||||
if len(parts) < 1: # Minimum: protocol
|
||||
return None
|
||||
|
||||
protocol = parts[0].lower()
|
||||
return protocol
|
||||
|
||||
def extract_resolver_from_path(pcap_path):
|
||||
"""Extract resolver name from directory structure"""
|
||||
parts = Path(pcap_path).parts
|
||||
|
||||
for part in parts:
|
||||
if part.lower() in ['cloudflare', 'google', 'quad9', 'adguard']:
|
||||
return part.lower()
|
||||
|
||||
return None
|
||||
|
||||
def build_filter_expression(protocol, resolver):
|
||||
"""
|
||||
Build tshark filter expression.
|
||||
|
||||
Strategy:
|
||||
1. Only protocol-specific DNS ports
|
||||
2. Keep only traffic involving the test machine (IPv4/IPv6)
|
||||
3. Exclude AdGuard IPs for non-AdGuard captures
|
||||
"""
|
||||
|
||||
# Get ports for this protocol
|
||||
ports = PORT_MAP.get(protocol, [53, 443, 853, 784, 8853])
|
||||
|
||||
# Build port filter (UDP or TCP on these ports)
|
||||
port_conditions = []
|
||||
for port in ports:
|
||||
port_conditions.append(f'(udp.port == {port} or tcp.port == {port})')
|
||||
|
||||
port_filter = ' or '.join(port_conditions)
|
||||
|
||||
# Build test machine filter (keep if src or dst is test machine IP)
|
||||
machine_conditions = [f'(ip.addr == {TEST_IPV4})']
|
||||
if TEST_IPV6_GLOBAL:
|
||||
machine_conditions.append(f'(ipv6.addr == {TEST_IPV6_GLOBAL})')
|
||||
if TEST_IPV6_LINKLOCAL:
|
||||
machine_conditions.append(f'(ipv6.addr == {TEST_IPV6_LINKLOCAL})')
|
||||
|
||||
machine_filter = ' or '.join(machine_conditions)
|
||||
|
||||
# Build AdGuard exclusion filter
|
||||
adguard_exclusions = []
|
||||
if resolver != 'adguard':
|
||||
for ip in ADGUARD_IPS:
|
||||
if ':' in ip: # IPv6
|
||||
adguard_exclusions.append(f'!(ipv6.addr == {ip})')
|
||||
else: # IPv4
|
||||
adguard_exclusions.append(f'!(ip.addr == {ip})')
|
||||
|
||||
# Combine all filters
|
||||
filters = [f'({port_filter})', f'({machine_filter})']
|
||||
|
||||
if adguard_exclusions:
|
||||
adguard_filter = ' and '.join(adguard_exclusions)
|
||||
filters.append(f'({adguard_filter})')
|
||||
|
||||
final_filter = ' and '.join(filters)
|
||||
|
||||
return final_filter
|
||||
|
||||
def filter_pcap(input_path, output_path, filter_expr, verbose=False):
|
||||
"""Apply filter to PCAP file using tshark"""
|
||||
|
||||
cmd = [
|
||||
'tshark',
|
||||
'-r', input_path,
|
||||
'-Y', filter_expr,
|
||||
'-w', output_path,
|
||||
'-F', 'pcap'
|
||||
]
|
||||
|
||||
try:
|
||||
if verbose:
|
||||
print(f" Filter: {filter_expr}")
|
||||
|
||||
result = subprocess.run(
|
||||
cmd,
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=300
|
||||
)
|
||||
|
||||
if result.returncode != 0:
|
||||
print(f" ✗ Error: {result.stderr.strip()}")
|
||||
return False
|
||||
|
||||
if not os.path.exists(output_path):
|
||||
print(f" ✗ Output file not created")
|
||||
return False
|
||||
|
||||
output_size = os.path.getsize(output_path)
|
||||
if output_size < 24:
|
||||
print(f" ⚠ Warning: Output is empty")
|
||||
|
||||
return True
|
||||
|
||||
except subprocess.TimeoutExpired:
|
||||
print(f" ✗ Timeout (>5 minutes)")
|
||||
return False
|
||||
except Exception as e:
|
||||
print(f" ✗ Exception: {e}")
|
||||
return False
|
||||
|
||||
def find_pcap_files(root_dir):
|
||||
"""Recursively find all PCAP files"""
|
||||
pcap_files = []
|
||||
for root, dirs, files in os.walk(root_dir):
|
||||
for file in files:
|
||||
if file.endswith('.pcap'):
|
||||
full_path = os.path.join(root, file)
|
||||
pcap_files.append(full_path)
|
||||
return sorted(pcap_files)
|
||||
|
||||
def format_bytes(bytes_val):
|
||||
"""Format bytes as human readable"""
|
||||
for unit in ['B', 'KB', 'MB', 'GB']:
|
||||
if bytes_val < 1024.0:
|
||||
return f"{bytes_val:.1f} {unit}"
|
||||
bytes_val /= 1024.0
|
||||
return f"{bytes_val:.1f} TB"
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(
|
||||
description='Advanced PCAP filter for DNS traffic (IPv4/IPv6)',
|
||||
formatter_class=argparse.RawDescriptionHelpFormatter,
|
||||
epilog='''
|
||||
Filtering rules:
|
||||
1. Only include traffic on protocol-specific DNS ports
|
||||
2. Keep only packets involving the test machine (10.0.0.50 or its IPv6 addresses)
|
||||
3. Exclude AdGuard IPs for non-AdGuard captures
|
||||
|
||||
Protocol-specific ports:
|
||||
udp: 53
|
||||
tls: 53, 853
|
||||
https: 53, 443
|
||||
doq: 53, 784, 8853
|
||||
doh3: 53, 443
|
||||
|
||||
Examples:
|
||||
# Dry run
|
||||
%(prog)s ./results --dry-run
|
||||
|
||||
# Filter with verbose output
|
||||
%(prog)s ./results --verbose
|
||||
|
||||
# Custom output directory
|
||||
%(prog)s ./results --output ./cleaned
|
||||
'''
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
'input_dir',
|
||||
help='Input directory containing PCAP files'
|
||||
)
|
||||
parser.add_argument(
|
||||
'-o', '--output',
|
||||
default='./results_filtered',
|
||||
help='Output directory (default: ./results_filtered)'
|
||||
)
|
||||
parser.add_argument(
|
||||
'--dry-run',
|
||||
action='store_true',
|
||||
help='Show what would be done without filtering'
|
||||
)
|
||||
parser.add_argument(
|
||||
'--limit',
|
||||
type=int,
|
||||
help='Only process first N files (for testing)'
|
||||
)
|
||||
parser.add_argument(
|
||||
'-v', '--verbose',
|
||||
action='store_true',
|
||||
help='Verbose output (show filter expressions)'
|
||||
)
|
||||
parser.add_argument(
|
||||
'--overwrite',
|
||||
action='store_true',
|
||||
help='Overwrite existing filtered files'
|
||||
)
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
# Check for tshark
|
||||
try:
|
||||
result = subprocess.run(
|
||||
['tshark', '-v'],
|
||||
capture_output=True,
|
||||
check=True
|
||||
)
|
||||
if args.verbose:
|
||||
version = result.stdout.decode().split('\n')[0]
|
||||
print(f"Using: {version}\n")
|
||||
except (subprocess.CalledProcessError, FileNotFoundError):
|
||||
print("Error: tshark not found. Install Wireshark/tshark:")
|
||||
print(" Ubuntu/Debian: sudo apt-get install tshark")
|
||||
print(" macOS: brew install wireshark")
|
||||
return 1
|
||||
|
||||
print("=" * 80)
|
||||
print("ADVANCED DNS PCAP FILTER (IPv4/IPv6)")
|
||||
print("=" * 80)
|
||||
print("Filters:")
|
||||
print(" 1. Protocol-specific DNS ports only")
|
||||
print(" 2. Keep only traffic involving test machine (10.0.0.50 / IPv6 addresses)")
|
||||
print(" 3. Exclude AdGuard IPs (for non-AdGuard captures)")
|
||||
print(f"\nInput: {args.input_dir}")
|
||||
print(f"Output: {args.output}")
|
||||
|
||||
# Find PCAP files
|
||||
print(f"\nScanning for PCAP files...")
|
||||
pcap_files = find_pcap_files(args.input_dir)
|
||||
|
||||
if not pcap_files:
|
||||
print(f"No PCAP files found in {args.input_dir}")
|
||||
return 1
|
||||
|
||||
print(f"Found {len(pcap_files)} PCAP files")
|
||||
|
||||
total_input_size = sum(os.path.getsize(f) for f in pcap_files)
|
||||
print(f"Total size: {format_bytes(total_input_size)}")
|
||||
|
||||
if args.limit:
|
||||
pcap_files = pcap_files[:args.limit]
|
||||
print(f"Limiting to first {args.limit} files")
|
||||
|
||||
if args.dry_run:
|
||||
print("\n*** DRY RUN MODE ***\n")
|
||||
else:
|
||||
print()
|
||||
|
||||
# Process files
|
||||
success_count = 0
|
||||
skip_count = 0
|
||||
fail_count = 0
|
||||
total_output_size = 0
|
||||
|
||||
for i, input_path in enumerate(pcap_files, 1):
|
||||
# Extract info from path
|
||||
filename = Path(input_path).name
|
||||
protocol = parse_filename(filename)
|
||||
resolver = extract_resolver_from_path(input_path)
|
||||
|
||||
if not protocol:
|
||||
print(f"[{i}/{len(pcap_files)}] {filename}")
|
||||
print(f" ⚠ Could not parse protocol, skipping")
|
||||
skip_count += 1
|
||||
continue
|
||||
|
||||
# Create output path
|
||||
rel_path = os.path.relpath(input_path, args.input_dir)
|
||||
output_path = os.path.join(args.output, rel_path)
|
||||
|
||||
input_size = os.path.getsize(input_path)
|
||||
|
||||
print(f"[{i}/{len(pcap_files)}] {rel_path}")
|
||||
print(f" Protocol: {protocol.upper()}")
|
||||
print(f" Resolver: {resolver or 'unknown'}")
|
||||
print(f" Size: {format_bytes(input_size)}")
|
||||
|
||||
# Check if already filtered
|
||||
if os.path.exists(output_path) and not args.overwrite:
|
||||
output_size = os.path.getsize(output_path)
|
||||
reduction = ((input_size - output_size) / input_size * 100) if input_size > 0 else 0
|
||||
print(f" ⊙ Already filtered: {format_bytes(output_size)} "
|
||||
f"({reduction:.1f}% reduction)")
|
||||
skip_count += 1
|
||||
total_output_size += output_size
|
||||
continue
|
||||
|
||||
# Build filter
|
||||
filter_expr = build_filter_expression(protocol, resolver)
|
||||
|
||||
if args.dry_run:
|
||||
print(f" → Would filter")
|
||||
if args.verbose:
|
||||
print(f" Filter: {filter_expr}")
|
||||
continue
|
||||
|
||||
# Create output directory
|
||||
os.makedirs(os.path.dirname(output_path), exist_ok=True)
|
||||
|
||||
# Filter
|
||||
success = filter_pcap(input_path, output_path, filter_expr, args.verbose)
|
||||
|
||||
if success:
|
||||
output_size = os.path.getsize(output_path)
|
||||
reduction = ((input_size - output_size) / input_size * 100) if input_size > 0 else 0
|
||||
print(f" ✓ Filtered: {format_bytes(output_size)} "
|
||||
f"({reduction:.1f}% reduction)")
|
||||
success_count += 1
|
||||
total_output_size += output_size
|
||||
else:
|
||||
fail_count += 1
|
||||
|
||||
# Summary
|
||||
print("\n" + "=" * 80)
|
||||
print("SUMMARY")
|
||||
print("=" * 80)
|
||||
|
||||
if args.dry_run:
|
||||
print(f"Would process: {len(pcap_files)} files")
|
||||
else:
|
||||
print(f"Successful: {success_count}")
|
||||
print(f"Skipped: {skip_count} (already filtered or unparseable)")
|
||||
print(f"Failed: {fail_count}")
|
||||
print(f"Total: {len(pcap_files)}")
|
||||
|
||||
if success_count > 0 or skip_count > 0:
|
||||
print(f"\nInput size: {format_bytes(total_input_size)}")
|
||||
print(f"Output size: {format_bytes(total_output_size)}")
|
||||
if total_input_size > 0:
|
||||
reduction = ((total_input_size - total_output_size) /
|
||||
total_input_size * 100)
|
||||
print(f"Reduction: {reduction:.1f}%")
|
||||
print(f"\nOutput directory: {args.output}")
|
||||
|
||||
return 0 if fail_count == 0 else 1
|
||||
|
||||
if __name__ == "__main__":
|
||||
exit(main())
|
||||
274
scripts/tools/merge_files.py
Normal file
274
scripts/tools/merge_files.py
Normal file
@@ -0,0 +1,274 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Merge DNS test files by configuration.
|
||||
|
||||
- Merges CSVs of same config (adds 'run_id' column for traceability)
|
||||
- Optionally merges PCAPs using mergecap
|
||||
- Flattens date structure
|
||||
"""
|
||||
|
||||
import os
|
||||
import csv
|
||||
import subprocess
|
||||
import shutil
|
||||
from pathlib import Path
|
||||
import argparse
|
||||
from collections import defaultdict
|
||||
|
||||
def parse_filename(filename):
|
||||
"""
|
||||
Extract config key from filename.
|
||||
Format: protocol[-flags]-timestamp.{csv,pcap}
|
||||
Config key: protocol[-flags] (ignores timestamp)
|
||||
"""
|
||||
base = filename.replace('.csv', '').replace('.pcap', '')
|
||||
parts = base.split('-')
|
||||
|
||||
if len(parts) < 2:
|
||||
return None
|
||||
|
||||
# Config is everything except timestamp
|
||||
config = '-'.join(parts[:-1])
|
||||
timestamp = parts[-1]
|
||||
|
||||
return config, timestamp
|
||||
|
||||
def extract_resolver_from_path(file_path):
|
||||
"""Extract resolver name from path"""
|
||||
parts = Path(file_path).parts
|
||||
for part in parts:
|
||||
if part.lower() in ['cloudflare', 'google', 'quad9', 'adguard']:
|
||||
return part.lower()
|
||||
return None
|
||||
|
||||
def find_files(root_dir, extension):
|
||||
"""Find all files with given extension"""
|
||||
files = []
|
||||
for root, dirs, filenames in os.walk(root_dir):
|
||||
for filename in filenames:
|
||||
if filename.endswith(extension):
|
||||
full_path = os.path.join(root, filename)
|
||||
files.append(full_path)
|
||||
return sorted(files)
|
||||
|
||||
def merge_csvs(csv_files, output_path, fieldnames):
|
||||
"""Merge multiple CSVs into one, adding 'run_id' column"""
|
||||
with open(output_path, 'w', newline='') as outfile:
|
||||
writer = csv.DictWriter(outfile, fieldnames=fieldnames + ['run_id'])
|
||||
writer.writeheader()
|
||||
|
||||
for csv_path in csv_files:
|
||||
# Use timestamp as run_id
|
||||
filename = Path(csv_path).name
|
||||
_, timestamp = parse_filename(filename)
|
||||
run_id = timestamp # Or add date if needed
|
||||
|
||||
with open(csv_path, 'r', newline='') as infile:
|
||||
reader = csv.DictReader(infile)
|
||||
for row in reader:
|
||||
row['run_id'] = run_id
|
||||
writer.writerow(row)
|
||||
|
||||
def merge_pcaps(pcap_files, output_path):
|
||||
"""Merge PCAP files using mergecap"""
|
||||
cmd = ['mergecap', '-w', output_path] + pcap_files
|
||||
try:
|
||||
subprocess.run(cmd, capture_output=True, check=True)
|
||||
return True
|
||||
except subprocess.CalledProcessError as e:
|
||||
print(f" ✗ mergecap error: {e.stderr.decode()}")
|
||||
return False
|
||||
except FileNotFoundError:
|
||||
print("Error: mergecap not found. Install Wireshark:")
|
||||
print(" Ubuntu: sudo apt install wireshark-common")
|
||||
print(" macOS: brew install wireshark")
|
||||
return False
|
||||
|
||||
def format_bytes(bytes_val):
|
||||
"""Format bytes as human readable"""
|
||||
for unit in ['B', 'KB', 'MB', 'GB']:
|
||||
if bytes_val < 1024.0:
|
||||
return f"{bytes_val:.1f} {unit}"
|
||||
bytes_val /= 1024.0
|
||||
return f"{bytes_val:.1f} TB"
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(
|
||||
description='Merge DNS test files by configuration',
|
||||
formatter_class=argparse.RawDescriptionHelpFormatter,
|
||||
epilog='''
|
||||
Merges files of same config across dates/timestamps.
|
||||
Output: ./results_merged/[resolver]/[config].csv (merged)
|
||||
./results_merged/[resolver]/[config].pcap (merged, if --merge-pcaps)
|
||||
|
||||
Examples:
|
||||
# Dry run to preview
|
||||
%(prog)s ./results --dry-run
|
||||
|
||||
# Merge CSVs only (recommended)
|
||||
%(prog)s ./results
|
||||
|
||||
# Merge CSVs and PCAPs
|
||||
%(prog)s ./results --merge-pcaps
|
||||
|
||||
# Custom output directory
|
||||
%(prog)s ./results --output ./merged_data
|
||||
'''
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
'input_dir',
|
||||
help='Input directory (e.g., ./results)'
|
||||
)
|
||||
parser.add_argument(
|
||||
'--output',
|
||||
default='./results_merged',
|
||||
help='Output directory (default: ./results_merged)'
|
||||
)
|
||||
parser.add_argument(
|
||||
'--merge-pcaps',
|
||||
action='store_true',
|
||||
help='Merge PCAP files (requires mergecap from Wireshark)'
|
||||
)
|
||||
parser.add_argument(
|
||||
'--dry-run',
|
||||
action='store_true',
|
||||
help='Show what would be done without merging'
|
||||
)
|
||||
parser.add_argument(
|
||||
'-y', '--yes',
|
||||
action='store_true',
|
||||
help='Skip confirmation prompt'
|
||||
)
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
if not os.path.isdir(args.input_dir):
|
||||
print(f"Error: Input directory not found: {args.input_dir}")
|
||||
return 1
|
||||
|
||||
# Find all files
|
||||
print("=" * 80)
|
||||
print("MERGE DNS TEST FILES")
|
||||
print("=" * 80)
|
||||
print(f"Input: {args.input_dir}")
|
||||
print(f"Output: {args.output}")
|
||||
print(f"Merge PCAPs: {'Yes' if args.merge_pcaps else 'No'}")
|
||||
|
||||
csv_files = find_files(args.input_dir, '.csv')
|
||||
pcap_files = find_files(args.input_dir, '.pcap') if args.merge_pcaps else []
|
||||
|
||||
if not csv_files and not pcap_files:
|
||||
print("\nNo CSV/PCAP files found")
|
||||
return 1
|
||||
|
||||
print(f"\nFound {len(csv_files)} CSV files")
|
||||
if args.merge_pcaps:
|
||||
print(f"Found {len(pcap_files)} PCAP files")
|
||||
|
||||
# Group files by resolver and config
|
||||
csv_groups = defaultdict(list)
|
||||
pcap_groups = defaultdict(list)
|
||||
|
||||
for csv_path in csv_files:
|
||||
config, _ = parse_filename(Path(csv_path).name)
|
||||
resolver = extract_resolver_from_path(csv_path)
|
||||
if config and resolver:
|
||||
key = (resolver, config)
|
||||
csv_groups[key].append(csv_path)
|
||||
|
||||
for pcap_path in pcap_files:
|
||||
config, _ = parse_filename(Path(pcap_path).name)
|
||||
resolver = extract_resolver_from_path(pcap_path)
|
||||
if config and resolver:
|
||||
key = (resolver, config)
|
||||
pcap_groups[key].append(pcap_path)
|
||||
|
||||
# Summary
|
||||
print("\nConfigs to merge:")
|
||||
print("-" * 80)
|
||||
for (resolver, config), files in sorted(csv_groups.items()):
|
||||
print(f" {resolver}/{config}: {len(files)} runs")
|
||||
|
||||
total_runs = sum(len(files) for files in csv_groups.values())
|
||||
print(f"\nTotal configs: {len(csv_groups)}")
|
||||
print(f"Total runs: {total_runs}")
|
||||
|
||||
if args.dry_run:
|
||||
print("\n*** DRY RUN MODE ***\n")
|
||||
for (resolver, config) in sorted(csv_groups.keys()):
|
||||
print(f"Would merge: {resolver}/{config} ({len(csv_groups[(resolver, config)])} CSVs)")
|
||||
if args.merge_pcaps and (resolver, config) in pcap_groups:
|
||||
print(f"Would merge: {resolver}/{config} ({len(pcap_groups[(resolver, config)])} PCAPs)")
|
||||
return 0
|
||||
|
||||
# Confirmation
|
||||
if not args.yes:
|
||||
response = input(f"\nMerge all into {args.output}? [y/N] ")
|
||||
if response.lower() not in ['y', 'yes']:
|
||||
print("Cancelled")
|
||||
return 0
|
||||
|
||||
# Merge
|
||||
print("\n" + "=" * 80)
|
||||
print("MERGING FILES")
|
||||
print("=" * 80)
|
||||
|
||||
success_count = 0
|
||||
fail_count = 0
|
||||
total_queries = 0
|
||||
total_size = 0
|
||||
|
||||
# Get standard CSV fieldnames (from first file)
|
||||
first_csv = next(iter(csv_files))
|
||||
with open(first_csv, 'r') as f:
|
||||
reader = csv.DictReader(f)
|
||||
fieldnames = reader.fieldnames
|
||||
|
||||
for (resolver, config), files in sorted(csv_groups.items()):
|
||||
print(f"\n{resolver}/{config} ({len(files)} runs)")
|
||||
|
||||
# Merge CSVs
|
||||
output_csv = os.path.join(args.output, resolver, f"{config}.csv")
|
||||
os.makedirs(os.path.dirname(output_csv), exist_ok=True)
|
||||
|
||||
merge_csvs(files, output_csv, fieldnames)
|
||||
|
||||
# Count queries in merged file
|
||||
with open(output_csv, 'r') as f:
|
||||
query_count = sum(1 for _ in csv.reader(f)) - 1 # Minus header
|
||||
|
||||
print(f" ✓ Merged CSV: {query_count:,} queries")
|
||||
total_queries += query_count
|
||||
success_count += 1
|
||||
|
||||
# Merge PCAPs if requested
|
||||
if args.merge_pcaps and (resolver, config) in pcap_groups:
|
||||
output_pcap = os.path.join(args.output, resolver, f"{config}.pcap")
|
||||
pcap_list = pcap_groups[(resolver, config)]
|
||||
|
||||
if merge_pcaps(pcap_list, output_pcap):
|
||||
merged_size = os.path.getsize(output_pcap)
|
||||
orig_size = sum(os.path.getsize(p) for p in pcap_list)
|
||||
print(f" ✓ Merged PCAP: {format_bytes(merged_size)} "
|
||||
f"(from {format_bytes(orig_size)})")
|
||||
total_size += merged_size
|
||||
else:
|
||||
print(f" ✗ PCAP merge failed")
|
||||
fail_count += 1
|
||||
|
||||
# Final summary
|
||||
print("\n" + "=" * 80)
|
||||
print("COMPLETE")
|
||||
print("=" * 80)
|
||||
print(f"Successful configs: {success_count}")
|
||||
print(f"Failed: {fail_count}")
|
||||
print(f"Total queries: {total_queries:,}")
|
||||
if args.merge_pcaps:
|
||||
print(f"Total PCAP size: {format_bytes(total_size)}")
|
||||
print(f"\nMerged files in: {args.output}")
|
||||
|
||||
return 0 if fail_count == 0 else 1
|
||||
|
||||
if __name__ == "__main__":
|
||||
exit(main())
|
||||
Reference in New Issue
Block a user