diff --git a/.gitignore b/.gitignore
index adfffb1..aa3ee76 100644
--- a/.gitignore
+++ b/.gitignore
@@ -17,3 +17,4 @@
 **/tls-key-log.txt
 /results
 /results.bak
+/results_merged
diff --git a/flake.lock b/flake.lock
index 429743f..ed6abb9 100644
--- a/flake.lock
+++ b/flake.lock
@@ -2,11 +2,11 @@
   "nodes": {
     "nixpkgs": {
       "locked": {
-        "lastModified": 1758916627,
-        "narHash": "sha256-fB2ISCc+xn+9hZ6gOsABxSBcsCgLCjbJ5bC6U9bPzQ4=",
+        "lastModified": 1760103332,
+        "narHash": "sha256-BMsGVfKl4Q80Pr9T1AkCRljO1bpwCmY8rTBVj8XGuhA=",
         "owner": "NixOS",
         "repo": "nixpkgs",
-        "rev": "53614373268559d054c080d070cfc732dbe68ac4",
+        "rev": "870493f9a8cb0b074ae5b411b2f232015db19a65",
         "type": "github"
      },
      "original": {
diff --git a/internal/qol/measurement.go b/internal/qol/measurement.go
index ccab8cf..8917bfc 100644
--- a/internal/qol/measurement.go
+++ b/internal/qol/measurement.go
@@ -155,8 +155,6 @@ func (r *MeasurementRunner) runQueries(dnsClient client.DNSClient, upstream stri
 
 	r.printQueryResult(metric)
 
-	// For keep-alive connections, add smaller delays between queries
-	// to better utilize the persistent connection
 	if r.config.KeepAlive {
 		time.Sleep(5 * time.Millisecond)
 	} else {
@@ -182,7 +180,6 @@ func (r *MeasurementRunner) performQuery(dnsClient client.DNSClient, domain, ups
 	AuthoritativeDNSSEC: r.config.AuthoritativeDNSSEC,
 	KeepAlive:           r.config.KeepAlive,
 	DNSServer:           upstream,
-	Timestamp:           time.Now(),
 }
 
 msg := new(dns.Msg)
@@ -199,6 +196,7 @@ func (r *MeasurementRunner) performQuery(dnsClient client.DNSClient, domain, ups
 metric.RequestSize = len(packed)
 
 start := time.Now()
+metric.Timestamp = start
 resp, err := dnsClient.Query(msg)
 metric.Duration = time.Since(start).Nanoseconds()
 metric.DurationMs = float64(metric.Duration) / 1e6
diff --git a/internal/qol/results/writer.go b/internal/qol/results/writer.go
index 6f9765e..158d2ae 100644
--- a/internal/qol/results/writer.go
+++ b/internal/qol/results/writer.go
@@ -67,7 +67,7 @@ func (mw *MetricsWriter) WriteMetric(metric DNSMetric) error {
 	strconv.FormatBool(metric.AuthoritativeDNSSEC),
 	strconv.FormatBool(metric.KeepAlive),
 	metric.DNSServer,
-	metric.Timestamp.Format(time.RFC3339),
+	metric.Timestamp.Format(time.RFC3339Nano),
 	strconv.FormatInt(metric.Duration, 10),
 	strconv.FormatFloat(metric.DurationMs, 'f', 3, 64),
 	strconv.Itoa(metric.RequestSize),
diff --git a/analyze_dns_metrics.py b/scripts/analysis/analyze_dns_metrics.py
similarity index 99%
rename from analyze_dns_metrics.py
rename to scripts/analysis/analyze_dns_metrics.py
index 44971d4..9428b10 100644
--- a/analyze_dns_metrics.py
+++ b/scripts/analysis/analyze_dns_metrics.py
@@ -10,7 +10,7 @@ def map_server_to_resolver(server):
 
     if '1.1.1.1' in server_lower or 'cloudflare' in server_lower:
         return 'Cloudflare'
-    elif '8.8.8.8' in server_lower or 'dns.google' in server_lower:
+    elif '8.8.8.8' in server_lower or 'google' in server_lower:
         return 'Google'
     elif '9.9.9.9' in server_lower or 'quad9' in server_lower:
         return 'Quad9'
diff --git a/scripts/tools/add_extra_metrics_to_csv.py b/scripts/tools/add_extra_metrics_to_csv.py
new file mode 100644
index 0000000..d7884e8
--- /dev/null
+++ b/scripts/tools/add_extra_metrics_to_csv.py
@@ -0,0 +1,284 @@
+#!/usr/bin/env python3
+"""
+Add network metrics from PCAP files to DNS CSV files.
+
+Adds: pcap_network_bytes_in, pcap_network_bytes_out, pcap_overhead_bytes
+"""
+
+import csv
+import os
+import argparse
+from pathlib import Path
+from datetime import datetime, timezone
+import dpkt
+import socket
+
+# Test machine IPs
+TEST_IPS = {
+    '10.0.0.50',
+    '2001:818:e73e:ba00:5506:dfd4:ed8b:96e',
+    'fe80::fe98:c62e:4463:9a2d'
+}
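+# Direction is classified relative to these addresses (a destination in the
+# set counts as inbound, a source as outbound); adjust for your test machine.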
+
+
+def inet_to_str(inet):
+    """Convert inet bytes to IP string"""
+    try:
+        return socket.inet_ntop(socket.AF_INET, inet)
+    except ValueError:
+        try:
+            return socket.inet_ntop(socket.AF_INET6, inet)
+        except ValueError:
+            return None
+
+
+def read_pcap(pcap_path):
+    """Read PCAP and return list of (timestamp_ns, size, src_ip, dst_ip)"""
+    packets = []
+
+    with open(pcap_path, 'rb') as f:
+        try:
+            pcap = dpkt.pcap.Reader(f)
+        except ValueError:
+            # Not a classic pcap; retry as pcapng
+            f.seek(0)
+            pcap = dpkt.pcapng.Reader(f)
+
+        for ts, buf in pcap:
+            try:
+                # Convert PCAP timestamp (float seconds) to nanoseconds
+                timestamp_ns = int(ts * 1_000_000_000)
+                size = len(buf)
+                eth = dpkt.ethernet.Ethernet(buf)
+
+                src_ip = dst_ip = None
+
+                if isinstance(eth.data, dpkt.ip.IP):
+                    src_ip = inet_to_str(eth.data.src)
+                    dst_ip = inet_to_str(eth.data.dst)
+                elif isinstance(eth.data, dpkt.ip6.IP6):
+                    src_ip = inet_to_str(eth.data.src)
+                    dst_ip = inet_to_str(eth.data.dst)
+
+                if src_ip and dst_ip:
+                    packets.append((timestamp_ns, size, src_ip, dst_ip))
+
+            except (dpkt.dpkt.NeedData, dpkt.dpkt.UnpackError):
+                continue
+
+    return packets
+
+
+def find_packets_in_window(packets, start_ns, duration_ns):
+    """Find packets within exact time window (nanosecond precision)"""
+    end_ns = start_ns + duration_ns
+
+    matching = []
+    for timestamp_ns, size, src_ip, dst_ip in packets:
+        if start_ns <= timestamp_ns <= end_ns:
+            matching.append((size, src_ip, dst_ip))
+
+    return matching
+
+
+def calculate_metrics(packets):
+    """Calculate network metrics from packets"""
+    bytes_in = 0
+    bytes_out = 0
+
+    for size, src_ip, dst_ip in packets:
+        if dst_ip in TEST_IPS:
+            bytes_in += size
+        elif src_ip in TEST_IPS:
+            bytes_out += size
+
+    return {
+        'pcap_network_bytes_in': bytes_in,
+        'pcap_network_bytes_out': bytes_out,
+        'pcap_overhead_bytes': bytes_in + bytes_out
+    }
+
+
+def parse_timestamp_to_ns(ts_str):
+    """Parse ISO timestamp to nanoseconds since epoch"""
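+    # The Go writer now emits RFC3339Nano; fromisoformat() parses the
+    # fractional seconds, and the 'Z' replacement below keeps this working
+    # on Python versions before 3.11.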
+    try:
+        dt = datetime.fromisoformat(ts_str.replace('Z', '+00:00'))
+        if dt.tzinfo is not None:
+            dt = dt.astimezone(timezone.utc)
+        # Convert to nanoseconds since epoch
+        return int(dt.timestamp() * 1_000_000_000)
+    except ValueError:
+        return None
+
+
+def enhance_csv(csv_path, pcap_path, output_path, debug=False):
+    """Add PCAP metrics to CSV"""
+    if not os.path.exists(pcap_path):
+        print(f"⚠️ PCAP not found: {pcap_path}")
+        return False
+
+    print(f"Processing: {os.path.basename(csv_path)}")
+
+    # Read PCAP
+    try:
+        packets = read_pcap(pcap_path)
+        print(f"  Loaded {len(packets)} packets")
+
+        if packets and debug:
+            first_pcap_ns = packets[0][0]
+            last_pcap_ns = packets[-1][0]
+            print(f"  First PCAP packet: {first_pcap_ns} ns")
+            print(f"  Last PCAP packet: {last_pcap_ns} ns")
+            print(f"  PCAP duration: {(last_pcap_ns - first_pcap_ns) / 1e9:.3f}s")
+
+    except Exception as e:
+        print(f"  ❌ Error reading PCAP: {e}")
+        return False
+
+    if not packets:
+        print("  ❌ No packets found")
+        return False
+
+    # Read CSV
+    with open(csv_path, 'r', newline='') as f:
+        reader = csv.DictReader(f)
+        fieldnames = list(reader.fieldnames) + [
+            'pcap_network_bytes_in',
+            'pcap_network_bytes_out',
+            'pcap_overhead_bytes'
+        ]
+        rows = list(reader)
+
+    if rows and debug:
+        first_csv_ns = parse_timestamp_to_ns(rows[0]['timestamp'])
+        last_csv_ns = parse_timestamp_to_ns(rows[-1]['timestamp'])
+        if first_csv_ns and last_csv_ns:
+            print(f"  First CSV query: {first_csv_ns} ns")
+            print(f"  Last CSV query: {last_csv_ns} ns")
+            print(f"  CSV duration: {(last_csv_ns - first_csv_ns) / 1e9:.3f}s")
+
+            # Check alignment
+            offset_ns = packets[0][0] - first_csv_ns
+            print(f"  Time offset (PCAP - CSV): {offset_ns / 1e9:.3f}s")
+
+    # Enhance rows
+    enhanced = []
+    matched = 0
+
+    for i, row in enumerate(rows):
+        ts_ns = parse_timestamp_to_ns(row['timestamp'])
+        if ts_ns is None:
+            # Keep the row (with empty metrics) instead of dropping it
+            enhanced.append(row)
+            continue
+
+        duration_ns = int(row.get('duration_ns') or 0)
+
+        matching_packets = find_packets_in_window(packets, ts_ns, duration_ns)
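+        # Packets in this window are attributed to the query; unrelated
+        # traffic captured in the same window is counted too, so treat the
+        # byte counts as upper bounds.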
+        metrics = calculate_metrics(matching_packets)
+        row.update(metrics)
+        enhanced.append(row)
+
+        if metrics['pcap_overhead_bytes'] > 0:
+            matched += 1
+
+        # Debug first few queries
+        if debug and i < 3:
+            print(f"  Query {i}: {row['domain']}")
+            print(f"    Start: {ts_ns} ns")
+            print(f"    Duration: {duration_ns} ns ({duration_ns / 1e6:.3f}ms)")
+            print(f"    End: {ts_ns + duration_ns} ns")
+            print(f"    Matched packets: {len(matching_packets)}")
+            print(f"    Bytes: {metrics['pcap_overhead_bytes']}")
+
+    print(f"  Matched: {matched}/{len(rows)} queries")
+
+    if matched == 0:
+        print("  ⚠️ WARNING: No queries matched any packets!")
+        print("  This might indicate timestamp misalignment.")
+
+    # Write output
+    os.makedirs(os.path.dirname(output_path), exist_ok=True)
+    with open(output_path, 'w', newline='') as f:
+        writer = csv.DictWriter(f, fieldnames=fieldnames, extrasaction='ignore')
+        writer.writeheader()
+        writer.writerows(enhanced)
+
+    print(f"  ✓ Saved: {output_path}")
+    return True
+
+
+def main():
+    parser = argparse.ArgumentParser(
+        description='Add PCAP network metrics to DNS CSV files'
+    )
+    parser.add_argument('input_dir', help='Input directory (e.g., results_merged)')
+    parser.add_argument(
+        '--output',
+        default='./results_enhanced',
+        help='Output directory (default: ./results_enhanced)'
+    )
+    parser.add_argument(
+        '--dry-run',
+        action='store_true',
+        help='Preview files without processing'
+    )
+    parser.add_argument(
+        '--debug',
+        action='store_true',
+        help='Show detailed timing information'
+    )
+
+    args = parser.parse_args()
+
+    print("=" * 60)
+    print("ENHANCE DNS CSVs WITH PCAP METRICS")
+    print("=" * 60)
+    print(f"Input: {args.input_dir}")
+    print(f"Output: {args.output}")
+    if args.debug:
+        print("Debug: ENABLED")
+    print()
+
+    # Find CSV files
+    csv_files = list(Path(args.input_dir).rglob('*.csv'))
+
+    if not csv_files:
+        print("❌ No CSV files found")
+        return 1
+
+    print(f"Found {len(csv_files)} CSV files\n")
+
+    if args.dry_run:
+        print("DRY RUN - would process:")
+        for csv_path in csv_files:
+            pcap_path = csv_path.with_suffix('.pcap')
+            print(f"  {csv_path.relative_to(args.input_dir)}")
+            print(f"    PCAP: {'✓' if pcap_path.exists() else '✗'}")
+        return 0
+
+    # Process files
+    success = 0
+    failed = 0
+
+    for csv_path in csv_files:
+        pcap_path = csv_path.with_suffix('.pcap')
+        rel_path = csv_path.relative_to(args.input_dir)
+        output_path = Path(args.output) / rel_path
+
+        if enhance_csv(str(csv_path), str(pcap_path), str(output_path),
+                       args.debug):
+            success += 1
+        else:
+            failed += 1
+        print()
+
+    # Summary
+    print("=" * 60)
+    print(f"✓ Success: {success}")
+    print(f"✗ Failed: {failed}")
+    print(f"Total: {len(csv_files)}")
+    print(f"\nOutput: {args.output}")
+
+    return 0 if failed == 0 else 1
+
+
+if __name__ == "__main__":
+    exit(main())
diff --git a/scripts/tools/clean_pcaps.py b/scripts/tools/clean_pcaps.py
new file mode 100644
index 0000000..d7ca555
--- /dev/null
+++ b/scripts/tools/clean_pcaps.py
@@ -0,0 +1,367 @@
+#!/usr/bin/env python3
+"""
+Advanced PCAP filter for DNS traffic (with IPv6 support).
+
+Filters out:
+- Local network traffic not involving the test machine (IPv4: 10.0.0.50, plus its IPv6 addresses)
+- AdGuard DNS servers (for non-AdGuard captures)
+- Non-DNS traffic, judged by protocol-specific ports
+"""
+
+import os
+import subprocess
+from pathlib import Path
+import argparse
+
+# Test machine IPs (IPv4 and IPv6; adjust for your environment)
+TEST_IPV4 = '10.0.0.50'
+TEST_IPV6_GLOBAL = '2001:818:e73e:ba00:5506:dfd4:ed8b:96e'
+TEST_IPV6_LINKLOCAL = 'fe80::fe98:c62e:4463:9a2d'
+
+# Port mappings
+PORT_MAP = {
+    'udp': [53],              # DNS-over-UDP
+    'tls': [53, 853],         # DNS-over-TLS
+    'https': [53, 443],       # DNS-over-HTTPS (DoH)
+    'doq': [53, 784, 8853],   # DNS-over-QUIC
+    'doh3': [53, 443]         # DNS-over-HTTP/3
+}
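+# Note: RFC 9250 standardized DoQ on port 853; 784 and 8853 were used by
+# pre-standard deployments. Add 853 to 'doq' if your captures use it.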
+
+# AdGuard DNS IPs to filter out (for non-AdGuard captures)
+ADGUARD_IPS = [
+    '94.140.14.14',
+    '94.140.15.15',
+    '2a10:50c0::ad1:ff',
+    '2a10:50c0::ad2:ff'
+]
+
+def parse_filename(filename):
+    """Extract protocol from filename"""
+    base = filename.replace('.pcap', '').replace('.csv', '')
+    parts = base.split('-')
+
+    if not parts or not parts[0]:  # Need at least a protocol token
+        return None
+
+    protocol = parts[0].lower()
+    return protocol
+
+def extract_resolver_from_path(pcap_path):
+    """Extract resolver name from directory structure"""
+    parts = Path(pcap_path).parts
+
+    for part in parts:
+        if part.lower() in ['cloudflare', 'google', 'quad9', 'adguard']:
+            return part.lower()
+
+    return None
+
+def build_filter_expression(protocol, resolver):
+    """
+    Build tshark filter expression.
+
+    Strategy:
+    1. Only protocol-specific DNS ports
+    2. Keep only traffic involving the test machine (IPv4/IPv6)
+    3. Exclude AdGuard IPs for non-AdGuard captures
+    """
+
+    # Get ports for this protocol
+    ports = PORT_MAP.get(protocol, [53, 443, 853, 784, 8853])
+
+    # Build port filter (UDP or TCP on these ports)
+    port_conditions = []
+    for port in ports:
+        port_conditions.append(f'(udp.port == {port} or tcp.port == {port})')
+
+    port_filter = ' or '.join(port_conditions)
+
+    # Build test machine filter (keep if src or dst is test machine IP)
+    machine_conditions = [f'(ip.addr == {TEST_IPV4})']
+    if TEST_IPV6_GLOBAL:
+        machine_conditions.append(f'(ipv6.addr == {TEST_IPV6_GLOBAL})')
+    if TEST_IPV6_LINKLOCAL:
+        machine_conditions.append(f'(ipv6.addr == {TEST_IPV6_LINKLOCAL})')
+
+    machine_filter = ' or '.join(machine_conditions)
+
+    # Build AdGuard exclusion filter
+    adguard_exclusions = []
+    if resolver != 'adguard':
+        for ip in ADGUARD_IPS:
+            if ':' in ip:  # IPv6
+                adguard_exclusions.append(f'!(ipv6.addr == {ip})')
+            else:  # IPv4
+                adguard_exclusions.append(f'!(ip.addr == {ip})')
+
+    # Combine all filters
+    filters = [f'({port_filter})', f'({machine_filter})']
+
+    if adguard_exclusions:
+        adguard_filter = ' and '.join(adguard_exclusions)
+        filters.append(f'({adguard_filter})')
+
+    final_filter = ' and '.join(filters)
+
+    return final_filter
+
+def filter_pcap(input_path, output_path, filter_expr, verbose=False):
+    """Apply filter to PCAP file using tshark"""
+
+    cmd = [
+        'tshark',
+        '-r', input_path,
+        '-Y', filter_expr,
+        '-w', output_path,
+        '-F', 'pcap'
+    ]
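+    # -Y applies a display filter while reading, and -w writes the matching
+    # packets back out; -F pcap forces classic pcap output even when the
+    # input is pcapng.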
+
+    try:
+        if verbose:
+            print(f"  Filter: {filter_expr}")
+
+        result = subprocess.run(
+            cmd,
+            capture_output=True,
+            text=True,
+            timeout=300
+        )
+
+        if result.returncode != 0:
+            print(f"  ✗ Error: {result.stderr.strip()}")
+            return False
+
+        if not os.path.exists(output_path):
+            print("  ✗ Output file not created")
+            return False
+
+        output_size = os.path.getsize(output_path)
+        if output_size < 24:  # A bare pcap global header is 24 bytes
+            print("  ⚠ Warning: output contains no packets")
+
+        return True
+
+    except subprocess.TimeoutExpired:
+        print("  ✗ Timeout (>5 minutes)")
+        return False
+    except Exception as e:
+        print(f"  ✗ Exception: {e}")
+        return False
+
+def find_pcap_files(root_dir):
+    """Recursively find all PCAP files"""
+    pcap_files = []
+    for root, dirs, files in os.walk(root_dir):
+        for file in files:
+            if file.endswith('.pcap'):
+                full_path = os.path.join(root, file)
+                pcap_files.append(full_path)
+    return sorted(pcap_files)
+
+def format_bytes(bytes_val):
+    """Format bytes as human readable"""
+    for unit in ['B', 'KB', 'MB', 'GB']:
+        if bytes_val < 1024.0:
+            return f"{bytes_val:.1f} {unit}"
+        bytes_val /= 1024.0
+    return f"{bytes_val:.1f} TB"
+
+def main():
+    parser = argparse.ArgumentParser(
+        description='Advanced PCAP filter for DNS traffic (IPv4/IPv6)',
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog='''
+Filtering rules:
+  1. Only include traffic on protocol-specific DNS ports
+  2. Keep only packets involving the test machine (10.0.0.50 or its IPv6 addresses)
+  3. Exclude AdGuard IPs for non-AdGuard captures
+
+Protocol-specific ports:
+  udp:   53
+  tls:   53, 853
+  https: 53, 443
+  doq:   53, 784, 8853
+  doh3:  53, 443
+
+Examples:
+  # Dry run
+  %(prog)s ./results --dry-run
+
+  # Filter with verbose output
+  %(prog)s ./results --verbose
+
+  # Custom output directory
+  %(prog)s ./results --output ./cleaned
+        '''
+    )
+
+    parser.add_argument(
+        'input_dir',
+        help='Input directory containing PCAP files'
+    )
+    parser.add_argument(
+        '-o', '--output',
+        default='./results_filtered',
+        help='Output directory (default: ./results_filtered)'
+    )
+    parser.add_argument(
+        '--dry-run',
+        action='store_true',
+        help='Show what would be done without filtering'
+    )
+    parser.add_argument(
+        '--limit',
+        type=int,
+        help='Only process first N files (for testing)'
+    )
+    parser.add_argument(
+        '-v', '--verbose',
+        action='store_true',
+        help='Verbose output (show filter expressions)'
+    )
+    parser.add_argument(
+        '--overwrite',
+        action='store_true',
+        help='Overwrite existing filtered files'
+    )
+
+    args = parser.parse_args()
+
+    # Check for tshark
+    try:
+        result = subprocess.run(
+            ['tshark', '-v'],
+            capture_output=True,
+            check=True
+        )
+        if args.verbose:
+            version = result.stdout.decode().split('\n')[0]
+            print(f"Using: {version}\n")
+    except (subprocess.CalledProcessError, FileNotFoundError):
+        print("Error: tshark not found. Install Wireshark/tshark:")
+        print("  Ubuntu/Debian: sudo apt-get install tshark")
+        print("  macOS: brew install wireshark")
+        return 1
+
+    print("=" * 80)
+    print("ADVANCED DNS PCAP FILTER (IPv4/IPv6)")
+    print("=" * 80)
+    print("Filters:")
+    print("  1. Protocol-specific DNS ports only")
+    print("  2. Keep only traffic involving test machine (10.0.0.50 / IPv6 addresses)")
+    print("  3. Exclude AdGuard IPs (for non-AdGuard captures)")
+    print(f"\nInput: {args.input_dir}")
+    print(f"Output: {args.output}")
+
+    # Find PCAP files
+    print("\nScanning for PCAP files...")
+    pcap_files = find_pcap_files(args.input_dir)
+
+    if not pcap_files:
+        print(f"No PCAP files found in {args.input_dir}")
+        return 1
+
+    print(f"Found {len(pcap_files)} PCAP files")
+
+    total_input_size = sum(os.path.getsize(f) for f in pcap_files)
+    print(f"Total size: {format_bytes(total_input_size)}")
+
+    if args.limit:
+        pcap_files = pcap_files[:args.limit]
+        print(f"Limiting to first {args.limit} files")
+
+    if args.dry_run:
+        print("\n*** DRY RUN MODE ***\n")
+    else:
+        print()
+
+    # Process files
+    success_count = 0
+    skip_count = 0
+    fail_count = 0
+    total_output_size = 0
+
+    for i, input_path in enumerate(pcap_files, 1):
+        # Extract info from path
+        filename = Path(input_path).name
+        protocol = parse_filename(filename)
+        resolver = extract_resolver_from_path(input_path)
+
+        if not protocol:
+            print(f"[{i}/{len(pcap_files)}] {filename}")
+            print("  ⚠ Could not parse protocol, skipping")
+            skip_count += 1
+            continue
+
+        # Create output path
+        rel_path = os.path.relpath(input_path, args.input_dir)
+        output_path = os.path.join(args.output, rel_path)
+
+        input_size = os.path.getsize(input_path)
+
+        print(f"[{i}/{len(pcap_files)}] {rel_path}")
+        print(f"  Protocol: {protocol.upper()}")
+        print(f"  Resolver: {resolver or 'unknown'}")
+        print(f"  Size: {format_bytes(input_size)}")
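+        # Previously filtered outputs are reused below (unless --overwrite)
+        # and still count toward the output-size total in the summary.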
+
+        # Check if already filtered
+        if os.path.exists(output_path) and not args.overwrite:
+            output_size = os.path.getsize(output_path)
+            reduction = ((input_size - output_size) / input_size * 100) if input_size > 0 else 0
+            print(f"  ⊙ Already filtered: {format_bytes(output_size)} "
+                  f"({reduction:.1f}% reduction)")
+            skip_count += 1
+            total_output_size += output_size
+            continue
+
+        # Build filter
+        filter_expr = build_filter_expression(protocol, resolver)
+
+        if args.dry_run:
+            print("  → Would filter")
+            if args.verbose:
+                print(f"  Filter: {filter_expr}")
+            continue
+
+        # Create output directory
+        os.makedirs(os.path.dirname(output_path), exist_ok=True)
+
+        # Filter
+        success = filter_pcap(input_path, output_path, filter_expr, args.verbose)
+
+        if success:
+            output_size = os.path.getsize(output_path)
+            reduction = ((input_size - output_size) / input_size * 100) if input_size > 0 else 0
+            print(f"  ✓ Filtered: {format_bytes(output_size)} "
+                  f"({reduction:.1f}% reduction)")
+            success_count += 1
+            total_output_size += output_size
+        else:
+            fail_count += 1
+
+    # Summary
+    print("\n" + "=" * 80)
+    print("SUMMARY")
+    print("=" * 80)
+
+    if args.dry_run:
+        print(f"Would process: {len(pcap_files)} files")
+    else:
+        print(f"Successful: {success_count}")
+        print(f"Skipped: {skip_count} (already filtered or unparseable)")
+        print(f"Failed: {fail_count}")
+        print(f"Total: {len(pcap_files)}")
+
+        if success_count > 0 or skip_count > 0:
+            print(f"\nInput size: {format_bytes(total_input_size)}")
+            print(f"Output size: {format_bytes(total_output_size)}")
+            if total_input_size > 0:
+                reduction = ((total_input_size - total_output_size) /
+                             total_input_size * 100)
+                print(f"Reduction: {reduction:.1f}%")
+    print(f"\nOutput directory: {args.output}")
+
+    return 0 if fail_count == 0 else 1
+
+if __name__ == "__main__":
+    exit(main())
diff --git a/scripts/tools/merge_files.py b/scripts/tools/merge_files.py
new file mode 100644
index 0000000..8d6e300
--- /dev/null
+++ b/scripts/tools/merge_files.py
@@ -0,0 +1,274 @@
+#!/usr/bin/env python3
+"""
+Merge DNS test files by configuration.
+
+- Merges CSVs of the same config (adds a 'run_id' column for traceability)
+- Optionally merges PCAPs using mergecap
+- Flattens the date-based directory structure
+"""
+
+import os
+import csv
+import subprocess
+import shutil
+from pathlib import Path
+import argparse
+from collections import defaultdict
+
+def parse_filename(filename):
+    """
+    Extract config key from filename.
+
+    Format: protocol[-flags]-timestamp.{csv,pcap}
+    Config key: protocol[-flags] (ignores timestamp)
+    """
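+    # The timestamp is assumed to be the final '-'-separated token, so
+    # hyphenated flag names remain part of the config key.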
+    base = filename.replace('.csv', '').replace('.pcap', '')
+    parts = base.split('-')
+
+    if len(parts) < 2:
+        return None, None
+
+    # Config is everything except the timestamp
+    config = '-'.join(parts[:-1])
+    timestamp = parts[-1]
+
+    return config, timestamp
+
+def extract_resolver_from_path(file_path):
+    """Extract resolver name from path"""
+    parts = Path(file_path).parts
+    for part in parts:
+        if part.lower() in ['cloudflare', 'google', 'quad9', 'adguard']:
+            return part.lower()
+    return None
+
+def find_files(root_dir, extension):
+    """Find all files with given extension"""
+    files = []
+    for root, dirs, filenames in os.walk(root_dir):
+        for filename in filenames:
+            if filename.endswith(extension):
+                full_path = os.path.join(root, filename)
+                files.append(full_path)
+    return sorted(files)
+
+def merge_csvs(csv_files, output_path, fieldnames):
+    """Merge multiple CSVs into one, adding 'run_id' column"""
+    with open(output_path, 'w', newline='') as outfile:
+        writer = csv.DictWriter(outfile, fieldnames=fieldnames + ['run_id'])
+        writer.writeheader()
+
+        for csv_path in csv_files:
+            # Use the filename timestamp as run_id
+            filename = Path(csv_path).name
+            _, timestamp = parse_filename(filename)
+            run_id = timestamp  # Or add date if needed
+
+            with open(csv_path, 'r', newline='') as infile:
+                reader = csv.DictReader(infile)
+                for row in reader:
+                    row['run_id'] = run_id
+                    writer.writerow(row)
+
+def merge_pcaps(pcap_files, output_path):
+    """Merge PCAP files using mergecap"""
+    cmd = ['mergecap', '-w', output_path] + pcap_files
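+    # mergecap interleaves packets from all inputs in timestamp order by
+    # default, keeping the merged capture chronologically consistent.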
+    try:
+        subprocess.run(cmd, capture_output=True, check=True)
+        return True
+    except subprocess.CalledProcessError as e:
+        print(f"  ✗ mergecap error: {e.stderr.decode()}")
+        return False
+    except FileNotFoundError:
+        print("Error: mergecap not found. Install Wireshark:")
+        print("  Ubuntu: sudo apt install wireshark-common")
+        print("  macOS: brew install wireshark")
+        return False
+
+def format_bytes(bytes_val):
+    """Format bytes as human readable"""
+    for unit in ['B', 'KB', 'MB', 'GB']:
+        if bytes_val < 1024.0:
+            return f"{bytes_val:.1f} {unit}"
+        bytes_val /= 1024.0
+    return f"{bytes_val:.1f} TB"
+
+def main():
+    parser = argparse.ArgumentParser(
+        description='Merge DNS test files by configuration',
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog='''
+Merges files of the same config across dates/timestamps.
+
+Output: ./results_merged/[resolver]/[config].csv  (merged)
+        ./results_merged/[resolver]/[config].pcap (merged, if --merge-pcaps)
+
+Examples:
+  # Dry run to preview
+  %(prog)s ./results --dry-run
+
+  # Merge CSVs only (recommended)
+  %(prog)s ./results
+
+  # Merge CSVs and PCAPs
+  %(prog)s ./results --merge-pcaps
+
+  # Custom output directory
+  %(prog)s ./results --output ./merged_data
+        '''
+    )
+
+    parser.add_argument(
+        'input_dir',
+        help='Input directory (e.g., ./results)'
+    )
+    parser.add_argument(
+        '--output',
+        default='./results_merged',
+        help='Output directory (default: ./results_merged)'
+    )
+    parser.add_argument(
+        '--merge-pcaps',
+        action='store_true',
+        help='Merge PCAP files (requires mergecap from Wireshark)'
+    )
+    parser.add_argument(
+        '--dry-run',
+        action='store_true',
+        help='Show what would be done without merging'
+    )
+    parser.add_argument(
+        '-y', '--yes',
+        action='store_true',
+        help='Skip confirmation prompt'
+    )
+
+    args = parser.parse_args()
+
+    if not os.path.isdir(args.input_dir):
+        print(f"Error: Input directory not found: {args.input_dir}")
+        return 1
+
+    # Find all files
+    print("=" * 80)
+    print("MERGE DNS TEST FILES")
+    print("=" * 80)
+    print(f"Input: {args.input_dir}")
+    print(f"Output: {args.output}")
+    print(f"Merge PCAPs: {'Yes' if args.merge_pcaps else 'No'}")
+
+    csv_files = find_files(args.input_dir, '.csv')
+    pcap_files = find_files(args.input_dir, '.pcap') if args.merge_pcaps else []
+
+    if not csv_files and not pcap_files:
+        print("\nNo CSV/PCAP files found")
+        return 1
+
+    print(f"\nFound {len(csv_files)} CSV files")
+    if args.merge_pcaps:
+        print(f"Found {len(pcap_files)} PCAP files")
+
+    # Group files by resolver and config
+    csv_groups = defaultdict(list)
+    pcap_groups = defaultdict(list)
+
+    for csv_path in csv_files:
+        config, _ = parse_filename(Path(csv_path).name)
+        resolver = extract_resolver_from_path(csv_path)
+        if config and resolver:
+            key = (resolver, config)
+            csv_groups[key].append(csv_path)
+
+    for pcap_path in pcap_files:
+        config, _ = parse_filename(Path(pcap_path).name)
+        resolver = extract_resolver_from_path(pcap_path)
+        if config and resolver:
+            key = (resolver, config)
+            pcap_groups[key].append(pcap_path)
+
+    # Summary
+    print("\nConfigs to merge:")
+    print("-" * 80)
+    for (resolver, config), files in sorted(csv_groups.items()):
+        print(f"  {resolver}/{config}: {len(files)} runs")
+
+    total_runs = sum(len(files) for files in csv_groups.values())
+    print(f"\nTotal configs: {len(csv_groups)}")
+    print(f"Total runs: {total_runs}")
+
+    if args.dry_run:
+        print("\n*** DRY RUN MODE ***\n")
+        for (resolver, config) in sorted(csv_groups.keys()):
+            print(f"Would merge: {resolver}/{config} ({len(csv_groups[(resolver, config)])} CSVs)")
+            if args.merge_pcaps and (resolver, config) in pcap_groups:
+                print(f"Would merge: {resolver}/{config} ({len(pcap_groups[(resolver, config)])} PCAPs)")
+        return 0
+
+    # Confirmation
+    if not args.yes:
+        response = input(f"\nMerge all into {args.output}? [y/N] ")
+        if response.lower() not in ['y', 'yes']:
+            print("Cancelled")
+            return 0
+
+    # Merge
+    print("\n" + "=" * 80)
+    print("MERGING FILES")
+    print("=" * 80)
+
+    success_count = 0
+    fail_count = 0
+    total_queries = 0
+    total_size = 0
+
+    # Get the standard CSV fieldnames (from the first file, if any)
+    fieldnames = []
+    if csv_files:
+        with open(csv_files[0], 'r', newline='') as f:
+            fieldnames = csv.DictReader(f).fieldnames
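+    # The merge assumes every CSV in a group shares this header; rows with
+    # extra columns raise ValueError in DictWriter.writerow().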
+
+    for (resolver, config), files in sorted(csv_groups.items()):
+        print(f"\n{resolver}/{config} ({len(files)} runs)")
+
+        # Merge CSVs
+        output_csv = os.path.join(args.output, resolver, f"{config}.csv")
+        os.makedirs(os.path.dirname(output_csv), exist_ok=True)
+
+        merge_csvs(files, output_csv, fieldnames)
+
+        # Count queries in merged file
+        with open(output_csv, 'r') as f:
+            query_count = sum(1 for _ in csv.reader(f)) - 1  # Minus header
+
+        print(f"  ✓ Merged CSV: {query_count:,} queries")
+        total_queries += query_count
+        success_count += 1
+
+        # Merge PCAPs if requested
+        if args.merge_pcaps and (resolver, config) in pcap_groups:
+            output_pcap = os.path.join(args.output, resolver, f"{config}.pcap")
+            pcap_list = pcap_groups[(resolver, config)]
+
+            if merge_pcaps(pcap_list, output_pcap):
+                merged_size = os.path.getsize(output_pcap)
+                orig_size = sum(os.path.getsize(p) for p in pcap_list)
+                print(f"  ✓ Merged PCAP: {format_bytes(merged_size)} "
+                      f"(from {format_bytes(orig_size)})")
+                total_size += merged_size
+            else:
+                print("  ✗ PCAP merge failed")
+                fail_count += 1
+
+    # Final summary
+    print("\n" + "=" * 80)
+    print("COMPLETE")
+    print("=" * 80)
+    print(f"Successful configs: {success_count}")
+    print(f"Failed: {fail_count}")
+    print(f"Total queries: {total_queries:,}")
+    if args.merge_pcaps:
+        print(f"Total PCAP size: {format_bytes(total_size)}")
+    print(f"\nMerged files in: {args.output}")
+
+    return 0 if fail_count == 0 else 1
+
+if __name__ == "__main__":
+    exit(main())