#!/usr/bin/env python3
"""
Merge DNS test files by configuration.

- Merges CSVs of the same config (adds a 'run_id' column for traceability)
- Optionally merges PCAPs using mergecap
- Flattens the date-based directory structure
"""

import argparse
import csv
import os
import subprocess
import sys
from collections import defaultdict
from pathlib import Path


def parse_filename(filename):
    """
    Extract the config key from a filename.

    Format:     protocol[-flags]-timestamp.{csv,pcap}
    Config key: protocol[-flags] (ignores the timestamp)

    Returns (None, None) if the filename does not match the expected
    format, so callers can always unpack the result safely.
    """
    base = filename.replace('.csv', '').replace('.pcap', '')
    parts = base.split('-')
    if len(parts) < 2:
        return None, None
    # Config is everything except the trailing timestamp
    config = '-'.join(parts[:-1])
    timestamp = parts[-1]
    return config, timestamp


def extract_resolver_from_path(file_path):
    """Extract the resolver name from a path component, if present."""
    for part in Path(file_path).parts:
        if part.lower() in ['cloudflare', 'google', 'quad9', 'adguard']:
            return part.lower()
    return None


def find_files(root_dir, extension):
    """Recursively find all files with the given extension."""
    files = []
    for root, _dirs, filenames in os.walk(root_dir):
        for filename in filenames:
            if filename.endswith(extension):
                files.append(os.path.join(root, filename))
    return sorted(files)


def merge_csvs(csv_files, output_path, fieldnames):
    """Merge multiple CSVs into one, adding a 'run_id' column."""
    with open(output_path, 'w', newline='') as outfile:
        writer = csv.DictWriter(outfile, fieldnames=fieldnames + ['run_id'])
        writer.writeheader()
        for csv_path in csv_files:
            # Use the filename's timestamp as the run_id
            _, timestamp = parse_filename(Path(csv_path).name)
            run_id = timestamp  # Or add the date if needed
            with open(csv_path, 'r', newline='') as infile:
                for row in csv.DictReader(infile):
                    row['run_id'] = run_id
                    writer.writerow(row)


def merge_pcaps(pcap_files, output_path):
    """Merge PCAP files using mergecap."""
    cmd = ['mergecap', '-w', output_path] + pcap_files
    try:
        subprocess.run(cmd, capture_output=True, check=True)
        return True
    except subprocess.CalledProcessError as e:
        print(f"  ✗ mergecap error: {e.stderr.decode()}")
        return False
    except FileNotFoundError:
        print("Error: mergecap not found. Install Wireshark:")
        print("  Ubuntu: sudo apt install wireshark-common")
        print("  macOS:  brew install wireshark")
        return False


def format_bytes(bytes_val):
    """Format a byte count as a human-readable string."""
    for unit in ['B', 'KB', 'MB', 'GB']:
        if bytes_val < 1024.0:
            return f"{bytes_val:.1f} {unit}"
        bytes_val /= 1024.0
    return f"{bytes_val:.1f} TB"


def main():
    parser = argparse.ArgumentParser(
        description='Merge DNS test files by configuration',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog='''
Merges files of the same config across dates/timestamps.
Output:
  ./results_merged/[resolver]/[config].csv   (merged)
  ./results_merged/[resolver]/[config].pcap  (merged, if --merge-pcaps)

Examples:
  # Dry run to preview
  %(prog)s ./results --dry-run

  # Merge CSVs only (recommended)
  %(prog)s ./results

  # Merge CSVs and PCAPs
  %(prog)s ./results --merge-pcaps

  # Custom output directory
  %(prog)s ./results --output ./merged_data
'''
    )
    parser.add_argument(
        'input_dir',
        help='Input directory (e.g., ./results)'
    )
    parser.add_argument(
        '--output',
        default='./results_merged',
        help='Output directory (default: ./results_merged)'
    )
    parser.add_argument(
        '--merge-pcaps',
        action='store_true',
        help='Merge PCAP files (requires mergecap from Wireshark)'
    )
    parser.add_argument(
        '--dry-run',
        action='store_true',
        help='Show what would be done without merging'
    )
    parser.add_argument(
        '-y', '--yes',
        action='store_true',
        help='Skip confirmation prompt'
    )
    args = parser.parse_args()

    if not os.path.isdir(args.input_dir):
        print(f"Error: Input directory not found: {args.input_dir}")
        return 1

    # Find all files
    print("=" * 80)
    print("MERGE DNS TEST FILES")
    print("=" * 80)
    print(f"Input: {args.input_dir}")
    print(f"Output: {args.output}")
    print(f"Merge PCAPs: {'Yes' if args.merge_pcaps else 'No'}")

    csv_files = find_files(args.input_dir, '.csv')
    pcap_files = find_files(args.input_dir, '.pcap') if args.merge_pcaps else []

    if not csv_files and not pcap_files:
        print("\nNo CSV/PCAP files found")
        return 1

    print(f"\nFound {len(csv_files)} CSV files")
    if args.merge_pcaps:
        print(f"Found {len(pcap_files)} PCAP files")

    # Group files by (resolver, config); files whose names or paths
    # don't parse are skipped
    csv_groups = defaultdict(list)
    pcap_groups = defaultdict(list)

    for csv_path in csv_files:
        config, _ = parse_filename(Path(csv_path).name)
        resolver = extract_resolver_from_path(csv_path)
        if config and resolver:
            csv_groups[(resolver, config)].append(csv_path)

    for pcap_path in pcap_files:
        config, _ = parse_filename(Path(pcap_path).name)
        resolver = extract_resolver_from_path(pcap_path)
        if config and resolver:
            pcap_groups[(resolver, config)].append(pcap_path)

    # Summary
    print("\nConfigs to merge:")
    print("-" * 80)
    for (resolver, config), files in sorted(csv_groups.items()):
        print(f"  {resolver}/{config}: {len(files)} runs")

    total_runs = sum(len(files) for files in csv_groups.values())
    print(f"\nTotal configs: {len(csv_groups)}")
    print(f"Total runs: {total_runs}")

    if args.dry_run:
        print("\n*** DRY RUN MODE ***\n")
        for (resolver, config) in sorted(csv_groups.keys()):
            print(f"Would merge: {resolver}/{config} "
                  f"({len(csv_groups[(resolver, config)])} CSVs)")
            if args.merge_pcaps and (resolver, config) in pcap_groups:
                print(f"Would merge: {resolver}/{config} "
                      f"({len(pcap_groups[(resolver, config)])} PCAPs)")
        return 0

    # Confirmation
    if not args.yes:
        response = input(f"\nMerge all into {args.output}? [y/N] ")
[y/N] ") if response.lower() not in ['y', 'yes']: print("Cancelled") return 0 # Merge print("\n" + "=" * 80) print("MERGING FILES") print("=" * 80) success_count = 0 fail_count = 0 total_queries = 0 total_size = 0 # Get standard CSV fieldnames (from first file) first_csv = next(iter(csv_files)) with open(first_csv, 'r') as f: reader = csv.DictReader(f) fieldnames = reader.fieldnames for (resolver, config), files in sorted(csv_groups.items()): print(f"\n{resolver}/{config} ({len(files)} runs)") # Merge CSVs output_csv = os.path.join(args.output, resolver, f"{config}.csv") os.makedirs(os.path.dirname(output_csv), exist_ok=True) merge_csvs(files, output_csv, fieldnames) # Count queries in merged file with open(output_csv, 'r') as f: query_count = sum(1 for _ in csv.reader(f)) - 1 # Minus header print(f" ✓ Merged CSV: {query_count:,} queries") total_queries += query_count success_count += 1 # Merge PCAPs if requested if args.merge_pcaps and (resolver, config) in pcap_groups: output_pcap = os.path.join(args.output, resolver, f"{config}.pcap") pcap_list = pcap_groups[(resolver, config)] if merge_pcaps(pcap_list, output_pcap): merged_size = os.path.getsize(output_pcap) orig_size = sum(os.path.getsize(p) for p in pcap_list) print(f" ✓ Merged PCAP: {format_bytes(merged_size)} " f"(from {format_bytes(orig_size)})") total_size += merged_size else: print(f" ✗ PCAP merge failed") fail_count += 1 # Final summary print("\n" + "=" * 80) print("COMPLETE") print("=" * 80) print(f"Successful configs: {success_count}") print(f"Failed: {fail_count}") print(f"Total queries: {total_queries:,}") if args.merge_pcaps: print(f"Total PCAP size: {format_bytes(total_size)}") print(f"\nMerged files in: {args.output}") return 0 if fail_count == 0 else 1 if __name__ == "__main__": exit(main())
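
# --- Example (illustrative only, not part of the script) --------------------
# The 'run_id' column preserves traceability after merging: each row keeps
# the timestamp of the run it came from. A quick sanity check on a merged
# file is to count rows per run_id; every run of a config should contribute
# a similar number of queries. The path and config name below are
# hypothetical — substitute a file actually produced by this script.
#
#   import csv
#   from collections import Counter
#
#   with open('./results_merged/cloudflare/doh.csv', newline='') as f:
#       counts = Counter(row['run_id'] for row in csv.DictReader(f))
#   for run_id, n in sorted(counts.items()):
#       print(f"{run_id}: {n} queries")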