feat(tools): Add script to merge DNS test CSVs into one file
@@ -0,0 +1,207 @@
#!/usr/bin/env python3
"""
Merge all DNS test CSVs into a single unified CSV.
Extracts metadata from filenames and directory structure.
"""

import argparse
import csv
import sys
from pathlib import Path

from dateutil import parser as date_parser


def parse_config(filename: str) -> dict:
    """
    Parse protocol, dnssec_mode, and keep_alive from the filename.

    Examples:
        doh3-auth.csv         → protocol=doh3, dnssec=auth, persist=0
        tls-trust-persist.csv → protocol=tls, dnssec=trust, persist=1
        https.csv             → protocol=https, dnssec=off, persist=0
        doudp-auth.csv        → protocol=doudp, dnssec=auth, persist=0
        dnscrypt-trust.csv    → protocol=dnscrypt, dnssec=trust, persist=0
    """
    base = filename.replace('.csv', '')
    parts = base.split('-')

    protocol = parts[0]
    dnssec_mode = 'off'
    keep_alive = 0

    for part in parts[1:]:
        if part in ('auth', 'trust'):
            dnssec_mode = part
        elif part == 'persist':
            keep_alive = 1

    return {
        'protocol': protocol,
        'dnssec_mode': dnssec_mode,
        'keep_alive': keep_alive,
    }
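
# Illustrative call (hypothetical filename, matching the patterns above):
#   parse_config('doh3-auth-persist.csv')
#   → {'protocol': 'doh3', 'dnssec_mode': 'auth', 'keep_alive': 1}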


def parse_timestamp_unix(ts_str: str) -> float:
    """Convert an RFC 3339 timestamp to Unix epoch seconds."""
    try:
        dt = date_parser.isoparse(ts_str)
        return dt.timestamp()
    except Exception:
        # Deliberately broad: any unparsable timestamp falls back to 0.0
        return 0.0
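
# Illustrative: parse_timestamp_unix('2024-01-15T12:00:00Z') → 1705320000.0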


def ns_to_ms(duration_ns: str) -> float:
    """Convert nanoseconds to milliseconds."""
    try:
        return float(duration_ns) / 1_000_000
    except (ValueError, TypeError):
        return 0.0
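
# Illustrative: ns_to_ms('1500000') → 1.5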


def find_csv_files(input_dir: Path) -> list:
    """Find all non-backup CSV files."""
    files = []
    for csv_path in input_dir.rglob('*.csv'):
        if '.bak' in csv_path.name:
            continue
        files.append(csv_path)
    return sorted(files)
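
# Assumed on-disk layout (directory names are hypothetical; the parent
# folder becomes the provider, the filename encodes the configuration):
#   results/cloudflare/doh3-auth.csv
#   results/google/tls-trust-persist.csv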


def merge_all_csvs(input_dir: Path, output_path: Path):
    """Merge all CSVs into a single file."""

    csv_files = find_csv_files(input_dir)

    if not csv_files:
        print("No CSV files found")
        return

    print(f"Found {len(csv_files)} CSV files")

    # Output columns in the desired order
    output_columns = [
        'id',
        'provider',
        'protocol',
        'dnssec_mode',
        'domain',
        'query_type',
        'keep_alive',
        'dns_server',
        'timestamp',
        'timestamp_unix',
        'duration_ns',
        'duration_ms',
        'request_size_bytes',
        'response_size_bytes',
        'bytes_sent',
        'bytes_received',
        'packets_sent',
        'packets_received',
        'total_bytes',
        'response_code',
        'error',
    ]

    global_id = 0
    total_rows = 0

    with open(output_path, 'w', newline='', encoding='utf-8') as outfile:
        writer = csv.DictWriter(outfile, fieldnames=output_columns)
        writer.writeheader()

        for csv_path in csv_files:
            # The provider name comes from the parent directory
            provider = csv_path.parent.name.lower()

            # Protocol / DNSSEC / keep-alive come from the filename
            config = parse_config(csv_path.name)

            print(f"  {provider}/{csv_path.name} "
                  f"({config['protocol']}, {config['dnssec_mode']}, "
                  f"persist={config['keep_alive']})")

            file_rows = 0

            with open(csv_path, 'r', newline='', encoding='utf-8') as infile:
                reader = csv.DictReader(infile)

                for row in reader:
                    global_id += 1
                    file_rows += 1

                    # Build the output row, deriving unified fields as we go
                    out_row = {
                        'id': global_id,
                        'provider': provider,
                        'protocol': config['protocol'],
                        'dnssec_mode': config['dnssec_mode'],
                        'keep_alive': config['keep_alive'],
                        'domain': row.get('domain', ''),
                        'query_type': row.get('query_type', ''),
                        'dns_server': row.get('dns_server', ''),
                        'timestamp': row.get('timestamp', ''),
                        'timestamp_unix': parse_timestamp_unix(row.get('timestamp', '')),
                        'duration_ns': row.get('duration_ns', ''),
                        'duration_ms': ns_to_ms(row.get('duration_ns', '')),
                        'request_size_bytes': row.get('request_size_bytes', ''),
                        'response_size_bytes': row.get('response_size_bytes', ''),
                        'bytes_sent': row.get('bytes_sent', ''),
                        'bytes_received': row.get('bytes_received', ''),
                        'packets_sent': row.get('packets_sent', ''),
                        'packets_received': row.get('packets_received', ''),
                        'total_bytes': row.get('total_bytes', ''),
                        'response_code': row.get('response_code', ''),
                        'error': row.get('error', ''),
                    }

                    writer.writerow(out_row)

            total_rows += file_rows
            print(f"    → {file_rows:,} rows")

    print(f"\n{'='*60}")
    print(f"Output: {output_path}")
    print(f"Total rows: {total_rows:,}")
    print(f"{'='*60}")
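
# Note: rows are streamed one at a time through DictReader/DictWriter,
# so memory usage stays flat even when the merged output grows large.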


def main():
    parser = argparse.ArgumentParser(
        description='Merge all DNS test CSVs into a single file'
    )
    parser.add_argument(
        'input_dir',
        nargs='?',
        default='.',
        help='Input directory containing provider folders (default: .)'
    )
    parser.add_argument(
        '-o', '--output',
        default='dns_results.csv',
        help='Output CSV path (default: dns_results.csv)'
    )

    args = parser.parse_args()

    input_dir = Path(args.input_dir)
    output_path = Path(args.output)

    if not input_dir.exists():
        print(f"Error: Input directory not found: {input_dir}")
        return 1

    print("=" * 60)
    print("MERGE ALL DNS CSVs")
    print("=" * 60)
    print(f"Input:  {input_dir}")
    print(f"Output: {output_path}")
    print()

    merge_all_csvs(input_dir, output_path)

    return 0


if __name__ == '__main__':
    sys.exit(main())
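
# Example invocation (script name and paths are illustrative):
#   python3 merge_all_csvs.py results/ -o dns_results.csv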