feat(scripts): Add cpu and mem merge script

2026-04-08 10:26:57 +01:00
parent c120418cae
commit 0c03bcbcfe
3 changed files with 215 additions and 2 deletions
@@ -0,0 +1,105 @@
 #!/usr/bin/env python3
 """
 Merge all .cpu.csv files into a single unified CPU metrics CSV.
 Adds provider, protocol, dnssec_mode, keep_alive columns.
 """
 import csv
 import argparse
 from pathlib import Path
 from typing import List
 def parse_config_from_filename(filename: str) -> dict:
    """Derive protocol, DNSSEC mode and keep-alive flag from a CPU metrics filename.

    The name is expected to look like 'dot-trust-persist.cpu.csv': a
    '.cpu.csv' suffix (matched case-insensitively, and only at the end of
    the name) preceded by dash-separated tokens.

    Args:
        filename: Bare file name, without directory components.

    Returns:
        A dict with 'protocol' (the first token), 'dnssec_mode' ('auth' or
        'trust' when such a token is present, otherwise 'off') and
        'keep_alive' (1 when a 'persist' token is present, otherwise 0).
    """
    suffix = '.cpu.csv'
    # Compare only the tail so any letter case ('.CPU.CSV', '.Cpu.csv') is
    # accepted and an occurrence in the middle of the name is left untouched.
    if filename[-len(suffix):].lower() == suffix:
        base = filename[:-len(suffix)]
    else:
        base = filename
    # str.split always yields at least one item, so the first token exists.
    protocol, *options = base.split('-')
    dnssec_mode = 'off'
    keep_alive = 0
    for option in options:
        if option in ('auth', 'trust'):
            dnssec_mode = option
        elif option == 'persist':
            keep_alive = 1
    return {
        'protocol': protocol,
        'dnssec_mode': dnssec_mode,
        'keep_alive': keep_alive,
    }
 def find_cpu_files(input_dir: Path):
    """Return a sorted list of every '*.cpu.csv' path under input_dir.

    The search is recursive. Files whose name contains '.bak' are left out.
    """
    candidates = input_dir.rglob('*.cpu.csv')
    return sorted(path for path in candidates if '.bak' not in path.name)
 def merge_cpu_files(input_dir: Path, output_path: Path):
    """Combine every .cpu.csv file under input_dir into one CSV at output_path.

    Each input row is copied with a running 'id', the 'provider' (lower-cased
    name of the file's parent directory) and the protocol / dnssec_mode /
    keep_alive values parsed from the file name. Metric columns missing from
    an input file are written as empty strings. Progress is printed to stdout.

    Returns None; when no input files are found nothing is written.
    """
    sources = find_cpu_files(input_dir)
    if not sources:
        print("No .cpu.csv files found")
        return
    print(f"Found {len(sources)} CPU metric files")

    # Columns added by this script, followed by those copied from the inputs.
    tag_fields = ['id', 'provider', 'protocol', 'dnssec_mode', 'keep_alive']
    metric_fields = [
        'timestamp', 'wall_time_seconds', 'instructions', 'cycles', 'peak_rss_kb'
    ]

    row_count = 0
    with open(output_path, 'w', newline='', encoding='utf-8') as sink:
        writer = csv.DictWriter(sink, fieldnames=tag_fields + metric_fields)
        writer.writeheader()
        for source in sources:
            provider = source.parent.name.lower()
            config = parse_config_from_filename(source.name)
            summary = f"({config['protocol']}, {config['dnssec_mode']}, persist={config['keep_alive']})"
            print(f"  {provider}/{source.name} {summary}")
            with open(source, 'r', newline='', encoding='utf-8') as handle:
                # Continue the id sequence across files: numbering resumes
                # one past the last id handed out so far.
                numbered = enumerate(csv.DictReader(handle), start=row_count + 1)
                for row_count, record in numbered:
                    merged = {
                        'id': row_count,
                        'provider': provider,
                        'protocol': config['protocol'],
                        'dnssec_mode': config['dnssec_mode'],
                        'keep_alive': config['keep_alive'],
                    }
                    for field in metric_fields:
                        merged[field] = record.get(field, '')
                    writer.writerow(merged)

    banner = '=' * 60
    print(f"\n{banner}")
    print(f"CPU metrics merged → {output_path}")
    print(f"Total run records: {row_count}")
    print(f"{banner}")
 def main():
    """Parse the command line, run the CPU metrics merge and return 0."""
    cli = argparse.ArgumentParser(description='Merge all .cpu.csv files')
    cli.add_argument('input_dir', nargs='?', default='.', help='Input directory')
    cli.add_argument('-o', '--output', default='dns_results_cpu.csv', help='Output path')
    options = cli.parse_args()

    source_dir = Path(options.input_dir)
    destination = Path(options.output)
    merge_cpu_files(source_dir, destination)
    return 0
 if __name__ == '__main__':
    # Raise SystemExit directly: the `exit` helper is injected by the `site`
    # module for interactive use and is not available under `python -S`.
    raise SystemExit(main())
@@ -5,7 +5,6 @@ Extracts metadata from filenames and directory structure.
 """
 import csv
 import os
 from pathlib import Path
 from dateutil import parser as date_parser
 import argparse
@@ -63,7 +62,8 @@ def find_csv_files(input_dir: Path) -> list:
    """Find all non-backup CSV files."""
    files = []
    for csv_path in input_dir.rglob('*.csv'):
-        if '.bak' in csv_path.name:
+        name = csv_path.name.lower()
        if '.bak' in name or name.endswith('.cpu.csv') or name.endswith('.mem.csv'):
            continue
        files.append(csv_path)
    return sorted(files)
@@ -0,0 +1,108 @@
 #!/usr/bin/env python3
 """
 Merge all .mem.csv files into a single unified Memory metrics CSV.
 Adds provider, protocol, dnssec_mode, keep_alive columns.
 """
 import csv
 import argparse
 from pathlib import Path
 from typing import List
 def parse_config_from_filename(filename: str) -> dict:
    """Derive protocol, DNSSEC mode and keep-alive flag from a memory metrics filename.

    The name is expected to look like 'dot-trust-persist.mem.csv': a
    '.mem.csv' suffix (matched case-insensitively, and only at the end of
    the name) preceded by dash-separated tokens.

    Args:
        filename: Bare file name, without directory components.

    Returns:
        A dict with 'protocol' (the first token), 'dnssec_mode' ('auth' or
        'trust' when such a token is present, otherwise 'off') and
        'keep_alive' (1 when a 'persist' token is present, otherwise 0).
    """
    suffix = '.mem.csv'
    # Compare only the tail so any letter case ('.MEM.CSV', '.Mem.csv') is
    # accepted and an occurrence in the middle of the name is left untouched.
    if filename[-len(suffix):].lower() == suffix:
        base = filename[:-len(suffix)]
    else:
        base = filename
    # str.split always yields at least one item, so the first token exists.
    protocol, *options = base.split('-')
    dnssec_mode = 'off'
    keep_alive = 0
    for option in options:
        if option in ('auth', 'trust'):
            dnssec_mode = option
        elif option == 'persist':
            keep_alive = 1
    return {
        'protocol': protocol,
        'dnssec_mode': dnssec_mode,
        'keep_alive': keep_alive,
    }
 def find_mem_files(input_dir: Path):
    """Return a sorted list of every '*.mem.csv' path under input_dir.

    The search is recursive. Files whose name contains '.bak' are left out.
    """
    candidates = input_dir.rglob('*.mem.csv')
    return sorted(path for path in candidates if '.bak' not in path.name)
 def merge_mem_files(input_dir: Path, output_path: Path):
    """Merge every .mem.csv file under input_dir into one CSV at output_path.

    Each input row is copied with a running 'id', the 'provider' (lower-cased
    name of the file's parent directory) and the protocol / dnssec_mode /
    keep_alive values parsed from the file name. Metric columns missing from
    an input file are written as empty strings. Progress is printed to stdout.

    Args:
        input_dir: Directory searched recursively for .mem.csv files.
        output_path: Destination CSV; it is created or overwritten.

    Returns:
        None. When no input files are found, nothing is written.
    """
    mem_files = find_mem_files(input_dir)
    if not mem_files:
        print("No .mem.csv files found")
        return
    print(f"Found {len(mem_files)} Memory metric files")

    # Columns copied verbatim from each input file.
    metric_columns = [
        'timestamp', 'total_alloc_bytes', 'mallocs', 'gc_cycles',
        'alloc_delta', 'mallocs_delta', 'gc_delta',
    ]
    # The header names must equal the row keys exactly: DictWriter raises
    # ValueError for any row key that is not listed in fieldnames.
    output_columns = [
        'id', 'provider', 'protocol', 'dnssec_mode', 'keep_alive',
    ] + metric_columns

    total_rows = 0
    with open(output_path, 'w', newline='', encoding='utf-8') as outfile:
        writer = csv.DictWriter(outfile, fieldnames=output_columns)
        writer.writeheader()
        for mem_path in mem_files:
            provider = mem_path.parent.name.lower()
            config = parse_config_from_filename(mem_path.name)
            print(f"  {provider}/{mem_path.name} "
                  f"({config['protocol']}, {config['dnssec_mode']}, persist={config['keep_alive']})")
            with open(mem_path, 'r', newline='', encoding='utf-8') as infile:
                for row in csv.DictReader(infile):
                    # The id keeps counting across files.
                    total_rows += 1
                    out_row = {
                        'id': total_rows,
                        'provider': provider,
                        'protocol': config['protocol'],
                        'dnssec_mode': config['dnssec_mode'],
                        'keep_alive': config['keep_alive'],
                    }
                    for column in metric_columns:
                        out_row[column] = row.get(column, '')
                    writer.writerow(out_row)

    print(f"\n{'='*60}")
    print(f"Memory metrics merged → {output_path}")
    print(f"Total run records: {total_rows}")
    print(f"{'='*60}")
 def main():
    """Parse the command line, run the memory metrics merge and return 0."""
    cli = argparse.ArgumentParser(description='Merge all .mem.csv files')
    cli.add_argument('input_dir', nargs='?', default='.', help='Input directory')
    cli.add_argument('-o', '--output', default='dns_results_mem.csv', help='Output path')
    options = cli.parse_args()

    source_dir = Path(options.input_dir)
    destination = Path(options.output)
    merge_mem_files(source_dir, destination)
    return 0
 if __name__ == '__main__':
    # Raise SystemExit directly: the `exit` helper is injected by the `site`
    # module for interactive use and is not available under `python -S`.
    raise SystemExit(main())