From 0c03bcbcfe998cd140d284938f664d1c8e507aa1 Mon Sep 17 00:00:00 2001
From: afonsofrancof <afonso@francof.net>
Date: Wed, 8 Apr 2026 10:26:57 +0100
Subject: [PATCH] feat(scripts): Add cpu and mem merge script

---
 scripts/post_processing/merge_cpu.py   | 105 ++++++++++++++++++++++++
 scripts/post_processing/merge_files.py |   4 +-
 scripts/post_processing/merge_mem.py   | 108 +++++++++++++++++++++++++
 3 files changed, 215 insertions(+), 2 deletions(-)
 create mode 100644 scripts/post_processing/merge_cpu.py
 create mode 100644 scripts/post_processing/merge_mem.py

diff --git a/scripts/post_processing/merge_cpu.py b/scripts/post_processing/merge_cpu.py
new file mode 100644
index 0000000..992f861
--- /dev/null
+++ b/scripts/post_processing/merge_cpu.py
@@ -0,0 +1,105 @@
+#!/usr/bin/env python3
+"""
+Merge all .cpu.csv files into a single unified CPU metrics CSV.
+Adds provider, protocol, dnssec_mode, keep_alive columns.
+"""
+
+import csv
+import argparse
+from pathlib import Path
+from typing import List
+
+
+def parse_config_from_filename(filename: str) -> dict:
+    """Derive the run configuration encoded in a name like 'dot-trust-persist.cpu.csv'.
+
+    The first dash-separated token is the protocol. A later 'auth' or 'trust'
+    token sets dnssec_mode (default 'off'; the last one wins) and a 'persist'
+    token sets keep_alive to 1 (default 0). Any other token is ignored.
+    """
+    stem = filename.replace('.cpu.csv', '').replace('.CPU.csv', '')
+    head, *flags = stem.split('-')
+    settings = {
+        'protocol': head,
+        'dnssec_mode': 'off',
+        'keep_alive': 0,
+    }
+    for flag in flags:
+        if flag == 'persist':
+            settings['keep_alive'] = 1
+        elif flag in ('auth', 'trust'):
+            settings['dnssec_mode'] = flag
+    return settings
+
+
+def find_cpu_files(input_dir: Path):
+    """Recursively collect '*.cpu.csv' paths whose name lacks '.bak', sorted."""
+    matches = (
+        entry for entry in input_dir.rglob('*.cpu.csv') if '.bak' not in entry.name
+    )
+    return sorted(matches)
+
+
+def merge_cpu_files(input_dir: Path, output_path: Path):
+    """Concatenate every CPU metrics CSV under input_dir into output_path.
+
+    Each output row gets a running 1-based id, the provider (lower-cased name
+    of the file's parent directory) and the protocol / dnssec_mode /
+    keep_alive values parsed from the file name. Metric columns absent from
+    an input file are written as empty strings. When no input files are found
+    a message is printed and the output file is not created.
+    """
+    sources = find_cpu_files(input_dir)
+    if not sources:
+        print("No .cpu.csv files found")
+        return
+
+    print(f"Found {len(sources)} CPU metric files")
+    metric_fields = [
+        'timestamp', 'wall_time_seconds', 'instructions', 'cycles', 'peak_rss_kb'
+    ]
+    header = ['id', 'provider', 'protocol', 'dnssec_mode', 'keep_alive', *metric_fields]
+    total_rows = 0
+
+    with open(output_path, 'w', newline='', encoding='utf-8') as sink:
+        writer = csv.DictWriter(sink, fieldnames=header)
+        writer.writeheader()
+
+        for source in sources:
+            provider = source.parent.name.lower()
+            cfg = parse_config_from_filename(source.name)
+            print(f"  {provider}/{source.name} "
+                  f"({cfg['protocol']}, {cfg['dnssec_mode']}, persist={cfg['keep_alive']})")
+
+            with open(source, 'r', newline='', encoding='utf-8') as infile:
+                for record in csv.DictReader(infile):
+                    total_rows += 1
+                    merged = {
+                        'id': total_rows,
+                        'provider': provider,
+                        'protocol': cfg['protocol'],
+                        'dnssec_mode': cfg['dnssec_mode'],
+                        'keep_alive': cfg['keep_alive'],
+                    }
+                    merged.update((name, record.get(name, '')) for name in metric_fields)
+                    writer.writerow(merged)
+
+    rule = '=' * 60
+    print(f"\n{rule}")
+    print(f"CPU metrics merged → {output_path}")
+    print(f"Total run records: {total_rows}")
+    print(rule)
+
+
+def main():
+    """Parse the command line, run the CPU merge, and return exit status 0."""
+    cli = argparse.ArgumentParser(description='Merge all .cpu.csv files')
+    cli.add_argument('input_dir', nargs='?', default='.', help='Input directory')
+    cli.add_argument('-o', '--output', default='dns_results_cpu.csv', help='Output path')
+    opts = cli.parse_args()
+    merge_cpu_files(Path(opts.input_dir), Path(opts.output))
+    return 0
+
+
+if __name__ == '__main__':
+    raise SystemExit(main())  # `exit` is injected by `site` and is missing under `python -S`
diff --git a/scripts/post_processing/merge_files.py b/scripts/post_processing/merge_files.py
index ab9d39f..8db08f0 100644
--- a/scripts/post_processing/merge_files.py
+++ b/scripts/post_processing/merge_files.py
@@ -5,7 +5,6 @@ Extracts metadata from filenames and directory structure.
 """
 
 import csv
-import os
 from pathlib import Path
 from dateutil import parser as date_parser
 import argparse
@@ -63,7 +62,8 @@ def find_csv_files(input_dir: Path) -> list:
     """Find all non-backup CSV files."""
     files = []
     for csv_path in input_dir.rglob('*.csv'):
-        if '.bak' in csv_path.name:
+        name = csv_path.name.lower()
+        if '.bak' in name or name.endswith('.cpu.csv') or name.endswith('.mem.csv'):
             continue
         files.append(csv_path)
     return sorted(files)
diff --git a/scripts/post_processing/merge_mem.py b/scripts/post_processing/merge_mem.py
new file mode 100644
index 0000000..0cd6963
--- /dev/null
+++ b/scripts/post_processing/merge_mem.py
@@ -0,0 +1,108 @@
+#!/usr/bin/env python3
+"""
+Merge all .mem.csv files into a single unified Memory metrics CSV.
+Adds provider, protocol, dnssec_mode, keep_alive columns.
+"""
+
+import csv
+import argparse
+from pathlib import Path
+from typing import List
+
+
+def parse_config_from_filename(filename: str) -> dict:
+    """Derive the run configuration encoded in a name like 'dot-trust-persist.mem.csv'.
+
+    The first dash-separated token is the protocol. A later 'auth' or 'trust'
+    token sets dnssec_mode (default 'off'; the last one wins) and a 'persist'
+    token sets keep_alive to 1 (default 0). Any other token is ignored.
+    """
+    stem = filename.replace('.mem.csv', '').replace('.MEM.csv', '')
+    head, *flags = stem.split('-')
+    settings = {
+        'protocol': head,
+        'dnssec_mode': 'off',
+        'keep_alive': 0,
+    }
+    for flag in flags:
+        if flag == 'persist':
+            settings['keep_alive'] = 1
+        elif flag in ('auth', 'trust'):
+            settings['dnssec_mode'] = flag
+    return settings
+
+
+def find_mem_files(input_dir: Path):
+    """Recursively collect '*.mem.csv' paths whose name lacks '.bak', sorted."""
+    matches = (
+        entry for entry in input_dir.rglob('*.mem.csv') if '.bak' not in entry.name
+    )
+    return sorted(matches)
+
+
+def merge_mem_files(input_dir: Path, output_path: Path):
+    """Concatenate every memory metrics CSV under input_dir into output_path.
+
+    Each output row gets a running 1-based id, the provider (lower-cased name
+    of the file's parent directory) and the protocol / dnssec_mode /
+    keep_alive values parsed from the file name. Metric columns absent from
+    an input file are written as empty strings. When no input files are found
+    a message is printed and the output file is not created.
+    """
+    mem_files = find_mem_files(input_dir)
+    if not mem_files:
+        print("No .mem.csv files found")
+        return
+
+    print(f"Found {len(mem_files)} Memory metric files")
+
+    metric_columns = [
+        'timestamp', 'total_alloc_bytes', 'mallocs', 'gc_cycles',
+        'alloc_delta', 'mallocs_delta', 'gc_delta'
+    ]
+    # These names must equal the row keys exactly: csv.DictWriter raises
+    # ValueError for a row key that is not a field name (e.g. ' provider').
+    output_columns = ['id', 'provider', 'protocol', 'dnssec_mode', 'keep_alive', *metric_columns]
+    total_rows = 0
+
+    with open(output_path, 'w', newline='', encoding='utf-8') as outfile:
+        writer = csv.DictWriter(outfile, fieldnames=output_columns)
+        writer.writeheader()
+
+        for mem_path in mem_files:
+            provider = mem_path.parent.name.lower()
+            config = parse_config_from_filename(mem_path.name)
+            print(f"  {provider}/{mem_path.name} "
+                  f"({config['protocol']}, {config['dnssec_mode']}, persist={config['keep_alive']})")
+
+            with open(mem_path, 'r', newline='', encoding='utf-8') as infile:
+                for row in csv.DictReader(infile):
+                    total_rows += 1
+                    out_row = {
+                        'id': total_rows,
+                        'provider': provider,
+                        'protocol': config['protocol'],
+                        'dnssec_mode': config['dnssec_mode'],
+                        'keep_alive': config['keep_alive'],
+                    }
+                    out_row.update((col, row.get(col, '')) for col in metric_columns)
+                    writer.writerow(out_row)
+
+    print(f"\n{'='*60}")
+    print(f"Memory metrics merged → {output_path}")
+    print(f"Total run records: {total_rows}")
+    print(f"{'='*60}")
+
+
+def main():
+    """Parse the command line, run the memory merge, and return exit status 0."""
+    cli = argparse.ArgumentParser(description='Merge all .mem.csv files')
+    cli.add_argument('input_dir', nargs='?', default='.', help='Input directory')
+    cli.add_argument('-o', '--output', default='dns_results_mem.csv', help='Output path')
+    opts = cli.parse_args()
+    merge_mem_files(Path(opts.input_dir), Path(opts.output))
+    return 0
+
+
+if __name__ == '__main__':
+    raise SystemExit(main())  # `exit` is injected by `site` and is missing under `python -S`