From 0c03bcbcfe998cd140d284938f664d1c8e507aa1 Mon Sep 17 00:00:00 2001
From: afonsofrancof
Date: Wed, 8 Apr 2026 10:26:57 +0100
Subject: [PATCH] feat(scripts): Add cpu and mem merge script

---
Review notes (text in this section is ignored by git-am):
 - merge_mem.py: the output column was spelled ' provider' (leading space)
   and did not match the 'provider' row key, so csv.DictWriter raised
   ValueError on the first data row. The column is now 'provider'.
 - Both new scripts end with `raise SystemExit(main())`; the `exit` builtin
   is installed by the site module and is meant for interactive use.
 - merge_files.py: the two endswith() calls are folded into one tuple call.
 - Docstrings were added to the new helper functions; hunk sizes and the
   diffstat below reflect the added lines.

 scripts/post_processing/merge_cpu.py   | 109 ++++++++++++++++++++++++
 scripts/post_processing/merge_files.py |   4 +-
 scripts/post_processing/merge_mem.py   | 112 +++++++++++++++++++++++++
 3 files changed, 223 insertions(+), 2 deletions(-)
 create mode 100644 scripts/post_processing/merge_cpu.py
 create mode 100644 scripts/post_processing/merge_mem.py

diff --git a/scripts/post_processing/merge_cpu.py b/scripts/post_processing/merge_cpu.py
new file mode 100644
index 0000000..992f861
--- /dev/null
+++ b/scripts/post_processing/merge_cpu.py
@@ -0,0 +1,109 @@
+#!/usr/bin/env python3
+"""
+Merge all .cpu.csv files into a single unified CPU metrics CSV.
+Adds provider, protocol, dnssec_mode, keep_alive columns.
+"""
+
+import csv
+import argparse
+from pathlib import Path
+from typing import List
+
+
+def parse_config_from_filename(filename: str) -> dict:
+    """Parse protocol, dnssec_mode, keep_alive from filename like 'dot-trust-persist.cpu.csv'"""
+    base = filename.replace('.cpu.csv', '').replace('.CPU.csv', '')
+    parts = base.split('-')
+
+    protocol = parts[0]
+    dnssec_mode = 'off'
+    keep_alive = 0
+
+    for part in parts[1:]:
+        if part in ('auth', 'trust'):
+            dnssec_mode = part
+        elif part == 'persist':
+            keep_alive = 1
+
+    return {
+        'protocol': protocol,
+        'dnssec_mode': dnssec_mode,
+        'keep_alive': keep_alive,
+    }
+
+
+def find_cpu_files(input_dir: Path):
+    """Recursively collect *.cpu.csv files under input_dir, skipping '.bak' names; sorted."""
+    files: List[Path] = []
+    for p in input_dir.rglob('*.cpu.csv'):
+        if '.bak' not in p.name:
+            files.append(p)
+    return sorted(files)
+
+
+def merge_cpu_files(input_dir: Path, output_path: Path):
+    """Merge every .cpu.csv under input_dir into output_path, tagging rows with id/provider/config."""
+    cpu_files = find_cpu_files(input_dir)
+
+    if not cpu_files:
+        print("No .cpu.csv files found")
+        return
+
+    print(f"Found {len(cpu_files)} CPU metric files")
+
+    # Names must match the out_row keys exactly; DictWriter rejects unknown keys.
+    output_columns = [
+        'id', 'provider', 'protocol', 'dnssec_mode', 'keep_alive',
+        'timestamp', 'wall_time_seconds', 'instructions', 'cycles', 'peak_rss_kb'
+    ]
+
+    total_rows = 0
+
+    with open(output_path, 'w', newline='', encoding='utf-8') as outfile:
+        writer = csv.DictWriter(outfile, fieldnames=output_columns)
+        writer.writeheader()
+
+        for cpu_path in cpu_files:
+            provider = cpu_path.parent.name.lower()
+            config = parse_config_from_filename(cpu_path.name)
+
+            print(f" {provider}/{cpu_path.name} "
+                  f"({config['protocol']}, {config['dnssec_mode']}, persist={config['keep_alive']})")
+
+            with open(cpu_path, 'r', newline='', encoding='utf-8') as infile:
+                reader = csv.DictReader(infile)
+                for row in reader:
+                    total_rows += 1
+                    out_row = {
+                        'id': total_rows,
+                        'provider': provider,
+                        'protocol': config['protocol'],
+                        'dnssec_mode': config['dnssec_mode'],
+                        'keep_alive': config['keep_alive'],
+                        'timestamp': row.get('timestamp', ''),
+                        'wall_time_seconds': row.get('wall_time_seconds', ''),
+                        'instructions': row.get('instructions', ''),
+                        'cycles': row.get('cycles', ''),
+                        'peak_rss_kb': row.get('peak_rss_kb', ''),
+                    }
+                    writer.writerow(out_row)
+
+    print(f"\n{'='*60}")
+    print(f"CPU metrics merged → {output_path}")
+    print(f"Total run records: {total_rows}")
+    print(f"{'='*60}")
+
+
+def main():
+    """Parse CLI arguments, run the CPU merge and return the process exit code (always 0)."""
+    parser = argparse.ArgumentParser(description='Merge all .cpu.csv files')
+    parser.add_argument('input_dir', nargs='?', default='.', help='Input directory')
+    parser.add_argument('-o', '--output', default='dns_results_cpu.csv', help='Output path')
+    args = parser.parse_args()
+
+    merge_cpu_files(Path(args.input_dir), Path(args.output))
+    return 0
+
+
+if __name__ == '__main__':
+    raise SystemExit(main())
diff --git a/scripts/post_processing/merge_files.py b/scripts/post_processing/merge_files.py
index ab9d39f..8db08f0 100644
--- a/scripts/post_processing/merge_files.py
+++ b/scripts/post_processing/merge_files.py
@@ -5,7 +5,6 @@ Extracts metadata from filenames and directory structure.
 """
 
 import csv
-import os
 from pathlib import Path
 from dateutil import parser as date_parser
 import argparse
@@ -63,7 +62,8 @@ def find_csv_files(input_dir: Path) -> list:
     """Find all non-backup CSV files."""
     files = []
     for csv_path in input_dir.rglob('*.csv'):
-        if '.bak' in csv_path.name:
+        name = csv_path.name.lower()
+        if '.bak' in name or name.endswith(('.cpu.csv', '.mem.csv')):
             continue
         files.append(csv_path)
     return sorted(files)
diff --git a/scripts/post_processing/merge_mem.py b/scripts/post_processing/merge_mem.py
new file mode 100644
index 0000000..0cd6963
--- /dev/null
+++ b/scripts/post_processing/merge_mem.py
@@ -0,0 +1,112 @@
+#!/usr/bin/env python3
+"""
+Merge all .mem.csv files into a single unified Memory metrics CSV.
+Adds provider, protocol, dnssec_mode, keep_alive columns.
+"""
+
+import csv
+import argparse
+from pathlib import Path
+from typing import List
+
+
+def parse_config_from_filename(filename: str) -> dict:
+    """Parse protocol, dnssec_mode, keep_alive from filename like 'dot-trust-persist.mem.csv'"""
+    base = filename.replace('.mem.csv', '').replace('.MEM.csv', '')
+    parts = base.split('-')
+
+    protocol = parts[0]
+    dnssec_mode = 'off'
+    keep_alive = 0
+
+    for part in parts[1:]:
+        if part in ('auth', 'trust'):
+            dnssec_mode = part
+        elif part == 'persist':
+            keep_alive = 1
+
+    return {
+        'protocol': protocol,
+        'dnssec_mode': dnssec_mode,
+        'keep_alive': keep_alive,
+    }
+
+
+def find_mem_files(input_dir: Path):
+    """Recursively collect *.mem.csv files under input_dir, skipping '.bak' names; sorted."""
+    files: List[Path] = []
+    for p in input_dir.rglob('*.mem.csv'):
+        if '.bak' not in p.name:
+            files.append(p)
+    return sorted(files)
+
+
+def merge_mem_files(input_dir: Path, output_path: Path):
+    """Merge every .mem.csv under input_dir into output_path, tagging rows with id/provider/config."""
+    mem_files = find_mem_files(input_dir)
+
+    if not mem_files:
+        print("No .mem.csv files found")
+        return
+
+    print(f"Found {len(mem_files)} Memory metric files")
+
+    # Names must match the out_row keys exactly; DictWriter rejects unknown keys.
+    output_columns = [
+        'id', 'provider', 'protocol', 'dnssec_mode', 'keep_alive',
+        'timestamp', 'total_alloc_bytes', 'mallocs', 'gc_cycles',
+        'alloc_delta', 'mallocs_delta', 'gc_delta'
+    ]
+
+    total_rows = 0
+
+    with open(output_path, 'w', newline='', encoding='utf-8') as outfile:
+        writer = csv.DictWriter(outfile, fieldnames=output_columns)
+        writer.writeheader()
+
+        for mem_path in mem_files:
+            provider = mem_path.parent.name.lower()
+            config = parse_config_from_filename(mem_path.name)
+
+            print(f" {provider}/{mem_path.name} "
+                  f"({config['protocol']}, {config['dnssec_mode']}, persist={config['keep_alive']})")
+
+            with open(mem_path, 'r', newline='', encoding='utf-8') as infile:
+                reader = csv.DictReader(infile)
+                for row in reader:
+                    total_rows += 1
+                    out_row = {
+                        'id': total_rows,
+                        'provider': provider,
+                        'protocol': config['protocol'],
+                        'dnssec_mode': config['dnssec_mode'],
+                        'keep_alive': config['keep_alive'],
+                        'timestamp': row.get('timestamp', ''),
+                        'total_alloc_bytes': row.get('total_alloc_bytes', ''),
+                        'mallocs': row.get('mallocs', ''),
+                        'gc_cycles': row.get('gc_cycles', ''),
+                        'alloc_delta': row.get('alloc_delta', ''),
+                        'mallocs_delta': row.get('mallocs_delta', ''),
+                        'gc_delta': row.get('gc_delta', ''),
+                    }
+                    writer.writerow(out_row)
+
+    print(f"\n{'='*60}")
+    print(f"Memory metrics merged → {output_path}")
+    print(f"Total run records: {total_rows}")
+    print(f"{'='*60}")
+
+
+def main():
+    """Parse CLI arguments, run the memory merge and return the process exit code (always 0)."""
+    parser = argparse.ArgumentParser(description='Merge all .mem.csv files')
+    parser.add_argument('input_dir', nargs='?', default='.', help='Input directory')
+    parser.add_argument('-o', '--output', default='dns_results_mem.csv', help='Output path')
+    args = parser.parse_args()
+
+    merge_mem_files(Path(args.input_dir), Path(args.output))
+    return 0
+
+
+if __name__ == '__main__':
+    raise SystemExit(main())