sdns-proxy/scripts/tools/add_extra_metrics_to_csv.py

#!/usr/bin/env python3
"""
Add network metrics from PCAP files to DNS CSV files.
Adds: pcap_network_bytes_in, pcap_network_bytes_out, pcap_overhead_bytes
"""
import argparse
import csv
import os
import socket
import sys
from datetime import datetime, timezone
from pathlib import Path

import dpkt

# IP addresses of the test machine, used to classify packet direction:
# packets *to* these addresses count as inbound, packets *from* them as outbound.
TEST_IPS = {
    '10.0.0.50',
    '2001:818:e73e:ba00:5506:dfd4:ed8b:96e',
    'fe80::fe98:c62e:4463:9a2d',
}


def inet_to_str(inet):
    """Convert packed inet bytes to an IP address string (IPv4 or IPv6)."""
    try:
        return socket.inet_ntop(socket.AF_INET, inet)
    except ValueError:
        try:
            return socket.inet_ntop(socket.AF_INET6, inet)
        except ValueError:
            return None


def read_pcap(pcap_path):
    """Read a PCAP and return a list of (timestamp_ns, size, src_ip, dst_ip)."""
    packets = []
    with open(pcap_path, 'rb') as f:
        try:
            pcap = dpkt.pcap.Reader(f)
        except ValueError:
            # Not a classic pcap (bad magic); retry the file as pcapng.
            f.seek(0)
            pcap = dpkt.pcapng.Reader(f)
        for ts, buf in pcap:
            try:
                # Convert PCAP timestamp (float seconds) to nanoseconds
                timestamp_ns = int(ts * 1_000_000_000)
                size = len(buf)
                eth = dpkt.ethernet.Ethernet(buf)
                src_ip = dst_ip = None
                if isinstance(eth.data, (dpkt.ip.IP, dpkt.ip6.IP6)):
                    src_ip = inet_to_str(eth.data.src)
                    dst_ip = inet_to_str(eth.data.dst)
                if src_ip and dst_ip:
                    packets.append((timestamp_ns, size, src_ip, dst_ip))
            except (dpkt.dpkt.NeedData, dpkt.dpkt.UnpackError):
                continue
    return packets


def find_packets_in_window(packets, start_ns, duration_ns):
    """Find packets within an exact time window (nanosecond precision)."""
    end_ns = start_ns + duration_ns
    matching = []
    for timestamp_ns, size, src_ip, dst_ip in packets:
        if start_ns <= timestamp_ns <= end_ns:
            matching.append((size, src_ip, dst_ip))
    return matching
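

# Optional speed-up (a sketch, not wired into enhance_csv below): read_pcap()
# yields packets in capture order, so assuming non-decreasing timestamps the
# linear scan above can be replaced by a binary search. `timestamps` would be
# precomputed once per PCAP as [p[0] for p in packets].
def find_packets_in_window_fast(packets, timestamps, start_ns, duration_ns):
    """Bisect-based variant of find_packets_in_window."""
    import bisect
    # First index with ts >= start_ns, then first index past the window end.
    lo = bisect.bisect_left(timestamps, start_ns)
    hi = bisect.bisect_right(timestamps, start_ns + duration_ns)
    return [(size, src, dst) for _, size, src, dst in packets[lo:hi]]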


def calculate_metrics(packets):
    """Calculate directional byte counts from (size, src_ip, dst_ip) tuples."""
    bytes_in = 0
    bytes_out = 0
    for size, src_ip, dst_ip in packets:
        if dst_ip in TEST_IPS:
            bytes_in += size
        elif src_ip in TEST_IPS:
            bytes_out += size
    return {
        'pcap_network_bytes_in': bytes_in,
        'pcap_network_bytes_out': bytes_out,
        'pcap_overhead_bytes': bytes_in + bytes_out,
    }
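
# Worked example (IPs are illustrative; 8.8.8.8 stands in for a resolver):
#   calculate_metrics([(80, '10.0.0.50', '8.8.8.8'),      # query leaving us
#                      (120, '8.8.8.8', '10.0.0.50')])    # response arriving
#   -> {'pcap_network_bytes_in': 120, 'pcap_network_bytes_out': 80,
#       'pcap_overhead_bytes': 200}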


def parse_timestamp_to_ns(ts_str):
    """Parse an ISO-8601 timestamp to nanoseconds since the epoch."""
    try:
        dt = datetime.fromisoformat(ts_str.replace('Z', '+00:00'))
        if dt.tzinfo is not None:
            dt = dt.astimezone(timezone.utc)
        # Note: naive timestamps (no offset) are interpreted as local time by
        # .timestamp(); CSV timestamps are expected to carry an offset or 'Z'.
        return int(dt.timestamp() * 1_000_000_000)
    except ValueError:
        return None
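
# For example, parse_timestamp_to_ns('2024-01-01T00:00:00Z') (an illustrative
# timestamp) returns 1704067200000000000; malformed strings return None.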


def enhance_csv(csv_path, pcap_path, output_path, debug=False):
    """Add PCAP metrics to a CSV, writing the result to output_path."""
    if not os.path.exists(pcap_path):
        print(f"⚠️ PCAP not found: {pcap_path}")
        return False

    print(f"Processing: {os.path.basename(csv_path)}")

    # Read PCAP
    try:
        packets = read_pcap(pcap_path)
        print(f" Loaded {len(packets)} packets")
        if packets and debug:
            first_pcap_ns = packets[0][0]
            last_pcap_ns = packets[-1][0]
            print(f" First PCAP packet: {first_pcap_ns} ns")
            print(f" Last PCAP packet: {last_pcap_ns} ns")
            print(f" PCAP duration: {(last_pcap_ns - first_pcap_ns) / 1e9:.3f}s")
    except Exception as e:
        print(f" ❌ Error reading PCAP: {e}")
        return False

    if not packets:
        print(" ❌ No packets found")
        return False

    # Read CSV
    with open(csv_path, 'r', newline='') as f:
        reader = csv.DictReader(f)
        fieldnames = list(reader.fieldnames) + [
            'pcap_network_bytes_in',
            'pcap_network_bytes_out',
            'pcap_overhead_bytes',
        ]
        rows = list(reader)

    if rows and debug:
        first_csv_ns = parse_timestamp_to_ns(rows[0]['timestamp'])
        last_csv_ns = parse_timestamp_to_ns(rows[-1]['timestamp'])
        if first_csv_ns and last_csv_ns:
            print(f" First CSV query: {first_csv_ns} ns")
            print(f" Last CSV query: {last_csv_ns} ns")
            print(f" CSV duration: {(last_csv_ns - first_csv_ns) / 1e9:.3f}s")
            # Check alignment between capture and query timestamps
            offset_ns = packets[0][0] - first_csv_ns
            print(f" Time offset (PCAP - CSV): {offset_ns / 1e9:.3f}s")

    # Enhance rows
    enhanced = []
    matched = 0
    for i, row in enumerate(rows):
        ts_ns = parse_timestamp_to_ns(row['timestamp'])
        if ts_ns is None:
            # Unparseable timestamp: the row is dropped from the output.
            continue
        # Guard against a missing *or empty* duration_ns field.
        duration_ns = int(row.get('duration_ns') or 0)
        matching_packets = find_packets_in_window(packets, ts_ns, duration_ns)
        metrics = calculate_metrics(matching_packets)
        row.update(metrics)
        enhanced.append(row)
        if metrics['pcap_overhead_bytes'] > 0:
            matched += 1
        # Debug first few queries
        if debug and i < 3:
            print(f" Query {i}: {row['domain']}")
            print(f" Start: {ts_ns} ns")
            print(f" Duration: {duration_ns} ns ({duration_ns / 1e6:.3f}ms)")
            print(f" End: {ts_ns + duration_ns} ns")
            print(f" Matched packets: {len(matching_packets)}")
            print(f" Bytes: {metrics['pcap_overhead_bytes']}")

    print(f" Matched: {matched}/{len(rows)} queries")
    if matched == 0:
        print(" ⚠️ WARNING: No queries matched any packets!")
        print(" This might indicate timestamp misalignment.")

    # Write output
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    with open(output_path, 'w', newline='') as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames, extrasaction='ignore')
        writer.writeheader()
        writer.writerows(enhanced)
    print(f" ✓ Saved: {output_path}")
    return True


def main():
    parser = argparse.ArgumentParser(
        description='Add PCAP network metrics to DNS CSV files'
    )
    parser.add_argument('input_dir', help='Input directory (e.g., results_merged)')
    parser.add_argument(
        '--output',
        default='./results_enhanced',
        help='Output directory (default: ./results_enhanced)'
    )
    parser.add_argument(
        '--dry-run',
        action='store_true',
        help='Preview files without processing'
    )
    parser.add_argument(
        '--debug',
        action='store_true',
        help='Show detailed timing information'
    )
    args = parser.parse_args()

    print("=" * 60)
    print("ENHANCE DNS CSVs WITH PCAP METRICS")
    print("=" * 60)
    print(f"Input: {args.input_dir}")
    print(f"Output: {args.output}")
    if args.debug:
        print("Debug: ENABLED")
    print()

    # Find CSV files
    csv_files = list(Path(args.input_dir).rglob('*.csv'))
    if not csv_files:
        print("❌ No CSV files found")
        return 1
    print(f"Found {len(csv_files)} CSV files\n")

    if args.dry_run:
        print("DRY RUN - would process:")
        for csv_path in csv_files:
            pcap_path = csv_path.with_suffix('.pcap')
            print(f" {csv_path.relative_to(args.input_dir)}")
            print(f" PCAP: {'✓' if pcap_path.exists() else '✗'}")
        return 0

    # Process files
    success = 0
    failed = 0
    for csv_path in csv_files:
        pcap_path = csv_path.with_suffix('.pcap')
        rel_path = csv_path.relative_to(args.input_dir)
        output_path = Path(args.output) / rel_path
        if enhance_csv(str(csv_path), str(pcap_path), str(output_path),
                       args.debug):
            success += 1
        else:
            failed += 1
        print()

    # Summary
    print("=" * 60)
    print(f"✓ Success: {success}")
    print(f"✗ Failed: {failed}")
    print(f"Total: {len(csv_files)}")
    print(f"\nOutput: {args.output}")
    return 0 if failed == 0 else 1


if __name__ == "__main__":
    sys.exit(main())