feat(dns): add dnscrypt and dns over tcp

2026-02-04 22:08:05 +00:00
parent 5d9b630d13
commit 92351a80a9
12 changed files with 2576 additions and 568 deletions


@@ -8,10 +8,12 @@ import (
"github.com/afonsofrancof/sdns-proxy/common/dnssec"
"github.com/afonsofrancof/sdns-proxy/common/logger"
"github.com/afonsofrancof/sdns-proxy/common/protocols/do53"
"github.com/afonsofrancof/sdns-proxy/common/protocols/dnscrypt"
"github.com/afonsofrancof/sdns-proxy/common/protocols/doh"
"github.com/afonsofrancof/sdns-proxy/common/protocols/doq"
"github.com/afonsofrancof/sdns-proxy/common/protocols/dot"
"github.com/afonsofrancof/sdns-proxy/common/protocols/dotcp"
"github.com/afonsofrancof/sdns-proxy/common/protocols/doudp"
"github.com/miekg/dns"
)
@@ -37,7 +39,7 @@ type Options struct {
func New(upstream string, opts Options) (DNSClient, error) {
logger.Debug("Creating DNS client for upstream: %s with options: %+v", upstream, opts)
// Try to parse as URL first
// Try to parse as URL
parsedURL, err := url.Parse(upstream)
if err != nil {
logger.Error("Invalid upstream format: %v", err)
@@ -51,7 +53,7 @@ func New(upstream string, opts Options) (DNSClient, error) {
logger.Debug("Parsing %s as URL with scheme %s", upstream, parsedURL.Scheme)
baseClient, err = createClientFromURL(parsedURL, opts)
} else {
// No scheme - treat as plain DNS address
// No scheme - treat as plain DNS address (defaults to UDP)
logger.Debug("Parsing %s as plain DNS address", upstream)
baseClient, err = createClientFromPlainAddress(upstream, opts)
}
@@ -200,7 +202,8 @@ func createClientFromPlainAddress(address string, opts Options) (DNSClient, erro
}
logger.Debug("Creating client from plain address: host=%s, port=%s", host, port)
return createClient("", host, port, "", opts)
// Default to UDP for plain addresses
return createClient("udp", host, port, "", opts)
}
func getDefaultPort(scheme string) string {
@@ -212,6 +215,8 @@ func getDefaultPort(scheme string) string {
port = "853"
case "quic", "doq":
port = "853"
case "dnscrypt":
port = "443"
}
logger.Debug("Default port for scheme %s: %s", scheme, port)
return port
@@ -232,13 +237,22 @@ func createClient(scheme, host, port, path string, opts Options) (DNSClient, err
scheme, host, port, path, opts.DNSSEC, opts.KeepAlive)
switch scheme {
case "udp", "tcp", "do53", "":
config := do53.Config{
case "udp", "doudp", "":
config := doudp.Config{
HostAndPort: net.JoinHostPort(host, port),
DNSSEC: opts.DNSSEC,
}
logger.Debug("Creating DO53 client with config: %+v", config)
return do53.New(config)
logger.Debug("Creating DoUDP client with config: %+v", config)
return doudp.New(config)
case "tcp", "dotcp":
config := dotcp.Config{
HostAndPort: net.JoinHostPort(host, port),
DNSSEC: opts.DNSSEC,
KeepAlive: opts.KeepAlive,
}
logger.Debug("Creating DoTCP client with config: %+v", config)
return dotcp.New(config)
case "https", "doh":
config := doh.Config{
@@ -274,11 +288,22 @@ func createClient(scheme, host, port, path string, opts Options) (DNSClient, err
logger.Debug("Creating DoT client with config: %+v", config)
return dot.New(config)
case "sdns":
config := dnscrypt.Config{
// url.Parse split the stamp into a scheme and a "host", but the
// dnscrypt client expects the whole sdns:// stamp, so rejoin them here.
// The "host" is really the base64url stamp payload, not a hostname.
ServerStamp: fmt.Sprintf("%v://%v", scheme, host),
DNSSEC: opts.DNSSEC,
}
logger.Debug("Creating DNSCrypt client with stamp")
return dnscrypt.New(config)
case "doq":
config := doq.Config{
Host: host,
Port: port,
DNSSEC: opts.DNSSEC,
Host: host,
Port: port,
DNSSEC: opts.DNSSEC,
}
logger.Debug("Creating DoQ client with config: %+v", config)
return doq.New(config)
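For context, a minimal caller-side sketch of the new scheme dispatch (not part of this commit). The factory's import path and the exact DNSClient interface are not visible in this hunk, so the client package name below is an assumption; Query and Close mirror the concrete clients.

package main

import (
    "fmt"
    "log"

    // Assumed import path for the factory shown in this hunk; the
    // actual package path does not appear in the diff.
    "github.com/afonsofrancof/sdns-proxy/common/client"

    "github.com/miekg/dns"
)

func main() {
    // Each upstream string exercises a different branch of createClient.
    for _, upstream := range []string{
        "1.1.1.1:53",        // no scheme: plain address, now defaults to UDP
        "tcp://9.9.9.9:53",  // new DNS-over-TCP client
        "tls://9.9.9.9:853", // DoT
    } {
        c, err := client.New(upstream, client.Options{DNSSEC: true, KeepAlive: true})
        if err != nil {
            log.Fatalf("client for %s: %v", upstream, err)
        }
        msg := new(dns.Msg)
        msg.SetQuestion("example.com.", dns.TypeA)
        resp, err := c.Query(msg)
        if err != nil {
            log.Fatalf("query via %s: %v", upstream, err)
        }
        fmt.Printf("%s -> %d answers\n", upstream, len(resp.Answer))
        c.Close()
    }
}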


@@ -1,3 +1,85 @@
package dnscrypt
// DNSCrypt resolver implementation
import (
"fmt"
"time"
"github.com/afonsofrancof/sdns-proxy/common/logger"
"github.com/ameshkov/dnscrypt/v2"
"github.com/miekg/dns"
)
type Config struct {
ServerStamp string
DNSSEC bool
WriteTimeout time.Duration
ReadTimeout time.Duration
}
type Client struct {
resolver *dnscrypt.Client
config Config
ri *dnscrypt.ResolverInfo
}
func New(config Config) (*Client, error) {
logger.Debug("Creating DNSCrypt client with stamp: %s", config.ServerStamp)
if config.ServerStamp == "" {
logger.Error("DNSCrypt client creation failed: empty ServerStamp")
return nil, fmt.Errorf("dnscrypt: ServerStamp cannot be empty")
}
if config.WriteTimeout <= 0 {
config.WriteTimeout = 5 * time.Second
}
if config.ReadTimeout <= 0 {
config.ReadTimeout = 10 * time.Second
}
resolver := &dnscrypt.Client{
Net: "udp",
Timeout: config.ReadTimeout,
}
// Resolve the server info from the stamp
ri, err := resolver.Dial(config.ServerStamp)
if err != nil {
logger.Error("DNSCrypt failed to dial server: %v", err)
return nil, fmt.Errorf("dnscrypt: failed to dial server: %w", err)
}
logger.Debug("DNSCrypt client created (DNSSEC: %v)", config.DNSSEC)
return &Client{
resolver: resolver,
config: config,
ri: ri,
}, nil
}
func (c *Client) Close() {
// The dnscrypt library doesn't require explicit cleanup
}
func (c *Client) Query(msg *dns.Msg) (*dns.Msg, error) {
if len(msg.Question) > 0 {
question := msg.Question[0]
logger.Debug("DNSCrypt query: %s %s", question.Name, dns.TypeToString[question.Qtype])
}
if c.config.DNSSEC {
msg.SetEdns0(4096, true)
}
response, err := c.resolver.Exchange(msg, c.ri)
if err != nil {
logger.Error("DNSCrypt query failed: %v", err)
return nil, fmt.Errorf("dnscrypt: query failed: %w", err)
}
if len(response.Answer) > 0 {
logger.Debug("DNSCrypt response: %d answers", len(response.Answer))
}
return response, nil
}
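A minimal usage sketch for the new package (not part of this commit), using the import path from the factory's import block above; the server stamp is deliberately elided.

package main

import (
    "fmt"
    "log"

    "github.com/afonsofrancof/sdns-proxy/common/protocols/dnscrypt"
    "github.com/miekg/dns"
)

func main() {
    // An sdns:// stamp encodes the server address, provider name, and
    // public key; substitute a real stamp for the elided one here.
    c, err := dnscrypt.New(dnscrypt.Config{
        ServerStamp: "sdns://...",
        DNSSEC:      true,
    })
    if err != nil {
        log.Fatal(err)
    }
    defer c.Close()

    msg := new(dns.Msg)
    msg.SetQuestion("example.com.", dns.TypeA)
    resp, err := c.Query(msg)
    if err != nil {
        log.Fatal(err)
    }
    fmt.Println(dns.RcodeToString[resp.Rcode], len(resp.Answer), "answers")
}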


@@ -0,0 +1,222 @@
package dotcp
import (
"encoding/binary"
"fmt"
"io"
"net"
"sync"
"time"
"github.com/afonsofrancof/sdns-proxy/common/logger"
"github.com/miekg/dns"
)
type Config struct {
HostAndPort string
DNSSEC bool
KeepAlive bool
WriteTimeout time.Duration
ReadTimeout time.Duration
}
type Client struct {
hostAndPort string
config Config
conn net.Conn
connMutex sync.Mutex
}
func New(config Config) (*Client, error) {
logger.Debug("Creating DoTCP client: %s (KeepAlive: %v)", config.HostAndPort, config.KeepAlive)
if config.HostAndPort == "" {
logger.Error("DoTCP client creation failed: empty HostAndPort")
return nil, fmt.Errorf("dotcp: HostAndPort cannot be empty")
}
if config.WriteTimeout <= 0 {
config.WriteTimeout = 2 * time.Second
}
if config.ReadTimeout <= 0 {
config.ReadTimeout = 5 * time.Second
}
client := &Client{
hostAndPort: config.HostAndPort,
config: config,
}
if config.KeepAlive {
if err := client.ensureConnection(); err != nil {
logger.Error("DoTCP failed to establish initial connection: %v", err)
return nil, fmt.Errorf("failed to establish initial connection: %w", err)
}
}
logger.Debug("DoTCP client created: %s (DNSSEC: %v, KeepAlive: %v)", config.HostAndPort, config.DNSSEC, config.KeepAlive)
return client, nil
}
func (c *Client) Close() {
logger.Debug("Closing DoTCP client")
c.connMutex.Lock()
defer c.connMutex.Unlock()
if c.conn != nil {
c.conn.Close()
c.conn = nil
}
}
func (c *Client) ensureConnection() error {
c.connMutex.Lock()
defer c.connMutex.Unlock()
if c.conn != nil {
// Probe the idle connection: a 1ms read should time out if the peer
// is alive but silent; any data, EOF, or reset means the connection
// is unusable for a fresh exchange.
if err := c.conn.SetReadDeadline(time.Now().Add(time.Millisecond)); err == nil {
var testBuf [1]byte
_, err := c.conn.Read(testBuf[:])
c.conn.SetReadDeadline(time.Time{})
if netErr, ok := err.(net.Error); ok && netErr.Timeout() {
return nil
}
logger.Debug("DoTCP connection test failed, reconnecting: %v", err)
}
// Probe failed (or the deadline could not be set): close the stale
// connection before dialing a new one so it is not leaked.
c.conn.Close()
c.conn = nil
}
logger.Debug("Establishing DoTCP connection to %s", c.hostAndPort)
dialer := &net.Dialer{
Timeout: c.config.WriteTimeout,
}
conn, err := dialer.Dial("tcp", c.hostAndPort)
if err != nil {
logger.Error("DoTCP connection failed to %s: %v", c.hostAndPort, err)
return err
}
c.conn = conn
logger.Debug("DoTCP connection established to %s", c.hostAndPort)
return nil
}
func (c *Client) Query(msg *dns.Msg) (*dns.Msg, error) {
if len(msg.Question) > 0 {
question := msg.Question[0]
logger.Debug("DoTCP query: %s %s to %s", question.Name, dns.TypeToString[question.Qtype], c.hostAndPort)
}
if c.config.KeepAlive {
if err := c.ensureConnection(); err != nil {
return nil, fmt.Errorf("dotcp: failed to ensure connection: %w", err)
}
} else {
c.connMutex.Lock()
if c.conn != nil {
c.conn.Close()
c.conn = nil
}
c.connMutex.Unlock()
if err := c.ensureConnection(); err != nil {
return nil, fmt.Errorf("dotcp: failed to create connection: %w", err)
}
}
if c.config.DNSSEC {
msg.SetEdns0(4096, true)
}
packed, err := msg.Pack()
if err != nil {
logger.Error("DoTCP failed to pack message: %v", err)
return nil, fmt.Errorf("dotcp: failed to pack message: %w", err)
}
// DNS over TCP (RFC 1035 §4.2.2) prefixes each message with its
// length as a two-byte big-endian integer
length := make([]byte, 2)
binary.BigEndian.PutUint16(length, uint16(len(packed)))
data := append(length, packed...)
c.connMutex.Lock()
conn := c.conn
c.connMutex.Unlock()
if err := conn.SetWriteDeadline(time.Now().Add(c.config.WriteTimeout)); err != nil {
logger.Error("DoTCP failed to set write deadline: %v", err)
return nil, fmt.Errorf("dotcp: failed to set write deadline: %w", err)
}
if _, err := conn.Write(data); err != nil {
logger.Error("DoTCP failed to write message to %s: %v", c.hostAndPort, err)
if c.config.KeepAlive {
logger.Debug("DoTCP write failed with keep-alive, attempting reconnect")
if reconnectErr := c.ensureConnection(); reconnectErr != nil {
return nil, fmt.Errorf("dotcp: failed to reconnect: %w", reconnectErr)
}
c.connMutex.Lock()
conn = c.conn
c.connMutex.Unlock()
if err := conn.SetWriteDeadline(time.Now().Add(c.config.WriteTimeout)); err != nil {
return nil, fmt.Errorf("dotcp: failed to set write deadline after reconnect: %w", err)
}
if _, err := conn.Write(data); err != nil {
return nil, fmt.Errorf("dotcp: failed to write message after reconnect: %w", err)
}
} else {
return nil, fmt.Errorf("dotcp: failed to write message: %w", err)
}
}
if err := conn.SetReadDeadline(time.Now().Add(c.config.ReadTimeout)); err != nil {
logger.Error("DoTCP failed to set read deadline: %v", err)
return nil, fmt.Errorf("dotcp: failed to set read deadline: %w", err)
}
lengthBuf := make([]byte, 2)
if _, err := io.ReadFull(conn, lengthBuf); err != nil {
logger.Error("DoTCP failed to read response length from %s: %v", c.hostAndPort, err)
return nil, fmt.Errorf("dotcp: failed to read response length: %w", err)
}
msgLen := binary.BigEndian.Uint16(lengthBuf)
if msgLen > dns.MaxMsgSize {
logger.Error("DoTCP response too large from %s: %d bytes", c.hostAndPort, msgLen)
return nil, fmt.Errorf("dotcp: response message too large: %d", msgLen)
}
buffer := make([]byte, msgLen)
if _, err := io.ReadFull(conn, buffer); err != nil {
logger.Error("DoTCP failed to read response from %s: %v", c.hostAndPort, err)
return nil, fmt.Errorf("dotcp: failed to read response: %w", err)
}
response := new(dns.Msg)
if err := response.Unpack(buffer); err != nil {
logger.Error("DoTCP failed to unpack response from %s: %v", c.hostAndPort, err)
return nil, fmt.Errorf("dotcp: failed to unpack response: %w", err)
}
if len(response.Answer) > 0 {
logger.Debug("DoTCP response from %s: %d answers", c.hostAndPort, len(response.Answer))
}
if !c.config.KeepAlive {
c.connMutex.Lock()
if c.conn != nil {
c.conn.Close()
c.conn = nil
}
c.connMutex.Unlock()
}
return response, nil
}
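A minimal usage sketch for the new package (not part of this commit), again using the import path from the factory's import block. With KeepAlive set, New dials eagerly and the queries below should reuse one TCP connection, reconnecting only when the liveness probe in ensureConnection fails. 9.9.9.9:53 is just a placeholder resolver that accepts DNS over TCP.

package main

import (
    "fmt"
    "log"

    "github.com/afonsofrancof/sdns-proxy/common/protocols/dotcp"
    "github.com/miekg/dns"
)

func main() {
    c, err := dotcp.New(dotcp.Config{
        HostAndPort: "9.9.9.9:53",
        KeepAlive:   true,
    })
    if err != nil {
        log.Fatal(err)
    }
    defer c.Close()

    for _, name := range []string{"example.com.", "example.org."} {
        msg := new(dns.Msg)
        msg.SetQuestion(name, dns.TypeA)
        resp, err := c.Query(msg)
        if err != nil {
            log.Fatal(err)
        }
        fmt.Printf("%s -> %d answers\n", name, len(resp.Answer))
    }
}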


@@ -1,4 +1,4 @@
package do53
package doudp
import (
"fmt"
@@ -22,11 +22,11 @@ type Client struct {
}
func New(config Config) (*Client, error) {
logger.Debug("Creating DO53 client: %s", config.HostAndPort)
logger.Debug("Creating DoUDP client: %s", config.HostAndPort)
if config.HostAndPort == "" {
logger.Error("DO53 client creation failed: empty HostAndPort")
return nil, fmt.Errorf("do53: HostAndPort cannot be empty")
logger.Error("DoUDP client creation failed: empty HostAndPort")
return nil, fmt.Errorf("doudp: HostAndPort cannot be empty")
}
if config.WriteTimeout <= 0 {
config.WriteTimeout = 2 * time.Second
@@ -35,7 +35,7 @@ func New(config Config) (*Client, error) {
config.ReadTimeout = 5 * time.Second
}
logger.Debug("DO53 client created: %s (DNSSEC: %v)", config.HostAndPort, config.DNSSEC)
logger.Debug("DoUDP client created: %s (DNSSEC: %v)", config.HostAndPort, config.DNSSEC)
return &Client{
hostAndPort: config.HostAndPort,
@@ -44,36 +44,35 @@ func New(config Config) (*Client, error) {
}
func (c *Client) Close() {
logger.Debug("Closing DO53 client")
logger.Debug("Closing DoUDP client")
}
func (c *Client) createConnection() (*net.UDPConn, error) {
udpAddr, err := net.ResolveUDPAddr("udp", c.hostAndPort)
if err != nil {
logger.Error("DO53 failed to resolve address %s: %v", c.hostAndPort, err)
logger.Error("DoUDP failed to resolve address %s: %v", c.hostAndPort, err)
return nil, fmt.Errorf("failed to resolve UDP address: %w", err)
}
conn, err := net.DialUDP("udp", nil, udpAddr)
if err != nil {
logger.Error("DO53 failed to connect to %s: %v", c.hostAndPort, err)
logger.Error("DoUDP failed to connect to %s: %v", c.hostAndPort, err)
return nil, err
}
logger.Debug("DO53 connection established to %s", c.hostAndPort)
logger.Debug("DoUDP connection established to %s", c.hostAndPort)
return conn, nil
}
func (c *Client) Query(msg *dns.Msg) (*dns.Msg, error) {
if len(msg.Question) > 0 {
question := msg.Question[0]
logger.Debug("DO53 query: %s %s to %s", question.Name, dns.TypeToString[question.Qtype], c.hostAndPort)
logger.Debug("DoUDP query: %s %s to %s", question.Name, dns.TypeToString[question.Qtype], c.hostAndPort)
}
// Create connection for this query
conn, err := c.createConnection()
if err != nil {
return nil, fmt.Errorf("do53: failed to create connection: %w", err)
return nil, fmt.Errorf("doudp: failed to create connection: %w", err)
}
defer conn.Close()
@@ -83,43 +82,40 @@ func (c *Client) Query(msg *dns.Msg) (*dns.Msg, error) {
packedMsg, err := msg.Pack()
if err != nil {
logger.Error("DO53 failed to pack message: %v", err)
return nil, fmt.Errorf("do53: failed to pack DNS message: %w", err)
logger.Error("DoUDP failed to pack message: %v", err)
return nil, fmt.Errorf("doudp: failed to pack DNS message: %w", err)
}
// Send query
if err := conn.SetWriteDeadline(time.Now().Add(c.config.WriteTimeout)); err != nil {
logger.Error("DO53 failed to set write deadline: %v", err)
return nil, fmt.Errorf("do53: failed to set write deadline: %w", err)
logger.Error("DoUDP failed to set write deadline: %v", err)
return nil, fmt.Errorf("doudp: failed to set write deadline: %w", err)
}
if _, err := conn.Write(packedMsg); err != nil {
logger.Error("DO53 failed to send query to %s: %v", c.hostAndPort, err)
return nil, fmt.Errorf("do53: failed to send DNS query: %w", err)
logger.Error("DoUDP failed to send query to %s: %v", c.hostAndPort, err)
return nil, fmt.Errorf("doudp: failed to send DNS query: %w", err)
}
// Read response
if err := conn.SetReadDeadline(time.Now().Add(c.config.ReadTimeout)); err != nil {
logger.Error("DO53 failed to set read deadline: %v", err)
return nil, fmt.Errorf("do53: failed to set read deadline: %w", err)
logger.Error("DoUDP failed to set read deadline: %v", err)
return nil, fmt.Errorf("doudp: failed to set read deadline: %w", err)
}
buffer := make([]byte, dns.MaxMsgSize)
n, err := conn.Read(buffer)
if err != nil {
logger.Error("DO53 failed to read response from %s: %v", c.hostAndPort, err)
return nil, fmt.Errorf("do53: failed to read DNS response: %w", err)
logger.Error("DoUDP failed to read response from %s: %v", c.hostAndPort, err)
return nil, fmt.Errorf("doudp: failed to read DNS response: %w", err)
}
// Parse response
response := new(dns.Msg)
if err := response.Unpack(buffer[:n]); err != nil {
logger.Error("DO53 failed to unpack response from %s: %v", c.hostAndPort, err)
return nil, fmt.Errorf("do53: failed to unpack DNS response: %w", err)
logger.Error("DoUDP failed to unpack response from %s: %v", c.hostAndPort, err)
return nil, fmt.Errorf("doudp: failed to unpack DNS response: %w", err)
}
if len(response.Answer) > 0 {
logger.Debug("DO53 response from %s: %d answers", c.hostAndPort, len(response.Answer))
logger.Debug("DoUDP response from %s: %d answers", c.hostAndPort, len(response.Answer))
}
return response, nil

go.mod

@@ -1,26 +1,29 @@
module github.com/afonsofrancof/sdns-proxy
go 1.24.0
go 1.24.1
require (
github.com/alecthomas/kong v1.8.1
github.com/ameshkov/dnscrypt/v2 v2.4.0
github.com/google/gopacket v1.1.19
github.com/miekg/dns v1.1.63
github.com/miekg/dns v1.1.65
github.com/quic-go/quic-go v0.50.0
golang.org/x/net v0.35.0
golang.org/x/net v0.38.0
)
require (
github.com/AdguardTeam/golibs v0.32.7 // indirect
github.com/ameshkov/dnsstamps v1.0.3 // indirect
github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 // indirect
github.com/google/pprof v0.0.0-20210407192527-94a9f03dee38 // indirect
github.com/onsi/ginkgo/v2 v2.9.5 // indirect
github.com/quic-go/qpack v0.5.1 // indirect
go.uber.org/mock v0.5.0 // indirect
golang.org/x/crypto v0.33.0 // indirect
golang.org/x/exp v0.0.0-20240506185415-9bf2ced13842 // indirect
golang.org/x/mod v0.18.0 // indirect
golang.org/x/sync v0.11.0 // indirect
golang.org/x/sys v0.30.0 // indirect
golang.org/x/text v0.22.0 // indirect
golang.org/x/tools v0.22.0 // indirect
golang.org/x/crypto v0.37.0 // indirect
golang.org/x/exp v0.0.0-20250305212735-054e65f0b394 // indirect
golang.org/x/mod v0.24.0 // indirect
golang.org/x/sync v0.13.0 // indirect
golang.org/x/sys v0.32.0 // indirect
golang.org/x/text v0.24.0 // indirect
golang.org/x/tools v0.31.0 // indirect
)

go.sum

@@ -1,23 +1,29 @@
github.com/AdguardTeam/golibs v0.32.7 h1:3dmGlAVgmvquCCwHsvEl58KKcRAK3z1UnjMnwSIeDH4=
github.com/AdguardTeam/golibs v0.32.7/go.mod h1:bE8KV1zqTzgZjmjFyBJ9f9O5DEKO717r7e57j1HclJA=
github.com/alecthomas/assert/v2 v2.11.0 h1:2Q9r3ki8+JYXvGsDyBXwH3LcJ+WK5D0gc5E8vS6K3D0=
github.com/alecthomas/assert/v2 v2.11.0/go.mod h1:Bze95FyfUr7x34QZrjL+XP+0qgp/zg8yS+TtBj1WA3k=
github.com/alecthomas/kong v1.8.1 h1:6aamvWBE/REnR/BCq10EcozmcpUPc5aGI1lPAWdB0EE=
github.com/alecthomas/kong v1.8.1/go.mod h1:p2vqieVMeTAnaC83txKtXe8FLke2X07aruPWXyMPQrU=
github.com/alecthomas/repr v0.4.0 h1:GhI2A8MACjfegCPVq9f1FLvIBS+DrQ2KQBFZP1iFzXc=
github.com/alecthomas/repr v0.4.0/go.mod h1:Fr0507jx4eOXV7AlPV6AVZLYrLIuIeSOWtW57eE/O/4=
github.com/ameshkov/dnscrypt/v2 v2.4.0 h1:if6ZG2cuQmcP2TwSY+D0+8+xbPfoatufGlOQTMNkI9o=
github.com/ameshkov/dnscrypt/v2 v2.4.0/go.mod h1:WpEFV2uhebXb8Jhes/5/fSdpmhGV8TL22RDaeWwV6hI=
github.com/ameshkov/dnsstamps v1.0.3 h1:Srzik+J9mivH1alRACTbys2xOxs0lRH9qnTA7Y1OYVo=
github.com/ameshkov/dnsstamps v1.0.3/go.mod h1:Ii3eUu73dx4Vw5O4wjzmT5+lkCwovjzaEZZ4gKyIH5A=
github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI=
github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI=
github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/go-logr/logr v1.2.4 h1:g01GSCwiDw2xSZfjJ2/T9M+S6pFdcNtFYsp+Y43HYDQ=
github.com/go-logr/logr v1.2.4/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A=
github.com/go-logr/logr v1.4.2 h1:6pFjapn8bFcIbiKo3XT4j/BhANplGihG6tvd+8rYgrY=
github.com/go-logr/logr v1.4.2/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY=
github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 h1:tfuBGBXKqDEevZMzYi5KSi8KkcZtzBcTgAUUtapy0OI=
github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572/go.mod h1:9Pwr4B2jHnOSGXyyzV8ROjYa2ojvAY6HCGYYfMoC3Ls=
github.com/golang/protobuf v1.5.3 h1:KhyjKVUg7Usr/dYsdSqoFveMYd5ko72D+zANwlG1mmg=
github.com/golang/protobuf v1.5.3/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY=
github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI=
github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8=
github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU=
github.com/google/gopacket v1.1.19 h1:ves8RnFZPGiFnTS0uPQStjwru6uO6h+nlr9j6fL7kF8=
github.com/google/gopacket v1.1.19/go.mod h1:iJ8V8n6KS+z2U1A8pUwu8bW5SyEMkXJB8Yo/Vo+TKTo=
github.com/google/pprof v0.0.0-20210407192527-94a9f03dee38 h1:yAJXTCF9TqKcTiHJAE8dj7HMvPfh66eeA2JYW7eFpSE=
@@ -25,8 +31,8 @@ github.com/google/pprof v0.0.0-20210407192527-94a9f03dee38/go.mod h1:kpwsk12EmLe
github.com/hexops/gotextdiff v1.0.3 h1:gitA9+qJrrTCsiCl7+kh75nPqQt1cx4ZkudSTLoUqJM=
github.com/hexops/gotextdiff v1.0.3/go.mod h1:pSWU5MAI3yDq+fZBTazCSJysOMbxWL1BSow5/V2vxeg=
github.com/ianlancetaylor/demangle v0.0.0-20200824232613-28f6c0f3b639/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc=
github.com/miekg/dns v1.1.63 h1:8M5aAw6OMZfFXTT7K5V0Eu5YiiL8l7nUAkyN6C9YwaY=
github.com/miekg/dns v1.1.63/go.mod h1:6NGHfjhpmr5lt3XPLuyfDJi5AXbNIPM9PY6H6sF1Nfs=
github.com/miekg/dns v1.1.65 h1:0+tIPHzUW0GCge7IiK3guGP57VAw7hoPDfApjkMD1Fc=
github.com/miekg/dns v1.1.65/go.mod h1:Dzw9769uoKVaLuODMDZz9M6ynFU6Em65csPuoi8G0ck=
github.com/onsi/ginkgo/v2 v2.9.5 h1:+6Hr4uxzP4XIUyAkg61dWBw8lb/gc4/X5luuxN/EC+Q=
github.com/onsi/ginkgo/v2 v2.9.5/go.mod h1:tvAoo1QUJwNEU2ITftXTpR7R1RbCzoZUOs3RonqW57k=
github.com/onsi/gomega v1.27.6 h1:ENqfyGeS5AX/rlXDd/ETokDz93u0YufY1Pgxuy/PvWE=
@@ -39,43 +45,43 @@ github.com/quic-go/quic-go v0.50.0 h1:3H/ld1pa3CYhkcc20TPIyG1bNsdhn9qZBGN3b9/UyU
github.com/quic-go/quic-go v0.50.0/go.mod h1:Vim6OmUvlYdwBhXP9ZVrtGmCMWa3wEqhq3NgYrI8b4E=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg=
github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA=
github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
go.uber.org/mock v0.5.0 h1:KAMbZvZPyBPWgD14IrIQ38QCyjwpvVVV6K/bHl1IwQU=
go.uber.org/mock v0.5.0/go.mod h1:ge71pBPLYDk7QIi1LupWxdAykm7KIEFchiOqd6z7qMM=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
golang.org/x/crypto v0.33.0 h1:IOBPskki6Lysi0lo9qQvbxiQ+FvsCC/YWOecCHAixus=
golang.org/x/crypto v0.33.0/go.mod h1:bVdXmD7IV/4GdElGPozy6U7lWdRXA4qyRVGJV57uQ5M=
golang.org/x/exp v0.0.0-20240506185415-9bf2ced13842 h1:vr/HnozRka3pE4EsMEg1lgkXJkTFJCVUX+S/ZT6wYzM=
golang.org/x/exp v0.0.0-20240506185415-9bf2ced13842/go.mod h1:XtvwrStGgqGPLc4cjQfWqZHG1YFdYs6swckp8vpsjnc=
golang.org/x/crypto v0.37.0 h1:kJNSjF/Xp7kU0iB2Z+9viTPMW4EqqsrywMXLJOOsXSE=
golang.org/x/crypto v0.37.0/go.mod h1:vg+k43peMZ0pUMhYmVAWysMK35e6ioLh3wB8ZCAfbVc=
golang.org/x/exp v0.0.0-20250305212735-054e65f0b394 h1:nDVHiLt8aIbd/VzvPWN6kSOPE7+F/fNFDSXLVYkE/Iw=
golang.org/x/exp v0.0.0-20250305212735-054e65f0b394/go.mod h1:sIifuuw/Yco/y6yb6+bDNfyeQ/MdPUy/hKEMYQV17cM=
golang.org/x/lint v0.0.0-20200302205851-738671d3881b/go.mod h1:3xt1FjdF8hUf6vQPIChWIBhFzV8gjjsPE/fR3IyQdNY=
golang.org/x/mod v0.1.1-0.20191105210325-c90efee705ee/go.mod h1:QqPTAvyqsEbceGzBzNggFXnrqF1CaUcvgkdR5Ot7KZg=
golang.org/x/mod v0.18.0 h1:5+9lSbEzPSdWkH32vYPBwEpX8KwDbM52Ud9xBUvNlb0=
golang.org/x/mod v0.18.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c=
golang.org/x/mod v0.24.0 h1:ZfthKaKaT4NrhGVZHO1/WDTwGES4De8KtWO0SIbNJMU=
golang.org/x/mod v0.24.0/go.mod h1:IXM97Txy2VM4PJ3gI61r1YEk/gAj6zAHN3AdZt6S9Ww=
golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.35.0 h1:T5GQRQb2y08kTAByq9L4/bz8cipCdA8FbRTXewonqY8=
golang.org/x/net v0.35.0/go.mod h1:EglIi67kWsHKlRzzVMUD93VMSWGFOMSZgxFjparz1Qk=
golang.org/x/net v0.38.0 h1:vRMAPTMaeGqVhG5QyLJHqNDwecKTomGeqbnfZyKlBI8=
golang.org/x/net v0.38.0/go.mod h1:ivrbrMbzFq5J41QOQh0siUuly180yBYtLp+CKbEaFx8=
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.11.0 h1:GGz8+XQP4FvTTrjZPzNKTMFtSXH80RAzG+5ghFPgK9w=
golang.org/x/sync v0.11.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
golang.org/x/sync v0.13.0 h1:AauUjRAJ9OSnvULf/ARrrVywoJDy0YS2AwQ98I37610=
golang.org/x/sync v0.13.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20191204072324-ce4227a45e2e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.30.0 h1:QjkSwP/36a20jFYWkSue1YwXzLmsV5Gfq7Eiy72C1uc=
golang.org/x/sys v0.30.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/sys v0.32.0 h1:s77OFDvIQeibCmezSnk/q6iAfkdiQaJi4VzroCFrN20=
golang.org/x/sys v0.32.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.22.0 h1:bofq7m3/HAFvbF51jz3Q9wLg3jkvSPuiZu/pD1XwgtM=
golang.org/x/text v0.22.0/go.mod h1:YRoo4H8PVmsu+E3Ou7cqLVH8oXWIHVoX0jqUWALQhfY=
golang.org/x/time v0.5.0 h1:o7cqy6amK/52YcAKIPlM3a+Fpj35zvRj2TP+e1xFSfk=
golang.org/x/time v0.5.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM=
golang.org/x/text v0.24.0 h1:dd5Bzh4yt5KYA8f9CJHCP4FB4D51c2c6JvN37xJJkJ0=
golang.org/x/text v0.24.0/go.mod h1:L8rBsPeo2pSS+xqN0d5u2ikmjtmoJbDBT1b7nHvFCdU=
golang.org/x/time v0.11.0 h1:/bpjEDfN9tkoN/ryeYHnv5hcMlc8ncjMcM4XBk5NWV0=
golang.org/x/time v0.11.0/go.mod h1:CDIdPxbZBQxdj6cxyCIdrNogrJKMJ7pr37NYpMcMDSg=
golang.org/x/tools v0.0.0-20200130002326-2f3ba24bd6e7/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28=
golang.org/x/tools v0.22.0 h1:gqSGLZqv+AI9lIQzniJ0nZDRG5GBPsSi+DRNHWNz6yA=
golang.org/x/tools v0.22.0/go.mod h1:aCwcsjqvq7Yqt6TNyX7QMU2enbQ/Gt0bo6krSeEri+c=
golang.org/x/tools v0.31.0 h1:0EedkvKDbh+qistFTd0Bcwe/YLh4vHwWEkiI0toFIBU=
golang.org/x/tools v0.31.0/go.mod h1:naFTU+Cev749tSJRXJlna0T3WxKvb1kWEx15xA4SdmQ=
golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
google.golang.org/protobuf v1.33.0 h1:uNO2rsAINq/JlFpSdYEKIZ0uKD/R9cpdv0T+yoGwGmI=
google.golang.org/protobuf v1.33.0/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos=
google.golang.org/protobuf v1.36.5 h1:tPhr+woSbjfYvY6/GPufUoYizxw1cF/yFoxJ2fmpwlM=
google.golang.org/protobuf v1.36.5/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=


@@ -1,3 +1,4 @@
// ./internal/qol/utils.go
package qol
import (
@@ -15,7 +16,7 @@ func GenerateOutputPaths(outputDir, upstream string, dnssec, authDNSSEC, keepAli
base := proto
var flags []string
if dnssec {
if authDNSSEC {
flags = append(flags, "auth")
@@ -57,6 +58,7 @@ func cleanServerName(server string) string {
"94.140.15.15": "adguard",
"dns.adguard.com": "adguard",
"dns.adguard-dns.com": "adguard",
"AQMAAAAAAAAAETk0LjE0MC4xNS4xNTo1NDQzINErR_JS3PLCu_iZEIbq95zkSV2LFsigxDIuUso_OQhzIjIuZG5zY3J5cHQuZGVmYXVsdC5uczEuYWRndWFyZC5jb20": "adguard",
}
serverName := ""
@@ -76,11 +78,31 @@ func cleanServerName(server string) string {
}
func DetectProtocol(upstream string) string {
if strings.Contains(upstream, "://") {
u, err := url.Parse(upstream)
if err == nil && u.Scheme != "" {
return strings.ToLower(u.Scheme)
scheme := strings.ToLower(u.Scheme)
// Normalize scheme names
switch scheme {
case "udp", "doudp":
return "doudp"
case "tcp", "dotcp":
return "dotcp"
case "tls", "dot":
return "dot"
case "https", "doh":
return "doh"
case "doh3":
return "doh3"
case "doq":
return "doq"
case "sdns":
return "dnscrypt"
default:
return scheme
}
}
}
return "do53"
return "doudp"
}
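Illustrative inputs and the labels the normalized DetectProtocol now returns, as a small sketch against the switch above (the import path follows from the module name and the ./internal/qol/utils.go comment):

package main

import (
    "fmt"

    "github.com/afonsofrancof/sdns-proxy/internal/qol"
)

func main() {
    for _, upstream := range []string{
        "udp://1.1.1.1:53",             // -> "doudp"
        "tcp://1.1.1.1:53",             // -> "dotcp"
        "tls://1.1.1.1:853",            // -> "dot"
        "https://dns.google/dns-query", // -> "doh"
        "sdns://...",                   // stamp elided -> "dnscrypt"
        "1.1.1.1:53",                   // no scheme -> "doudp"
    } {
        fmt.Println(upstream, "->", qol.DetectProtocol(upstream))
    }
}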


@@ -1,289 +1,498 @@
import csv
import os
import statistics
from collections import defaultdict
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from pathlib import Path
from scipy import stats
import warnings
def map_server_to_resolver(server):
"""Map server address/domain to resolver name"""
server_lower = server.lower()
if '1.1.1.1' in server_lower or 'cloudflare' in server_lower:
return 'Cloudflare'
elif '8.8.8.8' in server_lower or 'google' in server_lower:
return 'Google'
elif '9.9.9.9' in server_lower or 'quad9' in server_lower:
return 'Quad9'
elif 'adguard' in server_lower:
return 'AdGuard'
else:
return server # Fallback to original server name
warnings.filterwarnings('ignore')
def extract_from_new_format(filename):
"""Parse new filename format: protocol[-flags]-timestamp.csv"""
base = filename.replace('.csv', '')
parts = base.split('-')
if len(parts) < 2:
return None, None, None, None
protocol = parts[0]
timestamp = parts[-1]
# Flags are everything between protocol and timestamp
flags_str = '-'.join(parts[1:-1])
# Determine DNSSEC status
if 'auth' in flags_str:
dnssec_status = 'auth' # Authoritative DNSSEC
elif 'trust' in flags_str:
dnssec_status = 'trust' # Trust-based DNSSEC
else:
dnssec_status = 'off'
keepalive_status = 'on' if 'persist' in flags_str else 'off'
return protocol, dnssec_status, keepalive_status, flags_str
# Set style for publication-quality plots
sns.set_style("whitegrid")
plt.rcParams['figure.dpi'] = 300
plt.rcParams['savefig.dpi'] = 300
plt.rcParams['font.size'] = 10
plt.rcParams['figure.figsize'] = (12, 6)
def extract_server_info_from_csv(row):
"""Extract DNSSEC info from CSV row data"""
dnssec = row.get('dnssec', 'false').lower() == 'true'
auth_dnssec = row.get('auth_dnssec', 'false').lower() == 'true'
keepalive = row.get('keep_alive', 'false').lower() == 'true'
if dnssec:
if auth_dnssec:
dnssec_status = 'auth'
else:
dnssec_status = 'trust'
else:
dnssec_status = 'off'
keepalive_status = 'on' if keepalive else 'off'
return dnssec_status, keepalive_status
def extract_server_info(file_path, row):
"""Extract info using directory structure, filename, and CSV data"""
path = Path(file_path)
# First try to get DNSSEC info from CSV row (most accurate)
try:
csv_dnssec_status, csv_keepalive_status = extract_server_info_from_csv(row)
protocol = row.get('protocol', '').lower()
class DNSAnalyzer:
def __init__(self, results_dir='results'):
self.results_dir = Path(results_dir)
self.df = None
# Get server from directory structure
parts = path.parts
if len(parts) >= 4:
potential_date = parts[-2]
# Check if it's a date like YYYY-MM-DD
if len(potential_date) == 10 and potential_date[4] == '-' and potential_date[7] == '-' and potential_date.replace('-', '').isdigit():
server = parts[-3] # resolver folder (e.g., cloudflare)
return protocol, server, csv_dnssec_status, csv_keepalive_status
def load_all_data(self):
"""Load all CSV files from the results directory"""
data_frames = []
# Fallback to DNS server field
server = row.get('dns_server', '')
return protocol, server, csv_dnssec_status, csv_keepalive_status
providers = ['adguard', 'cloudflare', 'google', 'quad9']
except (KeyError, ValueError):
pass
# Fallback to filename parsing
filename = path.name
protocol, dnssec_status, keepalive_status, flags = extract_from_new_format(filename)
if protocol:
# Get server from directory structure
parts = path.parts
if len(parts) >= 4:
potential_date = parts[-2]
if len(potential_date) == 10 and potential_date[4] == '-' and potential_date[7] == '-' and potential_date.replace('-', '').isdigit():
server = parts[-3]
return protocol, server, dnssec_status, keepalive_status
# Fallback to DNS server field
server = row.get('dns_server', '')
return protocol, server, dnssec_status, keepalive_status
return None, None, None, None
def get_dnssec_display_name(dnssec_status):
"""Convert DNSSEC status to display name"""
if dnssec_status == 'auth':
return 'DNSSEC (Authoritative)'
elif dnssec_status == 'trust':
return 'DNSSEC (Trust-based)'
else:
return 'No DNSSEC'
def analyze_dns_data(root_directory, output_file):
"""Analyze DNS data and generate metrics"""
# Dictionary to store measurements: {(resolver, protocol, dnssec, keepalive): [durations]}
measurements = defaultdict(list)
# Walk through all directories
for root, dirs, files in os.walk(root_directory):
for file in files:
if file.endswith('.csv'):
file_path = os.path.join(root, file)
print(f"Processing: {file_path}")
for provider in providers:
provider_path = self.results_dir / provider
if not provider_path.exists():
continue
for csv_file in provider_path.glob('*.csv'):
try:
with open(file_path, 'r', newline='') as csvfile:
reader = csv.DictReader(csvfile)
for row_num, row in enumerate(reader, 2): # Start at 2 since header is row 1
try:
protocol, server, dnssec_status, keepalive_status = extract_server_info(file_path, row)
if protocol and server:
resolver = map_server_to_resolver(server)
duration_ms = float(row.get('duration_ms', 0))
# Only include successful queries
if row.get('response_code', '') in ['NOERROR', '']:
key = (resolver, protocol, dnssec_status, keepalive_status)
measurements[key].append(duration_ms)
except (ValueError, TypeError) as e:
print(f"Data parse error in {file_path} row {row_num}: {e}")
continue
df = pd.read_csv(csv_file)
df['provider'] = provider
df['test_config'] = csv_file.stem
data_frames.append(df)
except Exception as e:
print(f"Error processing file {file_path}: {e}")
continue
# Calculate statistics grouped by resolver first, then by configuration
resolver_results = defaultdict(lambda: defaultdict(lambda: defaultdict(list)))
for (resolver, protocol, dnssec, keepalive), durations in measurements.items():
if durations:
stats = {
'protocol': protocol.upper(),
'dnssec': dnssec,
'keepalive': keepalive,
'total_queries': len(durations),
'avg_latency_ms': round(statistics.mean(durations), 3),
'median_latency_ms': round(statistics.median(durations), 3),
'min_latency_ms': round(min(durations), 3),
'max_latency_ms': round(max(durations), 3),
'std_dev_ms': round(statistics.stdev(durations) if len(durations) > 1 else 0, 3),
'p95_latency_ms': round(statistics.quantiles(durations, n=20)[18], 3) if len(durations) >= 20 else round(max(durations), 3),
'p99_latency_ms': round(statistics.quantiles(durations, n=100)[98], 3) if len(durations) >= 100 else round(max(durations), 3)
}
# Group by resolver -> dnssec -> keepalive -> protocol
resolver_results[resolver][dnssec][keepalive].append(stats)
# Sort each configuration's results by average latency
for resolver in resolver_results:
for dnssec in resolver_results[resolver]:
for keepalive in resolver_results[resolver][dnssec]:
resolver_results[resolver][dnssec][keepalive].sort(key=lambda x: x['avg_latency_ms'])
# Write to CSV with all data
all_results = []
for resolver in resolver_results:
for dnssec in resolver_results[resolver]:
for keepalive in resolver_results[resolver][dnssec]:
for result in resolver_results[resolver][dnssec][keepalive]:
result['resolver'] = resolver
all_results.append(result)
with open(output_file, 'w', newline='') as csvfile:
fieldnames = [
'resolver', 'protocol', 'dnssec', 'keepalive', 'total_queries',
'avg_latency_ms', 'median_latency_ms', 'min_latency_ms',
'max_latency_ms', 'std_dev_ms', 'p95_latency_ms', 'p99_latency_ms'
]
print(f"Error loading {csv_file}: {e}")
writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
writer.writeheader()
writer.writerows(all_results)
print(f"\nAnalysis complete! Full results written to {output_file}")
print(f"Total measurements: {sum(len(durations) for durations in measurements.values())}")
def print_configuration_table(resolver, dnssec_status, keepalive_status, results):
"""Print a formatted table for a specific configuration"""
ka_indicator = "PERSISTENT" if keepalive_status == 'on' else "NEW CONN"
dnssec_display = get_dnssec_display_name(dnssec_status)
self.df = pd.concat(data_frames, ignore_index=True)
self._clean_and_enrich_data()
print(f"Loaded {len(self.df)} DNS queries across {len(data_frames)} test configurations")
print(f"\n {dnssec_display} - {ka_indicator}")
print(" " + "-" * 90)
print(f" {'Protocol':<12} {'Queries':<8} {'Avg(ms)':<10} {'Median(ms)':<12} {'Min(ms)':<10} {'Max(ms)':<10} {'P95(ms)':<10}")
print(" " + "-" * 90)
def _clean_and_enrich_data(self):
"""Clean data and add useful columns"""
# Remove failed queries
self.df = self.df[self.df['error'].isna()]
for result in results:
print(f" {result['protocol']:<12} {result['total_queries']:<8} "
f"{result['avg_latency_ms']:<10} {result['median_latency_ms']:<12} "
f"{result['min_latency_ms']:<10} {result['max_latency_ms']:<10} "
f"{result['p95_latency_ms']:<10}")
# Print results grouped by resolver first
print(f"\n{'=' * 100}")
print("DNS RESOLVER PERFORMANCE COMPARISON")
print(f"{'=' * 100}")
for resolver in sorted(resolver_results.keys()):
print(f"\n{resolver} DNS Resolver")
print("=" * 100)
# Extract protocol base (remove -auth, -trust suffixes)
self.df['protocol_base'] = self.df['protocol'].str.replace('-auth|-trust', '', regex=True)
# Order configurations logically
config_order = [
('off', 'off'), # No DNSSEC, New connections
('off', 'on'), # No DNSSEC, Persistent
('trust', 'off'), # Trust DNSSEC, New connections
('trust', 'on'), # Trust DNSSEC, Persistent
('auth', 'off'), # Auth DNSSEC, New connections
('auth', 'on'), # Auth DNSSEC, Persistent
]
# DNSSEC configuration
self.df['dnssec_mode'] = 'none'
self.df.loc[self.df['auth_dnssec'] == True, 'dnssec_mode'] = 'auth'
self.df.loc[(self.df['dnssec'] == True) & (self.df['auth_dnssec'] == False), 'dnssec_mode'] = 'trust'
for dnssec_status, keepalive_status in config_order:
if dnssec_status in resolver_results[resolver] and keepalive_status in resolver_results[resolver][dnssec_status]:
results = resolver_results[resolver][dnssec_status][keepalive_status]
if results: # Only print if there are results
print_configuration_table(resolver, dnssec_status, keepalive_status, results)
# Summary comparison across resolvers
print(f"\n{'=' * 100}")
print("CROSS-RESOLVER PROTOCOL COMPARISON")
print(f"{'=' * 100}")
# Group by protocol and configuration for cross-resolver comparison
protocol_comparison = defaultdict(lambda: defaultdict(list))
for resolver in resolver_results:
for dnssec in resolver_results[resolver]:
for keepalive in resolver_results[resolver][dnssec]:
for result in resolver_results[resolver][dnssec][keepalive]:
config_key = f"{get_dnssec_display_name(dnssec)} - {'PERSISTENT' if keepalive == 'on' else 'NEW CONN'}"
protocol_comparison[result['protocol']][config_key].append({
'resolver': resolver,
'avg_latency_ms': result['avg_latency_ms'],
'total_queries': result['total_queries']
})
for protocol in sorted(protocol_comparison.keys()):
print(f"\n{protocol} Protocol Comparison")
print("-" * 100)
# Protocol categories
self.df['protocol_category'] = self.df['protocol_base'].map({
'udp': 'Plain DNS',
'tls': 'DoT',
'https': 'DoH',
'doh3': 'DoH/3',
'doq': 'DoQ'
})
for config in sorted(protocol_comparison[protocol].keys()):
resolvers_data = protocol_comparison[protocol][config]
if resolvers_data:
print(f"\n {config}")
print(" " + "-" * 60)
print(f" {'Resolver':<15} {'Avg Latency (ms)':<20} {'Queries':<10}")
print(" " + "-" * 60)
# Sort by average latency
resolvers_data.sort(key=lambda x: x['avg_latency_ms'])
for data in resolvers_data:
print(f" {data['resolver']:<15} {data['avg_latency_ms']:<20} {data['total_queries']:<10}")
# Connection persistence
self.df['persistence'] = self.df['keep_alive'].fillna(False)
def generate_summary_statistics(self):
"""Generate comprehensive summary statistics"""
print("\n" + "="*80)
print("SUMMARY STATISTICS")
print("="*80)
# Overall statistics
print("\n--- Overall Performance ---")
print(f"Total queries: {len(self.df)}")
print(f"Mean latency: {self.df['duration_ms'].mean():.2f} ms")
print(f"Median latency: {self.df['duration_ms'].median():.2f} ms")
print(f"95th percentile: {self.df['duration_ms'].quantile(0.95):.2f} ms")
print(f"99th percentile: {self.df['duration_ms'].quantile(0.99):.2f} ms")
# By protocol
print("\n--- Performance by Protocol ---")
protocol_stats = self.df.groupby('protocol_category')['duration_ms'].agg([
('count', 'count'),
('mean', 'mean'),
('median', 'median'),
('std', 'std'),
('p95', lambda x: x.quantile(0.95)),
('p99', lambda x: x.quantile(0.99))
]).round(2)
print(protocol_stats)
# By provider
print("\n--- Performance by Provider ---")
provider_stats = self.df.groupby('provider')['duration_ms'].agg([
('count', 'count'),
('mean', 'mean'),
('median', 'median'),
('std', 'std'),
('p95', lambda x: x.quantile(0.95))
]).round(2)
print(provider_stats)
# DNSSEC impact
print("\n--- DNSSEC Validation Impact ---")
dnssec_stats = self.df.groupby('dnssec_mode')['duration_ms'].agg([
('count', 'count'),
('mean', 'mean'),
('median', 'median'),
('overhead_vs_none', lambda x: x.mean())
]).round(2)
# Calculate overhead percentage
baseline = dnssec_stats.loc['none', 'mean'] if 'none' in dnssec_stats.index else 0
if baseline > 0:
dnssec_stats['overhead_pct'] = ((dnssec_stats['overhead_vs_none'] - baseline) / baseline * 100).round(1)
print(dnssec_stats)
# Bandwidth analysis
print("\n--- Bandwidth Usage ---")
bandwidth_stats = self.df.groupby('protocol_category').agg({
'request_size_bytes': ['mean', 'median'],
'response_size_bytes': ['mean', 'median']
}).round(2)
print(bandwidth_stats)
# Persistence impact (where applicable)
print("\n--- Connection Persistence Impact ---")
persist_protocols = self.df[self.df['protocol_base'].isin(['tls', 'https'])]
if len(persist_protocols) > 0:
persist_stats = persist_protocols.groupby(['protocol_base', 'persistence'])['duration_ms'].agg([
('mean', 'mean'),
('median', 'median')
]).round(2)
print(persist_stats)
return {
'protocol': protocol_stats,
'provider': provider_stats,
'dnssec': dnssec_stats,
'bandwidth': bandwidth_stats
}
def plot_latency_by_protocol(self, output_dir='plots'):
"""Violin plot of latency distribution by protocol"""
Path(output_dir).mkdir(exist_ok=True)
plt.figure(figsize=(14, 7))
# Order protocols logically
protocol_order = ['Plain DNS', 'DoT', 'DoH', 'DoH/3', 'DoQ']
available_protocols = [p for p in protocol_order if p in self.df['protocol_category'].values]
sns.violinplot(data=self.df, x='protocol_category', y='duration_ms',
order=available_protocols, inner='box', cut=0)
plt.title('DNS Query Latency Distribution by Protocol', fontsize=14, fontweight='bold')
plt.xlabel('Protocol', fontsize=12)
plt.ylabel('Response Time (ms)', fontsize=12)
plt.xticks(rotation=0)
# Add mean values as annotations
for i, protocol in enumerate(available_protocols):
mean_val = self.df[self.df['protocol_category'] == protocol]['duration_ms'].mean()
plt.text(i, mean_val, f'{mean_val:.1f}', ha='center', va='bottom', fontweight='bold')
plt.tight_layout()
plt.savefig(f'{output_dir}/latency_by_protocol.png', bbox_inches='tight')
plt.close()
print(f"✓ Saved: latency_by_protocol.png")
def plot_provider_comparison(self, output_dir='plots'):
"""Box plot comparing providers across protocols"""
Path(output_dir).mkdir(exist_ok=True)
fig, axes = plt.subplots(2, 2, figsize=(16, 12))
fig.suptitle('Provider Performance Comparison by Protocol', fontsize=16, fontweight='bold')
protocols = self.df['protocol_category'].unique()
protocols = [p for p in ['Plain DNS', 'DoT', 'DoH', 'DoH/3'] if p in protocols]
for idx, protocol in enumerate(protocols[:4]):
ax = axes[idx // 2, idx % 2]
data = self.df[self.df['protocol_category'] == protocol]
if len(data) > 0:
sns.boxplot(data=data, x='provider', y='duration_ms', ax=ax)
ax.set_title(f'{protocol}', fontsize=12, fontweight='bold')
ax.set_xlabel('Provider', fontsize=10)
ax.set_ylabel('Response Time (ms)', fontsize=10)
ax.tick_params(axis='x', rotation=45)
plt.tight_layout()
plt.savefig(f'{output_dir}/provider_comparison.png', bbox_inches='tight')
plt.close()
print(f"✓ Saved: provider_comparison.png")
def plot_dnssec_impact(self, output_dir='plots'):
"""Compare DNSSEC validation methods (trust vs auth)"""
Path(output_dir).mkdir(exist_ok=True)
# Filter for protocols that have DNSSEC variations
dnssec_data = self.df[self.df['dnssec_mode'] != 'none'].copy()
if len(dnssec_data) == 0:
print("⚠ No DNSSEC data available")
return
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 6))
# Plot 1: Overall DNSSEC impact
protocol_order = ['Plain DNS', 'DoT', 'DoH', 'DoH/3', 'DoQ']
available = [p for p in protocol_order if p in self.df['protocol_category'].values]
sns.barplot(data=self.df, x='protocol_category', y='duration_ms',
hue='dnssec_mode', order=available, ax=ax1, ci=95)
ax1.set_title('DNSSEC Validation Overhead by Protocol', fontsize=12, fontweight='bold')
ax1.set_xlabel('Protocol', fontsize=10)
ax1.set_ylabel('Mean Response Time (ms)', fontsize=10)
ax1.legend(title='DNSSEC Mode', labels=['No DNSSEC', 'Auth (Full)', 'Trust (Resolver)'])
ax1.tick_params(axis='x', rotation=0)
# Plot 2: Trust vs Auth comparison
comparison_data = dnssec_data.groupby(['protocol_category', 'dnssec_mode'])['duration_ms'].mean().reset_index()
pivot_data = comparison_data.pivot(index='protocol_category', columns='dnssec_mode', values='duration_ms')
if 'auth' in pivot_data.columns and 'trust' in pivot_data.columns:
pivot_data['overhead_pct'] = ((pivot_data['auth'] - pivot_data['trust']) / pivot_data['trust'] * 100)
pivot_data['overhead_pct'].plot(kind='bar', ax=ax2, color='coral')
ax2.set_title('Auth vs Trust: Additional Overhead (%)', fontsize=12, fontweight='bold')
ax2.set_xlabel('Protocol', fontsize=10)
ax2.set_ylabel('Additional Overhead (%)', fontsize=10)
ax2.axhline(y=0, color='black', linestyle='--', linewidth=0.8)
ax2.tick_params(axis='x', rotation=45)
ax2.grid(axis='y', alpha=0.3)
plt.tight_layout()
plt.savefig(f'{output_dir}/dnssec_impact.png', bbox_inches='tight')
plt.close()
print(f"✓ Saved: dnssec_impact.png")
def plot_persistence_impact(self, output_dir='plots'):
"""Analyze impact of connection persistence"""
Path(output_dir).mkdir(exist_ok=True)
persist_data = self.df[self.df['protocol_base'].isin(['tls', 'https'])].copy()
if len(persist_data) == 0:
print("⚠ No persistence data available")
return
plt.figure(figsize=(12, 6))
sns.barplot(data=persist_data, x='protocol_base', y='duration_ms',
hue='persistence', ci=95)
plt.title('Impact of Connection Persistence on Latency', fontsize=14, fontweight='bold')
plt.xlabel('Protocol', fontsize=12)
plt.ylabel('Mean Response Time (ms)', fontsize=12)
plt.legend(title='Keep-Alive', labels=['Disabled', 'Enabled'])
# Calculate and annotate overhead reduction
for protocol in persist_data['protocol_base'].unique():
protocol_data = persist_data[persist_data['protocol_base'] == protocol]
no_persist = protocol_data[protocol_data['persistence'] == False]['duration_ms'].mean()
with_persist = protocol_data[protocol_data['persistence'] == True]['duration_ms'].mean()
if not np.isnan(no_persist) and not np.isnan(with_persist):
reduction = ((no_persist - with_persist) / no_persist * 100)
print(f"{protocol}: {reduction:.1f}% reduction with persistence")
plt.tight_layout()
plt.savefig(f'{output_dir}/persistence_impact.png', bbox_inches='tight')
plt.close()
print(f"✓ Saved: persistence_impact.png")
def plot_bandwidth_overhead(self, output_dir='plots'):
"""Visualize bandwidth usage by protocol"""
Path(output_dir).mkdir(exist_ok=True)
bandwidth_data = self.df.groupby('protocol_category').agg({
'request_size_bytes': 'mean',
'response_size_bytes': 'mean'
}).reset_index()
bandwidth_data['total_bytes'] = (bandwidth_data['request_size_bytes'] +
bandwidth_data['response_size_bytes'])
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 6))
# Plot 1: Request vs Response sizes
x = np.arange(len(bandwidth_data))
width = 0.35
ax1.bar(x - width/2, bandwidth_data['request_size_bytes'], width,
label='Request', alpha=0.8)
ax1.bar(x + width/2, bandwidth_data['response_size_bytes'], width,
label='Response', alpha=0.8)
ax1.set_xlabel('Protocol', fontsize=12)
ax1.set_ylabel('Bytes', fontsize=12)
ax1.set_title('Average Request/Response Sizes', fontsize=12, fontweight='bold')
ax1.set_xticks(x)
ax1.set_xticklabels(bandwidth_data['protocol_category'])
ax1.legend()
ax1.grid(axis='y', alpha=0.3)
# Plot 2: Total bandwidth overhead vs UDP baseline
udp_total = bandwidth_data[bandwidth_data['protocol_category'] == 'Plain DNS']['total_bytes'].values
if len(udp_total) > 0:
bandwidth_data['overhead_vs_udp'] = ((bandwidth_data['total_bytes'] - udp_total[0]) / udp_total[0] * 100)
colors = ['green' if x < 0 else 'red' for x in bandwidth_data['overhead_vs_udp']]
ax2.bar(bandwidth_data['protocol_category'], bandwidth_data['overhead_vs_udp'],
color=colors, alpha=0.7)
ax2.axhline(y=0, color='black', linestyle='--', linewidth=0.8)
ax2.set_xlabel('Protocol', fontsize=12)
ax2.set_ylabel('Overhead vs Plain DNS (%)', fontsize=12)
ax2.set_title('Bandwidth Overhead', fontsize=12, fontweight='bold')
ax2.grid(axis='y', alpha=0.3)
plt.tight_layout()
plt.savefig(f'{output_dir}/bandwidth_overhead.png', bbox_inches='tight')
plt.close()
print(f"✓ Saved: bandwidth_overhead.png")
def plot_heatmap(self, output_dir='plots'):
"""Heatmap of provider-protocol performance"""
Path(output_dir).mkdir(exist_ok=True)
# Create pivot table
heatmap_data = self.df.groupby(['provider', 'protocol_category'])['duration_ms'].median().unstack()
plt.figure(figsize=(12, 8))
sns.heatmap(heatmap_data, annot=True, fmt='.1f', cmap='RdYlGn_r',
cbar_kws={'label': 'Median Latency (ms)'})
plt.title('DNS Provider-Protocol Performance Matrix', fontsize=14, fontweight='bold')
plt.xlabel('Protocol', fontsize=12)
plt.ylabel('Provider', fontsize=12)
plt.tight_layout()
plt.savefig(f'{output_dir}/provider_protocol_heatmap.png', bbox_inches='tight')
plt.close()
print(f"✓ Saved: provider_protocol_heatmap.png")
def plot_percentile_comparison(self, output_dir='plots'):
"""Plot percentile comparison across protocols"""
Path(output_dir).mkdir(exist_ok=True)
percentiles = [50, 75, 90, 95, 99]
protocol_order = ['Plain DNS', 'DoT', 'DoH', 'DoH/3', 'DoQ']
available = [p for p in protocol_order if p in self.df['protocol_category'].values]
percentile_data = []
for protocol in available:
data = self.df[self.df['protocol_category'] == protocol]['duration_ms']
for p in percentiles:
percentile_data.append({
'protocol': protocol,
'percentile': f'P{p}',
'latency': np.percentile(data, p)
})
percentile_df = pd.DataFrame(percentile_data)
plt.figure(figsize=(14, 7))
sns.barplot(data=percentile_df, x='protocol', y='latency', hue='percentile', order=available)
plt.title('Latency Percentiles by Protocol', fontsize=14, fontweight='bold')
plt.xlabel('Protocol', fontsize=12)
plt.ylabel('Response Time (ms)', fontsize=12)
plt.legend(title='Percentile', bbox_to_anchor=(1.05, 1), loc='upper left')
plt.tight_layout()
plt.savefig(f'{output_dir}/percentile_comparison.png', bbox_inches='tight')
plt.close()
print(f"✓ Saved: percentile_comparison.png")
def statistical_tests(self):
"""Perform statistical significance tests"""
print("\n" + "="*80)
print("STATISTICAL TESTS")
print("="*80)
# Test 1: Protocol differences (Kruskal-Wallis)
protocols = self.df['protocol_category'].unique()
if len(protocols) > 2:
groups = [self.df[self.df['protocol_category'] == p]['duration_ms'].values
for p in protocols]
h_stat, p_value = stats.kruskal(*groups)
print(f"\n--- Kruskal-Wallis Test (Protocol Differences) ---")
print(f"H-statistic: {h_stat:.4f}")
print(f"p-value: {p_value:.4e}")
print(f"Result: {'Significant' if p_value < 0.05 else 'Not significant'} differences between protocols")
# Test 2: DNSSEC impact (Mann-Whitney U)
if 'none' in self.df['dnssec_mode'].values and 'auth' in self.df['dnssec_mode'].values:
none_data = self.df[self.df['dnssec_mode'] == 'none']['duration_ms']
auth_data = self.df[self.df['dnssec_mode'] == 'auth']['duration_ms']
u_stat, p_value = stats.mannwhitneyu(none_data, auth_data, alternative='two-sided')
print(f"\n--- Mann-Whitney U Test (No DNSSEC vs Auth) ---")
print(f"U-statistic: {u_stat:.4f}")
print(f"p-value: {p_value:.4e}")
print(f"Result: {'Significant' if p_value < 0.05 else 'Not significant'} difference")
# Test 3: Trust vs Auth comparison
if 'trust' in self.df['dnssec_mode'].values and 'auth' in self.df['dnssec_mode'].values:
trust_data = self.df[self.df['dnssec_mode'] == 'trust']['duration_ms']
auth_data = self.df[self.df['dnssec_mode'] == 'auth']['duration_ms']
u_stat, p_value = stats.mannwhitneyu(trust_data, auth_data, alternative='two-sided')
print(f"\n--- Mann-Whitney U Test (Trust vs Auth) ---")
print(f"U-statistic: {u_stat:.4f}")
print(f"p-value: {p_value:.4e}")
print(f"Result: Auth is {'significantly' if p_value < 0.05 else 'not significantly'} slower than Trust")
def generate_latex_table(self, output_dir='plots'):
"""Generate LaTeX table for thesis"""
Path(output_dir).mkdir(exist_ok=True)
# Summary table by protocol
summary = self.df.groupby('protocol_category')['duration_ms'].agg([
('Mean', 'mean'),
('Median', 'median'),
('Std Dev', 'std'),
('P95', lambda x: x.quantile(0.95)),
('P99', lambda x: x.quantile(0.99))
]).round(2)
latex_code = summary.to_latex(float_format="%.2f")
with open(f'{output_dir}/summary_table.tex', 'w') as f:
f.write(latex_code)
print(f"✓ Saved: summary_table.tex")
print("\nLaTeX Table Preview:")
print(latex_code)
def run_full_analysis(self):
"""Run complete analysis pipeline"""
print("="*80)
print("DNS QoS Analysis - Starting Full Analysis")
print("="*80)
# Load data
print("\n[1/10] Loading data...")
self.load_all_data()
# Generate statistics
print("\n[2/10] Generating summary statistics...")
self.generate_summary_statistics()
# Statistical tests
print("\n[3/10] Running statistical tests...")
self.statistical_tests()
# Generate plots
print("\n[4/10] Creating latency by protocol plot...")
self.plot_latency_by_protocol()
print("\n[5/10] Creating provider comparison plot...")
self.plot_provider_comparison()
print("\n[6/10] Creating DNSSEC impact plot...")
self.plot_dnssec_impact()
print("\n[7/10] Creating persistence impact plot...")
self.plot_persistence_impact()
print("\n[8/10] Creating bandwidth overhead plot...")
self.plot_bandwidth_overhead()
print("\n[9/10] Creating heatmap...")
self.plot_heatmap()
print("\n[10/10] Creating percentile comparison...")
self.plot_percentile_comparison()
# Generate LaTeX table
print("\n[Bonus] Generating LaTeX table...")
self.generate_latex_table()
print("\n" + "="*80)
print("✓ Analysis Complete! Check the 'plots' directory for all visualizations.")
print("="*80)
if __name__ == "__main__":
root_dir = "."
output_file = "dns_metrics.csv"
analyze_dns_data(root_dir, output_file)
analyzer = DNSAnalyzer(results_dir='results')
analyzer.run_full_analysis()


@@ -0,0 +1,536 @@
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from pathlib import Path
import datetime
from dateutil import parser as date_parser
import dpkt
# Set style
sns.set_style("whitegrid")
plt.rcParams['figure.dpi'] = 300
plt.rcParams['savefig.dpi'] = 300
plt.rcParams['font.size'] = 10
class FastDNSAnalyzer:
def __init__(self, results_dir='results'):
self.results_dir = Path(results_dir)
self.all_data = []
def should_include_file(self, filename):
"""Filter out DNSSEC and non-persist files"""
name = filename.stem
if 'auth' in name or 'trust' in name:
return False
if name in ['tls', 'https']:
return False
return True
def parse_rfc3339_nano(self, timestamp_str):
"""Parse RFC3339Nano timestamp with timezone"""
try:
dt = date_parser.parse(timestamp_str)
return dt.astimezone(datetime.timezone.utc).timestamp()
except Exception as e:
print(f" Error parsing timestamp {timestamp_str}: {e}")
return None
def extract_bandwidth_from_pcap_fast(self, pcap_file, csv_data):
"""Fast bandwidth extraction using dpkt"""
print(f" Analyzing pcap: {pcap_file.name}")
try:
with open(pcap_file, 'rb') as f:
pcap = dpkt.pcap.Reader(f)
# Build query time windows
query_windows = []
for idx, row in csv_data.iterrows():
start_time = self.parse_rfc3339_nano(row['timestamp'])
if start_time is None:
continue
duration_seconds = row['duration_ns'] / 1_000_000_000
end_time = start_time + duration_seconds
query_windows.append({
'index': idx,
'start': start_time,
'end': end_time,
'bytes_sent': 0,
'bytes_received': 0,
'packets_sent': 0,
'packets_received': 0
})
if not query_windows:
print(" ✗ No valid query windows")
return None
# Sort windows for faster matching
query_windows.sort(key=lambda x: x['start'])
# Process packets
packet_count = 0
matched_count = 0
for timestamp, buf in pcap:
packet_count += 1
packet_size = len(buf)
# Quick parse to determine direction
try:
eth = dpkt.ethernet.Ethernet(buf)
# Get IP layer
if isinstance(eth.data, dpkt.ip.IP):
ip = eth.data
elif isinstance(eth.data, dpkt.ip6.IP6):
ip = eth.data
else:
continue
# Get transport layer
if isinstance(ip.data, dpkt.udp.UDP):
transport = ip.data
src_port = transport.sport
dst_port = transport.dport
elif isinstance(ip.data, dpkt.tcp.TCP):
transport = ip.data
src_port = transport.sport
dst_port = transport.dport
else:
continue
# Determine direction (client port usually higher)
is_outbound = src_port > dst_port
# Linear scan over start-sorted windows; early exit once past the packet time
for window in query_windows:
if window['start'] <= timestamp <= window['end']:
if is_outbound:
window['bytes_sent'] += packet_size
window['packets_sent'] += 1
else:
window['bytes_received'] += packet_size
window['packets_received'] += 1
matched_count += 1
break
elif timestamp < window['start']:
break # No more windows to check
except Exception:
continue
print(f" ✓ Processed {packet_count} packets, matched {matched_count}")
# Convert to DataFrame
bandwidth_df = pd.DataFrame(query_windows)
return bandwidth_df[['index', 'bytes_sent', 'bytes_received',
'packets_sent', 'packets_received']]
except Exception as e:
print(f" ✗ Error reading pcap: {e}")
return None
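# Caveat on the direction heuristic above: src_port > dst_port assumes an
# ephemeral client port talking to a well-known resolver port. A stricter
# (hypothetical) variant would test the server ports directly:
#   is_outbound = dst_port in (53, 443, 853)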
def load_data(self):
"""Load all relevant CSV files and extract bandwidth from pcaps"""
print("Loading data and analyzing bandwidth...")
for provider_dir in self.results_dir.iterdir():
if not provider_dir.is_dir():
continue
provider = provider_dir.name
for csv_file in provider_dir.glob('*.csv'):
if not self.should_include_file(csv_file):
continue
try:
df = pd.read_csv(csv_file)
df['provider'] = provider
df['test_file'] = csv_file.stem
df['csv_path'] = str(csv_file)
# Find corresponding pcap file
pcap_file = csv_file.with_suffix('.pcap')
if pcap_file.exists():
print(f" Processing: {provider}/{csv_file.name}")
bandwidth_data = self.extract_bandwidth_from_pcap_fast(pcap_file, df)
if bandwidth_data is not None and len(bandwidth_data) > 0:
# Merge bandwidth data
df = df.reset_index(drop=True)
for col in ['bytes_sent', 'bytes_received', 'packets_sent', 'packets_received']:
df[col] = 0
for _, row in bandwidth_data.iterrows():
idx = int(row['index'])
if idx < len(df):
df.at[idx, 'bytes_sent'] = row['bytes_sent']
df.at[idx, 'bytes_received'] = row['bytes_received']
df.at[idx, 'packets_sent'] = row['packets_sent']
df.at[idx, 'packets_received'] = row['packets_received']
df['total_bytes'] = df['bytes_sent'] + df['bytes_received']
print(f" ✓ Extracted bandwidth for {len(df)} queries")
else:
print(f" ⚠ Could not extract bandwidth data")
else:
print(f" ⚠ No pcap found for {csv_file.name}")
self.all_data.append(df)
except Exception as e:
print(f" ✗ Error loading {csv_file}: {e}")
import traceback
traceback.print_exc()
print(f"\nTotal files loaded: {len(self.all_data)}")
def create_line_graphs(self, output_dir='output/line_graphs'):
"""Create line graphs for latency and bandwidth"""
Path(output_dir).mkdir(parents=True, exist_ok=True)
print("\nGenerating line graphs...")
for df in self.all_data:
provider = df['provider'].iloc[0]
test_name = df['test_file'].iloc[0]
df['query_index'] = range(1, len(df) + 1)
# Create figure with 2 subplots
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(14, 10))
# Plot 1: Latency
ax1.plot(df['query_index'], df['duration_ms'], marker='o',
markersize=4, linewidth=1, alpha=0.7, color='steelblue')
mean_latency = df['duration_ms'].mean()
ax1.axhline(y=mean_latency, color='r', linestyle='--',
label=f'Mean: {mean_latency:.2f} ms', linewidth=2)
ax1.set_xlabel('Query Number', fontsize=12)
ax1.set_ylabel('Latency (ms)', fontsize=12)
ax1.set_title('Latency Over Time', fontsize=12, fontweight='bold')
ax1.legend()
ax1.grid(True, alpha=0.3)
# Plot 2: Bandwidth
if 'total_bytes' in df.columns and df['total_bytes'].sum() > 0:
ax2.plot(df['query_index'], df['bytes_sent'], marker='s',
markersize=4, linewidth=1, alpha=0.7,
color='orange', label='Sent')
ax2.plot(df['query_index'], df['bytes_received'], marker='^',
markersize=4, linewidth=1, alpha=0.7,
color='green', label='Received')
mean_sent = df['bytes_sent'].mean()
mean_received = df['bytes_received'].mean()
ax2.axhline(y=mean_sent, color='orange', linestyle='--',
linewidth=1.5, alpha=0.5)
ax2.axhline(y=mean_received, color='green', linestyle='--',
linewidth=1.5, alpha=0.5)
ax2.set_xlabel('Query Number', fontsize=12)
ax2.set_ylabel('Bytes', fontsize=12)
ax2.set_title(f'Bandwidth Over Time (Mean: ↑{mean_sent:.0f}B ↓{mean_received:.0f}B)',
fontsize=12, fontweight='bold')
ax2.legend()
ax2.grid(True, alpha=0.3)
fig.suptitle(f'{provider.upper()} - {test_name}',
fontsize=14, fontweight='bold')
plt.tight_layout()
filename = f"{provider}_{test_name}.png"
plt.savefig(f'{output_dir}/{filename}', bbox_inches='tight')
plt.close()
print(f" ✓ Created: {filename}")
def get_protocol_name(self, test_file):
"""Extract clean protocol name"""
name = test_file.replace('-persist', '')
protocol_map = {
'udp': 'Plain DNS (UDP)',
'tls': 'DoT (DNS over TLS)',
'https': 'DoH (DNS over HTTPS)',
'doh3': 'DoH/3 (DNS over HTTP/3)',
'doq': 'DoQ (DNS over QUIC)'
}
return protocol_map.get(name, name.upper())
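# Illustration:
#   get_protocol_name('tls-persist') -> 'DoT (DNS over TLS)'
#   get_protocol_name('doh3')        -> 'DoH/3 (DNS over HTTP/3)'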
def create_resolver_comparison_bars(self, output_dir='output/comparisons'):
"""Create bar graphs comparing resolvers for latency and bandwidth"""
Path(output_dir).mkdir(parents=True, exist_ok=True)
print("\nGenerating resolver comparison graphs...")
combined_df = pd.concat(self.all_data, ignore_index=True)
protocols = combined_df['test_file'].unique()
for protocol in protocols:
protocol_data = combined_df[combined_df['test_file'] == protocol]
protocol_name = self.get_protocol_name(protocol)
# Latency stats
latency_stats = protocol_data.groupby('provider')['duration_ms'].agg([
('mean', 'mean'),
('median', 'median'),
('std', 'std')
]).reset_index()
# Create latency comparison
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 6))
fig.suptitle(f'{protocol_name} - Latency Comparison',
fontsize=16, fontweight='bold')
# Mean latency
bars1 = ax1.bar(latency_stats['provider'], latency_stats['mean'],
color='steelblue', alpha=0.8, edgecolor='black')
ax1.errorbar(latency_stats['provider'], latency_stats['mean'],
yerr=latency_stats['std'], fmt='none', color='black',
capsize=5, alpha=0.6)
for bar in bars1:
height = bar.get_height()
ax1.text(bar.get_x() + bar.get_width()/2., height,
f'{height:.2f}',
ha='center', va='bottom', fontweight='bold')
ax1.set_xlabel('Resolver', fontsize=12)
ax1.set_ylabel('Mean Latency (ms)', fontsize=12)
ax1.set_title('Mean Latency', fontsize=12)
ax1.grid(axis='y', alpha=0.3)
# Median latency
bars2 = ax2.bar(latency_stats['provider'], latency_stats['median'],
color='coral', alpha=0.8, edgecolor='black')
for bar in bars2:
height = bar.get_height()
ax2.text(bar.get_x() + bar.get_width()/2., height,
f'{height:.2f}',
ha='center', va='bottom', fontweight='bold')
ax2.set_xlabel('Resolver', fontsize=12)
ax2.set_ylabel('Median Latency (ms)', fontsize=12)
ax2.set_title('Median Latency', fontsize=12)
ax2.grid(axis='y', alpha=0.3)
plt.tight_layout()
plt.savefig(f'{output_dir}/latency_{protocol}.png', bbox_inches='tight')
plt.close()
print(f" ✓ Created: latency_{protocol}.png")
# Bandwidth comparison
if 'total_bytes' in protocol_data.columns and protocol_data['total_bytes'].sum() > 0:
bandwidth_stats = protocol_data.groupby('provider').agg({
'bytes_sent': 'mean',
'bytes_received': 'mean',
'total_bytes': 'mean'
}).reset_index()
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 6))
fig.suptitle(f'{protocol_name} - Bandwidth Comparison',
fontsize=16, fontweight='bold')
# Sent vs Received
x = np.arange(len(bandwidth_stats))
width = 0.35
bars1 = ax1.bar(x - width/2, bandwidth_stats['bytes_sent'], width,
label='Sent', color='orange', alpha=0.8, edgecolor='black')
bars2 = ax1.bar(x + width/2, bandwidth_stats['bytes_received'], width,
label='Received', color='green', alpha=0.8, edgecolor='black')
ax1.set_xlabel('Resolver', fontsize=12)
ax1.set_ylabel('Bytes per Query', fontsize=12)
ax1.set_title('Average Bandwidth per Query', fontsize=12)
ax1.set_xticks(x)
ax1.set_xticklabels(bandwidth_stats['provider'])
ax1.legend()
ax1.grid(axis='y', alpha=0.3)
# Total bandwidth
bars3 = ax2.bar(bandwidth_stats['provider'], bandwidth_stats['total_bytes'],
color='purple', alpha=0.8, edgecolor='black')
for bar in bars3:
height = bar.get_height()
ax2.text(bar.get_x() + bar.get_width()/2., height,
f'{height:.0f}',
ha='center', va='bottom', fontweight='bold')
ax2.set_xlabel('Resolver', fontsize=12)
ax2.set_ylabel('Total Bytes per Query', fontsize=12)
ax2.set_title('Total Bandwidth per Query', fontsize=12)
ax2.grid(axis='y', alpha=0.3)
plt.tight_layout()
plt.savefig(f'{output_dir}/bandwidth_{protocol}.png', bbox_inches='tight')
plt.close()
print(f" ✓ Created: bandwidth_{protocol}.png")
def generate_latex_tables(self, output_dir='output/tables'):
"""Generate LaTeX tables with latency and bandwidth statistics"""
Path(output_dir).mkdir(parents=True, exist_ok=True)
print("\nGenerating LaTeX tables...")
combined_df = pd.concat(self.all_data, ignore_index=True)
# Generate latency table for each resolver
for provider in combined_df['provider'].unique():
provider_data = combined_df[combined_df['provider'] == provider]
stats = provider_data.groupby('test_file')['duration_ms'].agg([
('Mean', 'mean'),
('Median', 'median'),
('Std Dev', 'std'),
('P95', lambda x: x.quantile(0.95)),
('P99', lambda x: x.quantile(0.99))
]).round(2)
stats.index = stats.index.map(self.get_protocol_name)
stats.index.name = 'Protocol'
latex_code = stats.to_latex(
caption=f'{provider.upper()} - Latency Statistics (ms)',
label=f'tab:{provider}_latency',
float_format="%.2f"
)
with open(f'{output_dir}/{provider}_latency.tex', 'w') as f:
f.write(latex_code)
print(f" ✓ Created: {provider}_latency.tex")
# Generate bandwidth table for each resolver
for provider in combined_df['provider'].unique():
provider_data = combined_df[combined_df['provider'] == provider]
if 'total_bytes' not in provider_data.columns or provider_data['total_bytes'].sum() == 0:
continue
bandwidth_stats = provider_data.groupby('test_file').agg({
'bytes_sent': 'mean',
'bytes_received': 'mean',
'total_bytes': 'mean'
}).round(2)
bandwidth_stats.columns = ['Avg Sent (B)', 'Avg Received (B)', 'Avg Total (B)']
bandwidth_stats.index = bandwidth_stats.index.map(self.get_protocol_name)
bandwidth_stats.index.name = 'Protocol'
latex_code = bandwidth_stats.to_latex(
caption=f'{provider.upper()} - Bandwidth Statistics',
label=f'tab:{provider}_bandwidth',
float_format="%.2f"
)
with open(f'{output_dir}/{provider}_bandwidth.tex', 'w') as f:
f.write(latex_code)
print(f" ✓ Created: {provider}_bandwidth.tex")
# Generate protocol efficiency table
print("\nGenerating protocol efficiency table...")
if 'total_bytes' in combined_df.columns and combined_df['total_bytes'].sum() > 0:
protocol_bandwidth = combined_df.groupby('test_file').agg({
'bytes_sent': 'mean',
'bytes_received': 'mean',
'total_bytes': 'mean'
}).round(2)
# Find UDP baseline
udp_baseline = None
for protocol in protocol_bandwidth.index:
if 'udp' in protocol:
udp_baseline = protocol_bandwidth.loc[protocol, 'total_bytes']
break
if udp_baseline and udp_baseline > 0:
protocol_bandwidth['Overhead vs UDP (%)'] = (
(protocol_bandwidth['total_bytes'] - udp_baseline) / udp_baseline * 100
).round(1)
protocol_bandwidth['Efficiency (%)'] = (
100 / (1 + protocol_bandwidth['Overhead vs UDP (%)'] / 100)
).round(1)
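# Algebraically, 100 / (1 + overhead/100) reduces to
# udp_baseline / total_bytes * 100, i.e. efficiency is UDP's average
# bytes per query expressed as a share of this protocol's.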
protocol_bandwidth.columns = ['Avg Sent (B)', 'Avg Received (B)',
'Avg Total (B)', 'Overhead (%)', 'Efficiency (%)']
protocol_bandwidth.index = protocol_bandwidth.index.map(self.get_protocol_name)
protocol_bandwidth.index.name = 'Protocol'
latex_code = protocol_bandwidth.to_latex(
caption='Protocol Bandwidth Efficiency Comparison',
label='tab:protocol_efficiency',
float_format="%.2f"
)
with open(f'{output_dir}/protocol_efficiency.tex', 'w') as f:
f.write(latex_code)
print(f" ✓ Created: protocol_efficiency.tex")
print("\n--- Protocol Efficiency ---")
print(protocol_bandwidth.to_string())
# Generate combined comparison tables
comparison_stats = combined_df.groupby(['provider', 'test_file'])['duration_ms'].agg([
('Mean', 'mean'),
('Median', 'median'),
('P95', lambda x: x.quantile(0.95))
]).round(2)
for metric in ['Mean', 'Median', 'P95']:
pivot_table = comparison_stats[metric].unstack(level=0)
pivot_table.index = pivot_table.index.map(self.get_protocol_name)
pivot_table.index.name = 'Protocol'
latex_code = pivot_table.to_latex(
caption=f'Resolver Latency Comparison - {metric} (ms)',
label=f'tab:comparison_{metric.lower()}',
float_format="%.2f"
)
with open(f'{output_dir}/comparison_{metric.lower()}.tex', 'w') as f:
f.write(latex_code)
print(f" ✓ Created: comparison_{metric.lower()}.tex")
def run_analysis(self):
"""Run the complete analysis"""
print("="*80)
print("Fast DNS QoS Analysis with Bandwidth")
print("="*80)
self.load_data()
if not self.all_data:
print("\n⚠ No data loaded.")
return
print("\n" + "="*80)
self.create_line_graphs()
print("\n" + "="*80)
self.create_resolver_comparison_bars()
print("\n" + "="*80)
self.generate_latex_tables()
print("\n" + "="*80)
print("✓ Analysis Complete!")
print("="*80)
if __name__ == "__main__":
analyzer = FastDNSAnalyzer(results_dir='results')
analyzer.run_analysis()

View File

@@ -0,0 +1,369 @@
package main
import (
"encoding/csv"
"fmt"
"log"
"os"
"path/filepath"
"strconv"
"strings"
"time"
"github.com/google/gopacket"
"github.com/google/gopacket/layers"
"github.com/google/gopacket/pcapgo"
)
type QueryRecord struct {
Domain string
QueryType string
Protocol string
DNSSec string
AuthDNSSec string
KeepAlive string
DNSServer string
Timestamp string
DurationNs int64
DurationMs float64
RequestSizeBytes int
ResponseSizeBytes int
ResponseCode string
Error string
BytesSent int64
BytesReceived int64
PacketsSent int64
PacketsReceived int64
TotalBytes int64
}
func parseRFC3339Nano(ts string) (time.Time, error) {
return time.Parse(time.RFC3339Nano, ts)
}
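// e.g. parseRFC3339Nano("2025-01-02T03:04:05.123456789Z") keeps the full
// nanosecond fraction, which the per-query windows in enrichRecords rely on.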
func processProviderFolder(providerPath string) error {
providerName := filepath.Base(providerPath)
fmt.Printf("\n=== Processing provider: %s ===\n", providerName)
files, err := os.ReadDir(providerPath)
if err != nil {
return err
}
processed := 0
skipped := 0
errors := 0
for _, file := range files {
if !strings.HasSuffix(file.Name(), ".csv") {
continue
}
csvPath := filepath.Join(providerPath, file.Name())
pcapPath := strings.Replace(csvPath, ".csv", ".pcap", 1)
// Check if PCAP exists
if _, err := os.Stat(pcapPath); os.IsNotExist(err) {
fmt.Printf(" ⊗ Skipping: %s (no matching PCAP)\n", file.Name())
skipped++
continue
}
// Check if already processed (has backup)
backupPath := csvPath + ".bak"
if _, err := os.Stat(backupPath); err == nil {
fmt.Printf(" ⊙ Skipping: %s (already processed, backup exists)\n", file.Name())
skipped++
continue
}
fmt.Printf(" ↻ Processing: %s ... ", file.Name())
if err := processPair(csvPath, pcapPath); err != nil {
fmt.Printf("ERROR\n")
log.Printf(" Error: %v\n", err)
errors++
} else {
fmt.Printf("✓\n")
processed++
}
}
fmt.Printf(" Summary: %d processed, %d skipped, %d errors\n", processed, skipped, errors)
return nil
}
func processPair(csvPath, pcapPath string) error {
// Create backup
backupPath := csvPath + ".bak"
input, err := os.ReadFile(csvPath)
if err != nil {
return fmt.Errorf("backup read failed: %w", err)
}
if err := os.WriteFile(backupPath, input, 0644); err != nil {
return fmt.Errorf("backup write failed: %w", err)
}
// Read CSV records
records, err := readCSV(csvPath)
if err != nil {
return fmt.Errorf("CSV read failed: %w", err)
}
if len(records) == 0 {
return fmt.Errorf("no records in CSV")
}
// Read and parse PCAP
packets, err := readPCAPGo(pcapPath)
if err != nil {
return fmt.Errorf("PCAP read failed: %w", err)
}
// Enrich records with bandwidth data
enrichRecords(records, packets)
// Write enriched CSV
if err := writeCSV(csvPath, records); err != nil {
return fmt.Errorf("CSV write failed: %w", err)
}
return nil
}
func readCSV(path string) ([]*QueryRecord, error) {
f, err := os.Open(path)
if err != nil {
return nil, err
}
defer f.Close()
r := csv.NewReader(f)
rows, err := r.ReadAll()
if err != nil {
return nil, err
}
if len(rows) < 2 {
return nil, fmt.Errorf("CSV has no data rows")
}
records := make([]*QueryRecord, 0, len(rows)-1)
for i := 1; i < len(rows); i++ {
row := rows[i]
if len(row) < 14 {
log.Printf(" Warning: Skipping malformed row %d", i+1)
continue
}
durationNs, _ := strconv.ParseInt(row[8], 10, 64)
durationMs, _ := strconv.ParseFloat(row[9], 64)
reqSize, _ := strconv.Atoi(row[10])
respSize, _ := strconv.Atoi(row[11])
records = append(records, &QueryRecord{
Domain: row[0],
QueryType: row[1],
Protocol: row[2],
DNSSec: row[3],
AuthDNSSec: row[4],
KeepAlive: row[5],
DNSServer: row[6],
Timestamp: row[7],
DurationNs: durationNs,
DurationMs: durationMs,
RequestSizeBytes: reqSize,
ResponseSizeBytes: respSize,
ResponseCode: row[12],
Error: row[13],
})
}
return records, nil
}
type PacketInfo struct {
Timestamp time.Time
Size int
IsSent bool
}
func readPCAPGo(path string) ([]PacketInfo, error) {
f, err := os.Open(path)
if err != nil {
return nil, err
}
defer f.Close()
reader, err := pcapgo.NewReader(f)
if err != nil {
return nil, err
}
var packets []PacketInfo
packetSource := gopacket.NewPacketSource(reader, reader.LinkType())
for packet := range packetSource.Packets() {
if packet.NetworkLayer() == nil {
continue
}
isDNS := false
isSent := false
// Check UDP layer (DNS, DoQ, DoH3)
if udpLayer := packet.Layer(layers.LayerTypeUDP); udpLayer != nil {
udp := udpLayer.(*layers.UDP)
isDNS = udp.SrcPort == 53 || udp.DstPort == 53 ||
udp.SrcPort == 853 || udp.DstPort == 853 ||
udp.SrcPort == 443 || udp.DstPort == 443
isSent = udp.DstPort == 53 || udp.DstPort == 853 || udp.DstPort == 443
}
// Check TCP layer (DoT, DoH)
if tcpLayer := packet.Layer(layers.LayerTypeTCP); tcpLayer != nil {
tcp := tcpLayer.(*layers.TCP)
isDNS = tcp.SrcPort == 53 || tcp.DstPort == 53 ||
tcp.SrcPort == 853 || tcp.DstPort == 853 ||
tcp.SrcPort == 443 || tcp.DstPort == 443
isSent = tcp.DstPort == 53 || tcp.DstPort == 853 || tcp.DstPort == 443
}
if isDNS {
packets = append(packets, PacketInfo{
Timestamp: packet.Metadata().Timestamp,
Size: len(packet.Data()),
IsSent: isSent,
})
}
}
return packets, nil
}
func enrichRecords(records []*QueryRecord, packets []PacketInfo) {
for _, rec := range records {
ts, err := parseRFC3339Nano(rec.Timestamp)
if err != nil {
log.Printf(" Warning: Failed to parse timestamp: %s", rec.Timestamp)
continue
}
// Define time window for this query
windowStart := ts
windowEnd := ts.Add(time.Duration(rec.DurationNs))
var sent, recv, pktSent, pktRecv int64
// Match packets within the time window
for _, pkt := range packets {
if (pkt.Timestamp.Equal(windowStart) || pkt.Timestamp.After(windowStart)) &&
pkt.Timestamp.Before(windowEnd) {
if pkt.IsSent {
sent += int64(pkt.Size)
pktSent++
} else {
recv += int64(pkt.Size)
pktRecv++
}
}
}
rec.BytesSent = sent
rec.BytesReceived = recv
rec.PacketsSent = pktSent
rec.PacketsReceived = pktRecv
rec.TotalBytes = sent + recv
}
}
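// Note: this is an O(queries x packets) scan per file. That is acceptable
// for these capture sizes; sorting packets and binary-searching each window
// (as the Python preprocessor does) would be a hypothetical speedup.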
func writeCSV(path string, records []*QueryRecord) error {
f, err := os.Create(path)
if err != nil {
return err
}
defer f.Close()
w := csv.NewWriter(f)
defer w.Flush()
// Write header
header := []string{
"domain", "query_type", "protocol", "dnssec", "auth_dnssec",
"keep_alive", "dns_server", "timestamp", "duration_ns", "duration_ms",
"request_size_bytes", "response_size_bytes", "response_code", "error",
"bytes_sent", "bytes_received", "packets_sent", "packets_received", "total_bytes",
}
if err := w.Write(header); err != nil {
return err
}
// Write data rows
for _, rec := range records {
row := []string{
rec.Domain,
rec.QueryType,
rec.Protocol,
rec.DNSSec,
rec.AuthDNSSec,
rec.KeepAlive,
rec.DNSServer,
rec.Timestamp,
strconv.FormatInt(rec.DurationNs, 10),
strconv.FormatFloat(rec.DurationMs, 'f', -1, 64),
strconv.Itoa(rec.RequestSizeBytes),
strconv.Itoa(rec.ResponseSizeBytes),
rec.ResponseCode,
rec.Error,
strconv.FormatInt(rec.BytesSent, 10),
strconv.FormatInt(rec.BytesReceived, 10),
strconv.FormatInt(rec.PacketsSent, 10),
strconv.FormatInt(rec.PacketsReceived, 10),
strconv.FormatInt(rec.TotalBytes, 10),
}
if err := w.Write(row); err != nil {
return err
}
}
return nil
}
func main() {
resultsDir := "results"
providers := []string{"adguard", "cloudflare", "google", "quad9"}
fmt.Println("╔═══════════════════════════════════════════════╗")
fmt.Println("║ DNS PCAP Preprocessor v1.0 ║")
fmt.Println("║ Enriching ALL CSVs with bandwidth metrics ║")
fmt.Println("╚═══════════════════════════════════════════════╝")
totalProcessed := 0
totalSkipped := 0
totalErrors := 0
for _, provider := range providers {
providerPath := filepath.Join(resultsDir, provider)
if _, err := os.Stat(providerPath); os.IsNotExist(err) {
fmt.Printf("\n⚠ Provider folder not found: %s\n", provider)
continue
}
if err := processProviderFolder(providerPath); err != nil {
log.Printf("Error processing %s: %v\n", provider, err)
totalErrors++
}
}
fmt.Println("\n╔═══════════════════════════════════════════════╗")
fmt.Println("║ Preprocessing Complete! ║")
fmt.Println("╚═══════════════════════════════════════════════╝")
fmt.Printf("\nAll CSV files now have 5 additional columns:\n")
fmt.Printf(" • bytes_sent - Total bytes sent to DNS server\n")
fmt.Printf(" • bytes_received - Total bytes received from DNS server\n")
fmt.Printf(" • packets_sent - Number of packets sent\n")
fmt.Printf(" • packets_received - Number of packets received\n")
fmt.Printf(" • total_bytes - Sum of sent + received bytes\n")
fmt.Printf("\n📁 Backups saved as: *.csv.bak\n")
fmt.Printf("\n💡 Tip: The analysis script will filter which files to visualize,\n")
fmt.Printf(" but all files now have complete bandwidth metrics!\n")
}

View File

@@ -1,250 +1,362 @@
#!/usr/bin/env python3
"""
Add network metrics from PCAP files to DNS CSV files.
Adds: raw_bytes_total, raw_packet_count, overhead_bytes, efficiency_percent
Fast PCAP Preprocessor for DNS QoS Analysis
Loads PCAP into memory first, then uses binary search for matching.
Uses LAN IP to determine direction (LAN = sent, non-LAN = received).
"""
import csv
import os
import argparse
import re
import shutil
from pathlib import Path
from datetime import datetime, timezone
from scapy.all import rdpcap
from typing import Dict, List, NamedTuple
import time
def parse_timestamp(ts_str):
"""Parse timestamp with timezone and nanoseconds (RFC3339Nano)."""
match = re.match(
r'(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2})\.(\d+)([\+\-]\d{2}:\d{2})',
ts_str
)
if not match:
raise ValueError(f"Invalid timestamp format: {ts_str}")
base, nanos, tz = match.groups()
micros = nanos[:6].ljust(6, '0')
iso_str = f"{base}.{micros}{tz}"
dt = datetime.fromisoformat(iso_str)
full_nanos = int(nanos.ljust(9, '0'))
return dt, full_nanos
import dpkt
from dateutil import parser as date_parser
def read_pcap(pcap_path):
"""Read PCAP and return list of (timestamp_epoch, size)."""
class Packet(NamedTuple):
"""Lightweight packet representation."""
timestamp: float
size: int
is_outbound: bool # True if from LAN, False if from internet
class QueryWindow:
"""Efficient query window representation."""
__slots__ = ['index', 'start', 'end', 'sent', 'received', 'pkts_sent', 'pkts_received']
def __init__(self, index: int, start: float, end: float):
self.index = index
self.start = start
self.end = end
self.sent = 0
self.received = 0
self.pkts_sent = 0
self.pkts_received = 0
def parse_csv_timestamp(ts_str: str) -> float:
"""Convert RFC3339Nano timestamp to Unix epoch (seconds)."""
dt = date_parser.isoparse(ts_str)
return dt.timestamp()
def is_lan_ip(ip_bytes: bytes) -> bool:
"""Check if IP is a private/LAN address."""
if len(ip_bytes) != 4:
return False
first = ip_bytes[0]
second = ip_bytes[1]
# 10.0.0.0/8
if first == 10:
return True
# 172.16.0.0/12
if first == 172 and 16 <= second <= 31:
return True
# 192.168.0.0/16
if first == 192 and second == 168:
return True
# 127.0.0.0/8 (localhost)
if first == 127:
return True
return False
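# Illustration:
#   is_lan_ip(bytes([192, 168, 1, 10])) -> True
#   is_lan_ip(bytes([8, 8, 8, 8]))      -> False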
def load_pcap_into_memory(pcap_path: Path) -> List[Packet]:
"""Load all packets from PCAP into memory with minimal data."""
packets = []
print(f" Loading PCAP into memory...")
start_time = time.time()
try:
pkts = rdpcap(str(pcap_path))
for pkt in pkts:
timestamp = float(pkt.time)
length = len(pkt)
packets.append((timestamp, length))
with open(pcap_path, 'rb') as f:
try:
pcap = dpkt.pcap.Reader(f)
except:
# Try pcapng format
f.seek(0)
pcap = dpkt.pcapng.Reader(f)
for ts, buf in pcap:
try:
packet_time = float(ts)
packet_size = len(buf)
# Parse to get source IP
eth = dpkt.ethernet.Ethernet(buf)
# Default to outbound if we can't determine
is_outbound = True
if isinstance(eth.data, dpkt.ip.IP):
ip = eth.data
src_ip = ip.src
is_outbound = is_lan_ip(src_ip)
packets.append(Packet(
timestamp=packet_time,
size=packet_size,
is_outbound=is_outbound
))
except (dpkt.dpkt.NeedData, dpkt.dpkt.UnpackError, AttributeError):
continue
except Exception as e:
print(f" Error reading PCAP: {e}")
print(f" Error reading PCAP: {e}")
return []
elapsed = time.time() - start_time
print(f" Loaded {len(packets):,} packets in {elapsed:.2f}s")
# Sort by timestamp for binary search
packets.sort(key=lambda p: p.timestamp)
return packets
def find_packets_in_window(packets, start_ts, start_nanos, duration_ns):
"""Find packets within exact time window."""
start_epoch = start_ts.timestamp()
start_epoch += (start_nanos % 1_000_000) / 1_000_000_000
end_epoch = start_epoch + (duration_ns / 1_000_000_000)
total_bytes = 0
packet_count = 0
for pkt_ts, pkt_len in packets:
if start_epoch <= pkt_ts <= end_epoch:
total_bytes += pkt_len
packet_count += 1
return total_bytes, packet_count
def enhance_csv(csv_path, pcap_path, output_path, debug=False):
"""Add PCAP metrics to CSV."""
if not os.path.exists(pcap_path):
print(f"⚠️ PCAP not found: {pcap_path}")
return False
print(f"Processing: {os.path.basename(csv_path)}")
# Read PCAP
packets = read_pcap(pcap_path)
print(f" Loaded {len(packets)} packets")
def find_packets_in_window(
packets: List[Packet],
start_time: float,
end_time: float,
left_hint: int = 0
) -> tuple[List[Packet], int]:
"""
Binary search to find all packets within time window.
Returns (matching_packets, left_index_hint_for_next_search).
"""
if not packets:
print(" ❌ No packets found")
return False
return [], 0
if packets and debug:
first_pcap = packets[0][0]
last_pcap = packets[-1][0]
print(f" First PCAP packet: {first_pcap:.6f}")
print(f" Last PCAP packet: {last_pcap:.6f}")
print(f" PCAP duration: {(last_pcap - first_pcap):.3f}s")
# Binary search for first packet >= start_time
left, right = left_hint, len(packets) - 1
first_idx = len(packets)
# Read CSV
with open(csv_path, 'r', newline='') as f:
reader = csv.DictReader(f)
fieldnames = list(reader.fieldnames) + [
'raw_bytes_total',
'raw_packet_count',
'overhead_bytes',
'efficiency_percent'
]
rows = list(reader)
while left <= right:
mid = (left + right) // 2
if packets[mid].timestamp >= start_time:
first_idx = mid
right = mid - 1
else:
left = mid + 1
if rows and debug:
try:
first_ts, _ = parse_timestamp(rows[0]['timestamp'])
last_ts, _ = parse_timestamp(rows[-1]['timestamp'])
print(f" First CSV query: {first_ts.timestamp():.6f}")
print(f" Last CSV query: {last_ts.timestamp():.6f}")
offset = packets[0][0] - first_ts.timestamp()
print(f" Time offset (PCAP - CSV): {offset:.3f}s")
except:
pass
# No packets in range
if first_idx >= len(packets) or packets[first_idx].timestamp > end_time:
return [], first_idx
# Enhance rows
enhanced = []
matched = 0
# Collect all packets in window
matching = []
idx = first_idx
while idx < len(packets) and packets[idx].timestamp <= end_time:
matching.append(packets[idx])
idx += 1
for i, row in enumerate(rows):
try:
timestamp, nanos = parse_timestamp(row['timestamp'])
duration_ns = int(row['duration_ns'])
raw_bytes, packet_count = find_packets_in_window(
packets, timestamp, nanos, duration_ns
)
useful_bytes = (
int(row['request_size_bytes']) +
int(row['response_size_bytes'])
)
overhead = raw_bytes - useful_bytes
efficiency = (
(useful_bytes / raw_bytes * 100)
if raw_bytes > 0 else 0
)
row['raw_bytes_total'] = raw_bytes
row['raw_packet_count'] = packet_count
row['overhead_bytes'] = overhead
row['efficiency_percent'] = f"{efficiency:.2f}"
if raw_bytes > 0:
matched += 1
# Debug first few queries
if debug and i < 3:
print(f" Query {i}: {row['domain']}")
print(f" Duration: {duration_ns / 1e6:.3f}ms")
print(f" Matched packets: {packet_count}")
print(f" Raw bytes: {raw_bytes}")
print(f" Useful bytes: {useful_bytes}")
print(f" Efficiency: {efficiency:.2f}%")
except (ValueError, KeyError) as e:
if debug:
print(f" Error processing row {i}: {e}")
row['raw_bytes_total'] = 0
row['raw_packet_count'] = 0
row['overhead_bytes'] = 0
row['efficiency_percent'] = "0.00"
enhanced.append(row)
print(f" Matched: {matched}/{len(rows)} queries")
if matched == 0:
print(" ⚠️ WARNING: No queries matched any packets!")
print(" This might indicate timestamp misalignment.")
# Write output
os.makedirs(os.path.dirname(output_path), exist_ok=True)
with open(output_path, 'w', newline='') as f:
writer = csv.DictWriter(f, fieldnames=fieldnames, extrasaction='ignore')
writer.writeheader()
writer.writerows(enhanced)
print(f" ✓ Saved: {output_path}")
return True
return matching, first_idx
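# Because queries are visited in start-time order and first_idx is threaded
# back in as left_hint, the repeated binary searches amortize to a single
# forward sweep over the time-sorted packet list.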
def main():
parser = argparse.ArgumentParser(
description='Add PCAP network metrics to DNS CSV files'
)
parser.add_argument('input_dir', help='Input directory (e.g., results)')
parser.add_argument(
'--output',
default='./results_enriched',
help='Output directory (default: ./results_enriched)'
)
parser.add_argument(
'--dry-run',
action='store_true',
help='Preview files without processing'
)
parser.add_argument(
'--debug',
action='store_true',
help='Show detailed timing information'
)
def load_csv_queries(csv_path: Path) -> List[Dict]:
"""Load CSV and create query data structures."""
queries = []
with open(csv_path, 'r', encoding='utf-8') as f:
reader = csv.DictReader(f)
for row in reader:
try:
ts_epoch = parse_csv_timestamp(row['timestamp'])
duration_s = float(row['duration_ns']) / 1e9
queries.append({
'data': row,
'start_time': ts_epoch,
'end_time': ts_epoch + duration_s,
})
except Exception as e:
print(f" Warning: Skipping row - {e}")
continue
return queries
def match_packets_to_queries(
packets: List[Packet],
queries: List[Dict]
) -> List[Dict]:
"""Match packets to query windows using binary search."""
if not queries or not packets:
return queries
args = parser.parse_args()
print(f" Matching packets to queries...")
start_time = time.time()
print("=" * 60)
print("ENHANCE DNS CSVs WITH PCAP METRICS")
print("=" * 60)
print(f"Input: {args.input_dir}")
print(f"Output: {args.output}")
if args.debug:
print("Debug: ENABLED")
print()
# Initialize metrics
for q in queries:
q['bytes_sent'] = 0
q['bytes_received'] = 0
q['packets_sent'] = 0
q['packets_received'] = 0
q['total_bytes'] = 0
# Find CSV files
csv_files = list(Path(args.input_dir).rglob('*.csv'))
# Sort queries by start time for sequential processing
queries_sorted = sorted(enumerate(queries), key=lambda x: x[1]['start_time'])
if not csv_files:
print("❌ No CSV files found")
return 1
matched_packets = 0
left_hint = 0 # Optimization: start next search from here
print(f"Found {len(csv_files)} CSV files\n")
for original_idx, q in queries_sorted:
matching, left_hint = find_packets_in_window(
packets,
q['start_time'],
q['end_time'],
left_hint
)
for pkt in matching:
matched_packets += 1
if pkt.is_outbound:
q['bytes_sent'] += pkt.size
q['packets_sent'] += 1
else:
q['bytes_received'] += pkt.size
q['packets_received'] += 1
q['total_bytes'] = q['bytes_sent'] + q['bytes_received']
if args.dry_run:
print("DRY RUN - would process:")
for csv_path in csv_files:
pcap_path = csv_path.with_suffix('.pcap')
print(f" {csv_path.relative_to(args.input_dir)}")
print(f" PCAP: {'' if pcap_path.exists() else ''}")
return 0
elapsed = time.time() - start_time
print(f" Matched {matched_packets:,} packets in {elapsed:.2f}s")
# Process files
success = 0
failed = 0
# Statistics
total_sent = sum(q['bytes_sent'] for q in queries)
total_recv = sum(q['bytes_received'] for q in queries)
queries_with_data = sum(1 for q in queries if q['total_bytes'] > 0)
print(f" Total: {total_sent:,} bytes sent, {total_recv:,} bytes received")
print(f" Queries with data: {queries_with_data}/{len(queries)}")
return queries
def write_enriched_csv(
csv_path: Path, queries: List[Dict], backup: bool = True
):
"""Write enriched CSV with bandwidth columns."""
if backup and csv_path.exists():
backup_path = csv_path.with_suffix('.csv.bak')
if not backup_path.exists(): # Don't overwrite existing backup
shutil.copy2(csv_path, backup_path)
print(f" Backup: {backup_path.name}")
# Get fieldnames
original_fields = list(queries[0]['data'].keys())
new_fields = [
'bytes_sent',
'bytes_received',
'packets_sent',
'packets_received',
'total_bytes',
]
fieldnames = original_fields + new_fields
with open(csv_path, 'w', encoding='utf-8', newline='') as f:
writer = csv.DictWriter(f, fieldnames=fieldnames)
writer.writeheader()
for q in queries:
row = q['data'].copy()
for field in new_fields:
row[field] = q[field]
writer.writerow(row)
print(f" Written: {csv_path.name}")
def process_provider_directory(provider_path: Path):
"""Process all CSV/PCAP pairs in a provider directory."""
print(f"\n{'='*60}")
print(f"Processing: {provider_path.name.upper()}")
print(f"{'='*60}")
csv_files = sorted(provider_path.glob('*.csv'))
processed = 0
total_time = 0
for csv_path in csv_files:
pcap_path = csv_path.with_suffix('.pcap')
rel_path = csv_path.relative_to(args.input_dir)
output_path = Path(args.output) / rel_path
# Skip backup files
if '.bak' in csv_path.name:
continue
if enhance_csv(str(csv_path), str(pcap_path), str(output_path),
args.debug):
success += 1
else:
failed += 1
print()
pcap_path = csv_path.with_suffix('.pcap')
if not pcap_path.exists():
print(f"\n ⚠ Skipping {csv_path.name} - no matching PCAP")
continue
print(f"\n 📁 {csv_path.name}")
file_start = time.time()
# Load PCAP into memory first
packets = load_pcap_into_memory(pcap_path)
if not packets:
print(f" ⚠ No packets found in PCAP")
continue
# Load CSV queries
queries = load_csv_queries(csv_path)
if not queries:
print(f" ⚠ No valid queries found")
continue
print(f" Loaded {len(queries):,} queries")
# Match packets to queries
enriched_queries = match_packets_to_queries(packets, queries)
# Write enriched CSV
write_enriched_csv(csv_path, enriched_queries)
file_time = time.time() - file_start
total_time += file_time
processed += 1
print(f" ✓ Completed in {file_time:.2f}s")
# Summary
print("=" * 60)
print(f"✓ Success: {success}")
print(f"✗ Failed: {failed}")
print(f"Total: {len(csv_files)}")
print(f"\nOutput: {args.output}")
return 0 if failed == 0 else 1
print(f"\n {'='*58}")
print(f" {provider_path.name}: {processed} files in {total_time:.2f}s")
print(f" {'='*58}")
if __name__ == "__main__":
exit(main())
def main():
"""Main preprocessing pipeline."""
overall_start = time.time()
print("\n" + "="*60)
print("DNS PCAP PREPROCESSOR - Memory-Optimized Edition")
print("="*60)
results_dir = Path('results')
if not results_dir.exists():
print(f"\n❌ Error: '{results_dir}' directory not found")
return
providers = ['adguard', 'cloudflare', 'google', 'quad9']
for provider in providers:
provider_path = results_dir / provider
if provider_path.exists():
process_provider_directory(provider_path)
else:
print(f"\n⚠ Warning: Provider directory not found: {provider}")
overall_time = time.time() - overall_start
print("\n" + "="*60)
print(f"✓ PREPROCESSING COMPLETE")
print(f" Total time: {overall_time:.2f}s ({overall_time/60:.1f} minutes)")
print("="*60 + "\n")
if __name__ == '__main__':
main()

View File

@@ -0,0 +1,426 @@
#!/usr/bin/env python3
"""
Convert DNS CSV files to SQLite database.
Creates a single normalized table with unified DNSSEC handling.
"""
import sqlite3
import csv
from pathlib import Path
from dateutil import parser as date_parser
def create_database_schema(conn: sqlite3.Connection):
"""Create the database schema with indexes."""
cursor = conn.cursor()
# Main queries table
cursor.execute("""
CREATE TABLE IF NOT EXISTS dns_queries (
id INTEGER PRIMARY KEY AUTOINCREMENT,
-- Metadata
provider TEXT NOT NULL,
protocol TEXT NOT NULL,
dnssec_mode TEXT NOT NULL CHECK(dnssec_mode IN ('off', 'auth', 'trust')),
-- Query details
domain TEXT NOT NULL,
query_type TEXT NOT NULL,
keep_alive BOOLEAN NOT NULL,
dns_server TEXT NOT NULL,
-- Timing
timestamp TEXT NOT NULL,
timestamp_unix REAL NOT NULL,
duration_ns INTEGER NOT NULL,
duration_ms REAL NOT NULL,
-- Size metrics
request_size_bytes INTEGER,
response_size_bytes INTEGER,
-- Network metrics (from PCAP)
bytes_sent INTEGER DEFAULT 0,
bytes_received INTEGER DEFAULT 0,
packets_sent INTEGER DEFAULT 0,
packets_received INTEGER DEFAULT 0,
total_bytes INTEGER DEFAULT 0,
-- Response
response_code TEXT,
error TEXT
)
""")
# Create indexes for common queries
cursor.execute("""
CREATE INDEX IF NOT EXISTS idx_provider
ON dns_queries(provider)
""")
cursor.execute("""
CREATE INDEX IF NOT EXISTS idx_protocol
ON dns_queries(protocol)
""")
cursor.execute("""
CREATE INDEX IF NOT EXISTS idx_dnssec_mode
ON dns_queries(dnssec_mode)
""")
cursor.execute("""
CREATE INDEX IF NOT EXISTS idx_keep_alive
ON dns_queries(keep_alive)
""")
cursor.execute("""
CREATE INDEX IF NOT EXISTS idx_provider_protocol_dnssec
ON dns_queries(provider, protocol, dnssec_mode)
""")
cursor.execute("""
CREATE INDEX IF NOT EXISTS idx_timestamp
ON dns_queries(timestamp_unix)
""")
cursor.execute("""
CREATE INDEX IF NOT EXISTS idx_domain
ON dns_queries(domain)
""")
conn.commit()
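# Note: the composite (provider, protocol, dnssec_mode) index is meant to
# serve the grouped comparison queries printed as usage examples in main();
# the single-column indexes cover simple filters.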
def parse_protocol_and_dnssec(filename: str) -> tuple[str, str, bool]:
"""
Extract base protocol, DNSSEC mode, and keep_alive from filename.
Returns (base_protocol, dnssec_mode, keep_alive)
Examples:
'udp.csv' -> ('udp', 'off', False)
'udp-auth.csv' -> ('udp', 'auth', False)
'tls.csv' -> ('tls', 'off', False)
'tls-persist.csv' -> ('tls', 'off', True)
'https-persist.csv' -> ('https', 'off', True)
'https-auth-persist.csv' -> ('https', 'auth', True)
'https-trust-persist.csv' -> ('https', 'trust', True)
'doh3-auth.csv' -> ('doh3', 'auth', False)
'doq.csv' -> ('doq', 'off', False)
"""
name = filename.replace('.csv', '')
# Check for persist suffix (keep_alive)
keep_alive = False
if name.endswith('-persist'):
keep_alive = True
name = name.replace('-persist', '')
# Check for DNSSEC suffix
dnssec_mode = 'off'
if name.endswith('-auth'):
dnssec_mode = 'auth'
name = name.replace('-auth', '')
elif name.endswith('-trust'):
dnssec_mode = 'trust'
name = name.replace('-trust', '')
# Force keep_alive off for UDP (connectionless) and for the QUIC-based
# doh3/doq runs, which have no persistent variant here
if name in ['udp', 'doh3', 'doq']:
keep_alive = False
return (name, dnssec_mode, keep_alive)
def str_to_bool(value: str) -> bool:
"""Convert string boolean to Python bool."""
return value.lower() in ('true', '1', 'yes')
def import_csv_to_db(
csv_path: Path,
provider: str,
conn: sqlite3.Connection
) -> int:
"""Import a CSV file into the database."""
protocol, dnssec_mode, keep_alive_from_filename = parse_protocol_and_dnssec(csv_path.name)
cursor = conn.cursor()
rows_imported = 0
with open(csv_path, 'r', encoding='utf-8') as f:
reader = csv.DictReader(f)
for row in reader:
try:
# Parse timestamp to Unix epoch
dt = date_parser.isoparse(row['timestamp'])
timestamp_unix = dt.timestamp()
# Use keep_alive from filename (more reliable than CSV)
keep_alive = keep_alive_from_filename
# Handle optional fields (may not exist in older CSVs)
bytes_sent = int(row.get('bytes_sent', 0) or 0)
bytes_received = int(row.get('bytes_received', 0) or 0)
packets_sent = int(row.get('packets_sent', 0) or 0)
packets_received = int(row.get('packets_received', 0) or 0)
total_bytes = int(row.get('total_bytes', 0) or 0)
cursor.execute("""
INSERT INTO dns_queries (
provider, protocol, dnssec_mode,
domain, query_type, keep_alive,
dns_server, timestamp, timestamp_unix,
duration_ns, duration_ms,
request_size_bytes, response_size_bytes,
bytes_sent, bytes_received, packets_sent, packets_received, total_bytes,
response_code, error
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
""", (
provider,
protocol,
dnssec_mode,
row['domain'],
row['query_type'],
keep_alive,
row['dns_server'],
row['timestamp'],
timestamp_unix,
int(row['duration_ns']),
float(row['duration_ms']),
int(row.get('request_size_bytes') or 0),
int(row.get('response_size_bytes') or 0),
bytes_sent,
bytes_received,
packets_sent,
packets_received,
total_bytes,
row.get('response_code', ''),
row.get('error', '')
))
rows_imported += 1
except Exception as e:
print(f" Warning: Skipping row - {e}")
continue
conn.commit()
return rows_imported
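# Performance note: rows are inserted one at a time but committed once per
# file (the commit above), which is what keeps the SQLite import fast;
# cursor.executemany() would be an optional further batching step.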
def main():
"""Main import pipeline."""
print("\n" + "="*60)
print("CSV to SQLite Database Converter")
print("="*60)
results_dir = Path('results')
db_path = Path('dns.db')
if not results_dir.exists():
print(f"\n❌ Error: '{results_dir}' directory not found")
return
# Remove existing database
if db_path.exists():
print(f"\n⚠ Removing existing database: {db_path}")
db_path.unlink()
# Create database and schema
print(f"\n📊 Creating database: {db_path}")
conn = sqlite3.connect(db_path)
create_database_schema(conn)
print("✓ Schema created")
# Import CSVs
providers = ['adguard', 'cloudflare', 'google', 'quad9']
total_rows = 0
total_files = 0
for provider in providers:
provider_path = results_dir / provider
if not provider_path.exists():
print(f"\n⚠ Skipping {provider} - directory not found")
continue
print(f"\n{'='*60}")
print(f"Importing: {provider.upper()}")
print(f"{'='*60}")
csv_files = sorted(provider_path.glob('*.csv'))
provider_rows = 0
provider_files = 0
for csv_path in csv_files:
# Skip backup files
if '.bak' in csv_path.name:
continue
protocol, dnssec, keep_alive = parse_protocol_and_dnssec(csv_path.name)
ka_str = "persistent" if keep_alive else "non-persist"
print(f" 📄 {csv_path.name:30}{protocol:8} (DNSSEC: {dnssec:5}, {ka_str})")
rows = import_csv_to_db(csv_path, provider, conn)
print(f" ✓ Imported {rows:,} rows")
provider_rows += rows
provider_files += 1
print(f"\n Total: {provider_files} files, {provider_rows:,} rows")
total_rows += provider_rows
total_files += provider_files
# Create summary
print(f"\n{'='*60}")
print("Database Summary")
print(f"{'='*60}")
cursor = conn.cursor()
# Total counts
cursor.execute("SELECT COUNT(*) FROM dns_queries")
total_queries = cursor.fetchone()[0]
cursor.execute("SELECT COUNT(DISTINCT provider) FROM dns_queries")
unique_providers = cursor.fetchone()[0]
cursor.execute("SELECT COUNT(DISTINCT protocol) FROM dns_queries")
unique_protocols = cursor.fetchone()[0]
cursor.execute("SELECT COUNT(DISTINCT domain) FROM dns_queries")
unique_domains = cursor.fetchone()[0]
print(f"\nTotal queries: {total_queries:,}")
print(f"Providers: {unique_providers}")
print(f"Protocols: {unique_protocols}")
print(f"Unique domains: {unique_domains}")
# Show breakdown by provider, protocol, DNSSEC, and keep_alive
print(f"\nBreakdown by Provider, Protocol, DNSSEC & Keep-Alive:")
print(f"{'-'*80}")
cursor.execute("""
SELECT provider, protocol, dnssec_mode, keep_alive, COUNT(*) as count
FROM dns_queries
GROUP BY provider, protocol, dnssec_mode, keep_alive
ORDER BY provider, protocol, dnssec_mode, keep_alive
""")
current_provider = None
for provider, protocol, dnssec, keep_alive, count in cursor.fetchall():
if current_provider != provider:
if current_provider is not None:
print()
current_provider = provider
ka_str = "" if keep_alive else ""
print(f" {provider:12} | {protocol:8} | {dnssec:5} | KA:{ka_str} | {count:6,} queries")
# Protocol distribution
print(f"\n{'-'*80}")
print("Protocol Distribution:")
print(f"{'-'*80}")
cursor.execute("""
SELECT protocol, COUNT(*) as count
FROM dns_queries
GROUP BY protocol
ORDER BY protocol
""")
for protocol, count in cursor.fetchall():
pct = (count / total_queries) * 100
print(f" {protocol:8} | {count:8,} queries ({pct:5.1f}%)")
# DNSSEC mode distribution
print(f"\n{'-'*80}")
print("DNSSEC Mode Distribution:")
print(f"{'-'*80}")
cursor.execute("""
SELECT dnssec_mode, COUNT(*) as count
FROM dns_queries
GROUP BY dnssec_mode
ORDER BY dnssec_mode
""")
for dnssec_mode, count in cursor.fetchall():
pct = (count / total_queries) * 100
print(f" {dnssec_mode:5} | {count:8,} queries ({pct:5.1f}%)")
# Keep-Alive distribution
print(f"\n{'-'*80}")
print("Keep-Alive Distribution:")
print(f"{'-'*80}")
cursor.execute("""
SELECT keep_alive, COUNT(*) as count
FROM dns_queries
GROUP BY keep_alive
""")
for keep_alive, count in cursor.fetchall():
ka_label = "Persistent" if keep_alive else "Non-persistent"
pct = (count / total_queries) * 100
print(f" {ka_label:15} | {count:8,} queries ({pct:5.1f}%)")
conn.close()
print(f"\n{'='*60}")
print(f"✓ Database created successfully: {db_path}")
print(f" Total: {total_files} files, {total_rows:,} rows")
print(f"{'='*60}\n")
# Print usage examples
print("\n📖 Usage Examples for Metabase:")
print(f"{'-'*60}")
print("\n1. Compare protocols (DNSSEC off, persistent only):")
print(""" SELECT provider, protocol,
AVG(duration_ms) as avg_latency,
AVG(total_bytes) as avg_bytes
FROM dns_queries
WHERE dnssec_mode = 'off' AND keep_alive = 1
GROUP BY provider, protocol;""")
print("\n2. DNSSEC impact on UDP:")
print(""" SELECT provider, dnssec_mode,
AVG(duration_ms) as avg_latency
FROM dns_queries
WHERE protocol = 'udp'
GROUP BY provider, dnssec_mode;""")
print("\n3. Keep-alive impact on TLS:")
print(""" SELECT provider, keep_alive,
AVG(duration_ms) as avg_latency,
AVG(total_bytes) as avg_bytes
FROM dns_queries
WHERE protocol = 'tls' AND dnssec_mode = 'off'
GROUP BY provider, keep_alive;""")
print("\n4. Time series for line graphs:")
print(""" SELECT timestamp_unix, duration_ms, total_bytes
FROM dns_queries
WHERE provider = 'cloudflare'
AND protocol = 'https'
AND dnssec_mode = 'off'
AND keep_alive = 1
ORDER BY timestamp_unix;""")
print("\n5. Overall comparison table:")
print(""" SELECT protocol, dnssec_mode, keep_alive,
COUNT(*) as queries,
AVG(duration_ms) as avg_latency,
AVG(total_bytes) as avg_bytes
FROM dns_queries
GROUP BY protocol, dnssec_mode, keep_alive
ORDER BY protocol, dnssec_mode, keep_alive;""")
print(f"\n{'-'*60}\n")
if __name__ == '__main__':
main()