From 92351a80a9df1aea1dbd31bb7e77ccf355a59d16 Mon Sep 17 00:00:00 2001 From: afranco Date: Wed, 4 Feb 2026 22:08:05 +0000 Subject: [PATCH] feat(dns): add dnscrypt and dns over tcp --- client/client.go | 47 +- common/protocols/dnscrypt/dnscrypt.go | 84 +- common/protocols/dotcp/dotcp.go | 222 ++++++ .../{do53/do53.go => doudp/doudp.go} | 54 +- go.mod | 23 +- go.sum | 62 +- internal/qol/utils.go | 28 +- scripts/analysis/analyze_dns_metrics.py | 751 +++++++++++------- scripts/analysis/analyze_simple.py | 536 +++++++++++++ scripts/tools/add_extra_metrics_to_csv.go | 369 +++++++++ scripts/tools/add_extra_metrics_to_csv.py | 542 ++++++++----- scripts/tools/csvs_to_sqlite.py | 426 ++++++++++ 12 files changed, 2576 insertions(+), 568 deletions(-) create mode 100644 common/protocols/dotcp/dotcp.go rename common/protocols/{do53/do53.go => doudp/doudp.go} (50%) create mode 100644 scripts/analysis/analyze_simple.py create mode 100644 scripts/tools/add_extra_metrics_to_csv.go create mode 100644 scripts/tools/csvs_to_sqlite.py diff --git a/client/client.go b/client/client.go index 565ada6..5400755 100644 --- a/client/client.go +++ b/client/client.go @@ -8,10 +8,12 @@ import ( "github.com/afonsofrancof/sdns-proxy/common/dnssec" "github.com/afonsofrancof/sdns-proxy/common/logger" - "github.com/afonsofrancof/sdns-proxy/common/protocols/do53" + "github.com/afonsofrancof/sdns-proxy/common/protocols/dnscrypt" "github.com/afonsofrancof/sdns-proxy/common/protocols/doh" "github.com/afonsofrancof/sdns-proxy/common/protocols/doq" "github.com/afonsofrancof/sdns-proxy/common/protocols/dot" + "github.com/afonsofrancof/sdns-proxy/common/protocols/dotcp" + "github.com/afonsofrancof/sdns-proxy/common/protocols/doudp" "github.com/miekg/dns" ) @@ -37,7 +39,7 @@ type Options struct { func New(upstream string, opts Options) (DNSClient, error) { logger.Debug("Creating DNS client for upstream: %s with options: %+v", upstream, opts) - // Try to parse as URL first + // Try to parse as URL parsedURL, err := url.Parse(upstream) if err != nil { logger.Error("Invalid upstream format: %v", err) @@ -51,7 +53,7 @@ func New(upstream string, opts Options) (DNSClient, error) { logger.Debug("Parsing %s as URL with scheme %s", upstream, parsedURL.Scheme) baseClient, err = createClientFromURL(parsedURL, opts) } else { - // No scheme - treat as plain DNS address + // No scheme - treat as plain DNS address (defaults to UDP) logger.Debug("Parsing %s as plain DNS address", upstream) baseClient, err = createClientFromPlainAddress(upstream, opts) } @@ -200,7 +202,8 @@ func createClientFromPlainAddress(address string, opts Options) (DNSClient, erro } logger.Debug("Creating client from plain address: host=%s, port=%s", host, port) - return createClient("", host, port, "", opts) + // Default to UDP for plain addresses + return createClient("udp", host, port, "", opts) } func getDefaultPort(scheme string) string { @@ -212,6 +215,8 @@ func getDefaultPort(scheme string) string { port = "853" case "quic", "doq": port = "853" + case "dnscrypt": + port = "443" } logger.Debug("Default port for scheme %s: %s", scheme, port) return port @@ -232,13 +237,22 @@ func createClient(scheme, host, port, path string, opts Options) (DNSClient, err scheme, host, port, path, opts.DNSSEC, opts.KeepAlive) switch scheme { - case "udp", "tcp", "do53", "": - config := do53.Config{ + case "udp", "doudp", "": + config := doudp.Config{ HostAndPort: net.JoinHostPort(host, port), DNSSEC: opts.DNSSEC, } - logger.Debug("Creating DO53 client with config: %+v", config) - return do53.New(config) + logger.Debug("Creating DoUDP client with config: %+v", config) + return doudp.New(config) + + case "tcp", "dotcp": + config := dotcp.Config{ + HostAndPort: net.JoinHostPort(host, port), + DNSSEC: opts.DNSSEC, + KeepAlive: opts.KeepAlive, + } + logger.Debug("Creating DoTCP client with config: %+v", config) + return dotcp.New(config) case "https", "doh": config := doh.Config{ @@ -274,11 +288,22 @@ func createClient(scheme, host, port, path string, opts Options) (DNSClient, err logger.Debug("Creating DoT client with config: %+v", config) return dot.New(config) + case "sdns": + config := dnscrypt.Config{ + // Janky solution but whatever + // Here we rejoin them as the client wants them together + // The host is not really a host but whatever + ServerStamp: fmt.Sprintf("%v://%v",scheme,host), + DNSSEC: opts.DNSSEC, + } + logger.Debug("Creating DNSCrypt client with stamp") + return dnscrypt.New(config) + case "doq": config := doq.Config{ - Host: host, - Port: port, - DNSSEC: opts.DNSSEC, + Host: host, + Port: port, + DNSSEC: opts.DNSSEC, } logger.Debug("Creating DoQ client with config: %+v", config) return doq.New(config) diff --git a/common/protocols/dnscrypt/dnscrypt.go b/common/protocols/dnscrypt/dnscrypt.go index 9939b27..61b59e9 100644 --- a/common/protocols/dnscrypt/dnscrypt.go +++ b/common/protocols/dnscrypt/dnscrypt.go @@ -1,3 +1,85 @@ package dnscrypt -// DNSCrypt resolver implementation +import ( + "fmt" + "time" + + "github.com/afonsofrancof/sdns-proxy/common/logger" + "github.com/ameshkov/dnscrypt/v2" + "github.com/miekg/dns" +) + +type Config struct { + ServerStamp string + DNSSEC bool + WriteTimeout time.Duration + ReadTimeout time.Duration +} + +type Client struct { + resolver *dnscrypt.Client + config Config + ri *dnscrypt.ResolverInfo +} + +func New(config Config) (*Client, error) { + logger.Debug("Creating DNSCrypt client with stamp: %s", config.ServerStamp) + + if config.ServerStamp == "" { + logger.Error("DNSCrypt client creation failed: empty ServerStamp") + return nil, fmt.Errorf("dnscrypt: ServerStamp cannot be empty") + } + if config.WriteTimeout <= 0 { + config.WriteTimeout = 5 * time.Second + } + if config.ReadTimeout <= 0 { + config.ReadTimeout = 10 * time.Second + } + + resolver := &dnscrypt.Client{ + Net: "udp", + Timeout: config.ReadTimeout, + } + + // Resolve the server info from the stamp + ri, err := resolver.Dial(config.ServerStamp) + if err != nil { + logger.Error("DNSCrypt failed to dial server: %v", err) + return nil, fmt.Errorf("dnscrypt: failed to dial server: %w", err) + } + + logger.Debug("DNSCrypt client created (DNSSEC: %v)", config.DNSSEC) + + return &Client{ + resolver: resolver, + config: config, + ri: ri, + }, nil +} + +func (c *Client) Close() { + // The dnscrypt library doesn't require explicit cleanup +} + +func (c *Client) Query(msg *dns.Msg) (*dns.Msg, error) { + if len(msg.Question) > 0 { + question := msg.Question[0] + logger.Debug("DNSCrypt query: %s %s", question.Name, dns.TypeToString[question.Qtype]) + } + + if c.config.DNSSEC { + msg.SetEdns0(4096, true) + } + + response, err := c.resolver.Exchange(msg, c.ri) + if err != nil { + logger.Error("DNSCrypt query failed: %v", err) + return nil, fmt.Errorf("dnscrypt: query failed: %w", err) + } + + if len(response.Answer) > 0 { + logger.Debug("DNSCrypt response: %d answers", len(response.Answer)) + } + + return response, nil +} diff --git a/common/protocols/dotcp/dotcp.go b/common/protocols/dotcp/dotcp.go new file mode 100644 index 0000000..0e130a7 --- /dev/null +++ b/common/protocols/dotcp/dotcp.go @@ -0,0 +1,222 @@ +package dotcp + +import ( + "encoding/binary" + "fmt" + "io" + "net" + "sync" + "time" + + "github.com/afonsofrancof/sdns-proxy/common/logger" + "github.com/miekg/dns" +) + +type Config struct { + HostAndPort string + DNSSEC bool + KeepAlive bool + WriteTimeout time.Duration + ReadTimeout time.Duration +} + +type Client struct { + hostAndPort string + config Config + conn net.Conn + connMutex sync.Mutex +} + +func New(config Config) (*Client, error) { + logger.Debug("Creating DoTCP client: %s (KeepAlive: %v)", config.HostAndPort, config.KeepAlive) + + if config.HostAndPort == "" { + logger.Error("DoTCP client creation failed: empty HostAndPort") + return nil, fmt.Errorf("dotcp: HostAndPort cannot be empty") + } + if config.WriteTimeout <= 0 { + config.WriteTimeout = 2 * time.Second + } + if config.ReadTimeout <= 0 { + config.ReadTimeout = 5 * time.Second + } + + client := &Client{ + hostAndPort: config.HostAndPort, + config: config, + } + + if config.KeepAlive { + if err := client.ensureConnection(); err != nil { + logger.Error("DoTCP failed to establish initial connection: %v", err) + return nil, fmt.Errorf("failed to establish initial connection: %w", err) + } + } + + logger.Debug("DoTCP client created: %s (DNSSEC: %v, KeepAlive: %v)", config.HostAndPort, config.DNSSEC, config.KeepAlive) + return client, nil +} + +func (c *Client) Close() { + logger.Debug("Closing DoTCP client") + c.connMutex.Lock() + defer c.connMutex.Unlock() + + if c.conn != nil { + c.conn.Close() + c.conn = nil + } +} + +func (c *Client) ensureConnection() error { + c.connMutex.Lock() + defer c.connMutex.Unlock() + + if c.conn != nil { + if err := c.conn.SetReadDeadline(time.Now().Add(time.Millisecond)); err == nil { + var testBuf [1]byte + _, err := c.conn.Read(testBuf[:]) + c.conn.SetReadDeadline(time.Time{}) + + if netErr, ok := err.(net.Error); ok && netErr.Timeout() { + return nil + } + + logger.Debug("DoTCP connection test failed, reconnecting: %v", err) + c.conn.Close() + c.conn = nil + } + } + + logger.Debug("Establishing DoTCP connection to %s", c.hostAndPort) + dialer := &net.Dialer{ + Timeout: c.config.WriteTimeout, + } + + conn, err := dialer.Dial("tcp", c.hostAndPort) + if err != nil { + logger.Error("DoTCP connection failed to %s: %v", c.hostAndPort, err) + return err + } + + c.conn = conn + logger.Debug("DoTCP connection established to %s", c.hostAndPort) + return nil +} + +func (c *Client) Query(msg *dns.Msg) (*dns.Msg, error) { + if len(msg.Question) > 0 { + question := msg.Question[0] + logger.Debug("DoTCP query: %s %s to %s", question.Name, dns.TypeToString[question.Qtype], c.hostAndPort) + } + + if c.config.KeepAlive { + if err := c.ensureConnection(); err != nil { + return nil, fmt.Errorf("dotcp: failed to ensure connection: %w", err) + } + } else { + c.connMutex.Lock() + if c.conn != nil { + c.conn.Close() + c.conn = nil + } + c.connMutex.Unlock() + + if err := c.ensureConnection(); err != nil { + return nil, fmt.Errorf("dotcp: failed to create connection: %w", err) + } + } + + if c.config.DNSSEC { + msg.SetEdns0(4096, true) + } + + packed, err := msg.Pack() + if err != nil { + logger.Error("DoTCP failed to pack message: %v", err) + return nil, fmt.Errorf("dotcp: failed to pack message: %w", err) + } + + // DNS over TCP uses 2-byte length prefix + length := make([]byte, 2) + binary.BigEndian.PutUint16(length, uint16(len(packed))) + data := append(length, packed...) + + c.connMutex.Lock() + conn := c.conn + c.connMutex.Unlock() + + if err := conn.SetWriteDeadline(time.Now().Add(c.config.WriteTimeout)); err != nil { + logger.Error("DoTCP failed to set write deadline: %v", err) + return nil, fmt.Errorf("dotcp: failed to set write deadline: %w", err) + } + + if _, err := conn.Write(data); err != nil { + logger.Error("DoTCP failed to write message to %s: %v", c.hostAndPort, err) + + if c.config.KeepAlive { + logger.Debug("DoTCP write failed with keep-alive, attempting reconnect") + if reconnectErr := c.ensureConnection(); reconnectErr != nil { + return nil, fmt.Errorf("dotcp: failed to reconnect: %w", reconnectErr) + } + + c.connMutex.Lock() + conn = c.conn + c.connMutex.Unlock() + + if err := conn.SetWriteDeadline(time.Now().Add(c.config.WriteTimeout)); err != nil { + return nil, fmt.Errorf("dotcp: failed to set write deadline after reconnect: %w", err) + } + + if _, err := conn.Write(data); err != nil { + return nil, fmt.Errorf("dotcp: failed to write message after reconnect: %w", err) + } + } else { + return nil, fmt.Errorf("dotcp: failed to write message: %w", err) + } + } + + if err := conn.SetReadDeadline(time.Now().Add(c.config.ReadTimeout)); err != nil { + logger.Error("DoTCP failed to set read deadline: %v", err) + return nil, fmt.Errorf("dotcp: failed to set read deadline: %w", err) + } + + lengthBuf := make([]byte, 2) + if _, err := io.ReadFull(conn, lengthBuf); err != nil { + logger.Error("DoTCP failed to read response length from %s: %v", c.hostAndPort, err) + return nil, fmt.Errorf("dotcp: failed to read response length: %w", err) + } + + msgLen := binary.BigEndian.Uint16(lengthBuf) + if msgLen > dns.MaxMsgSize { + logger.Error("DoTCP response too large from %s: %d bytes", c.hostAndPort, msgLen) + return nil, fmt.Errorf("dotcp: response message too large: %d", msgLen) + } + + buffer := make([]byte, msgLen) + if _, err := io.ReadFull(conn, buffer); err != nil { + logger.Error("DoTCP failed to read response from %s: %v", c.hostAndPort, err) + return nil, fmt.Errorf("dotcp: failed to read response: %w", err) + } + + response := new(dns.Msg) + if err := response.Unpack(buffer); err != nil { + logger.Error("DoTCP failed to unpack response from %s: %v", c.hostAndPort, err) + return nil, fmt.Errorf("dotcp: failed to unpack response: %w", err) + } + + if len(response.Answer) > 0 { + logger.Debug("DoTCP response from %s: %d answers", c.hostAndPort, len(response.Answer)) + } + + if !c.config.KeepAlive { + c.connMutex.Lock() + if c.conn != nil { + c.conn.Close() + c.conn = nil + } + c.connMutex.Unlock() + } + + return response, nil +} diff --git a/common/protocols/do53/do53.go b/common/protocols/doudp/doudp.go similarity index 50% rename from common/protocols/do53/do53.go rename to common/protocols/doudp/doudp.go index da626d3..dfaad4d 100644 --- a/common/protocols/do53/do53.go +++ b/common/protocols/doudp/doudp.go @@ -1,4 +1,4 @@ -package do53 +package doudp import ( "fmt" @@ -22,11 +22,11 @@ type Client struct { } func New(config Config) (*Client, error) { - logger.Debug("Creating DO53 client: %s", config.HostAndPort) - + logger.Debug("Creating DoUDP client: %s", config.HostAndPort) + if config.HostAndPort == "" { - logger.Error("DO53 client creation failed: empty HostAndPort") - return nil, fmt.Errorf("do53: HostAndPort cannot be empty") + logger.Error("DoUDP client creation failed: empty HostAndPort") + return nil, fmt.Errorf("doudp: HostAndPort cannot be empty") } if config.WriteTimeout <= 0 { config.WriteTimeout = 2 * time.Second @@ -35,7 +35,7 @@ func New(config Config) (*Client, error) { config.ReadTimeout = 5 * time.Second } - logger.Debug("DO53 client created: %s (DNSSEC: %v)", config.HostAndPort, config.DNSSEC) + logger.Debug("DoUDP client created: %s (DNSSEC: %v)", config.HostAndPort, config.DNSSEC) return &Client{ hostAndPort: config.HostAndPort, @@ -44,36 +44,35 @@ func New(config Config) (*Client, error) { } func (c *Client) Close() { - logger.Debug("Closing DO53 client") + logger.Debug("Closing DoUDP client") } func (c *Client) createConnection() (*net.UDPConn, error) { udpAddr, err := net.ResolveUDPAddr("udp", c.hostAndPort) if err != nil { - logger.Error("DO53 failed to resolve address %s: %v", c.hostAndPort, err) + logger.Error("DoUDP failed to resolve address %s: %v", c.hostAndPort, err) return nil, fmt.Errorf("failed to resolve UDP address: %w", err) } conn, err := net.DialUDP("udp", nil, udpAddr) if err != nil { - logger.Error("DO53 failed to connect to %s: %v", c.hostAndPort, err) + logger.Error("DoUDP failed to connect to %s: %v", c.hostAndPort, err) return nil, err } - logger.Debug("DO53 connection established to %s", c.hostAndPort) + logger.Debug("DoUDP connection established to %s", c.hostAndPort) return conn, nil } func (c *Client) Query(msg *dns.Msg) (*dns.Msg, error) { if len(msg.Question) > 0 { question := msg.Question[0] - logger.Debug("DO53 query: %s %s to %s", question.Name, dns.TypeToString[question.Qtype], c.hostAndPort) + logger.Debug("DoUDP query: %s %s to %s", question.Name, dns.TypeToString[question.Qtype], c.hostAndPort) } - // Create connection for this query conn, err := c.createConnection() if err != nil { - return nil, fmt.Errorf("do53: failed to create connection: %w", err) + return nil, fmt.Errorf("doudp: failed to create connection: %w", err) } defer conn.Close() @@ -83,43 +82,40 @@ func (c *Client) Query(msg *dns.Msg) (*dns.Msg, error) { packedMsg, err := msg.Pack() if err != nil { - logger.Error("DO53 failed to pack message: %v", err) - return nil, fmt.Errorf("do53: failed to pack DNS message: %w", err) + logger.Error("DoUDP failed to pack message: %v", err) + return nil, fmt.Errorf("doudp: failed to pack DNS message: %w", err) } - // Send query if err := conn.SetWriteDeadline(time.Now().Add(c.config.WriteTimeout)); err != nil { - logger.Error("DO53 failed to set write deadline: %v", err) - return nil, fmt.Errorf("do53: failed to set write deadline: %w", err) + logger.Error("DoUDP failed to set write deadline: %v", err) + return nil, fmt.Errorf("doudp: failed to set write deadline: %w", err) } if _, err := conn.Write(packedMsg); err != nil { - logger.Error("DO53 failed to send query to %s: %v", c.hostAndPort, err) - return nil, fmt.Errorf("do53: failed to send DNS query: %w", err) + logger.Error("DoUDP failed to send query to %s: %v", c.hostAndPort, err) + return nil, fmt.Errorf("doudp: failed to send DNS query: %w", err) } - // Read response if err := conn.SetReadDeadline(time.Now().Add(c.config.ReadTimeout)); err != nil { - logger.Error("DO53 failed to set read deadline: %v", err) - return nil, fmt.Errorf("do53: failed to set read deadline: %w", err) + logger.Error("DoUDP failed to set read deadline: %v", err) + return nil, fmt.Errorf("doudp: failed to set read deadline: %w", err) } buffer := make([]byte, dns.MaxMsgSize) n, err := conn.Read(buffer) if err != nil { - logger.Error("DO53 failed to read response from %s: %v", c.hostAndPort, err) - return nil, fmt.Errorf("do53: failed to read DNS response: %w", err) + logger.Error("DoUDP failed to read response from %s: %v", c.hostAndPort, err) + return nil, fmt.Errorf("doudp: failed to read DNS response: %w", err) } - // Parse response response := new(dns.Msg) if err := response.Unpack(buffer[:n]); err != nil { - logger.Error("DO53 failed to unpack response from %s: %v", c.hostAndPort, err) - return nil, fmt.Errorf("do53: failed to unpack DNS response: %w", err) + logger.Error("DoUDP failed to unpack response from %s: %v", c.hostAndPort, err) + return nil, fmt.Errorf("doudp: failed to unpack DNS response: %w", err) } if len(response.Answer) > 0 { - logger.Debug("DO53 response from %s: %d answers", c.hostAndPort, len(response.Answer)) + logger.Debug("DoUDP response from %s: %d answers", c.hostAndPort, len(response.Answer)) } return response, nil diff --git a/go.mod b/go.mod index 4184108..d461476 100644 --- a/go.mod +++ b/go.mod @@ -1,26 +1,29 @@ module github.com/afonsofrancof/sdns-proxy -go 1.24.0 +go 1.24.1 require ( github.com/alecthomas/kong v1.8.1 + github.com/ameshkov/dnscrypt/v2 v2.4.0 github.com/google/gopacket v1.1.19 - github.com/miekg/dns v1.1.63 + github.com/miekg/dns v1.1.65 github.com/quic-go/quic-go v0.50.0 - golang.org/x/net v0.35.0 + golang.org/x/net v0.38.0 ) require ( + github.com/AdguardTeam/golibs v0.32.7 // indirect + github.com/ameshkov/dnsstamps v1.0.3 // indirect github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 // indirect github.com/google/pprof v0.0.0-20210407192527-94a9f03dee38 // indirect github.com/onsi/ginkgo/v2 v2.9.5 // indirect github.com/quic-go/qpack v0.5.1 // indirect go.uber.org/mock v0.5.0 // indirect - golang.org/x/crypto v0.33.0 // indirect - golang.org/x/exp v0.0.0-20240506185415-9bf2ced13842 // indirect - golang.org/x/mod v0.18.0 // indirect - golang.org/x/sync v0.11.0 // indirect - golang.org/x/sys v0.30.0 // indirect - golang.org/x/text v0.22.0 // indirect - golang.org/x/tools v0.22.0 // indirect + golang.org/x/crypto v0.37.0 // indirect + golang.org/x/exp v0.0.0-20250305212735-054e65f0b394 // indirect + golang.org/x/mod v0.24.0 // indirect + golang.org/x/sync v0.13.0 // indirect + golang.org/x/sys v0.32.0 // indirect + golang.org/x/text v0.24.0 // indirect + golang.org/x/tools v0.31.0 // indirect ) diff --git a/go.sum b/go.sum index 69ae768..3d0c331 100644 --- a/go.sum +++ b/go.sum @@ -1,23 +1,29 @@ +github.com/AdguardTeam/golibs v0.32.7 h1:3dmGlAVgmvquCCwHsvEl58KKcRAK3z1UnjMnwSIeDH4= +github.com/AdguardTeam/golibs v0.32.7/go.mod h1:bE8KV1zqTzgZjmjFyBJ9f9O5DEKO717r7e57j1HclJA= github.com/alecthomas/assert/v2 v2.11.0 h1:2Q9r3ki8+JYXvGsDyBXwH3LcJ+WK5D0gc5E8vS6K3D0= github.com/alecthomas/assert/v2 v2.11.0/go.mod h1:Bze95FyfUr7x34QZrjL+XP+0qgp/zg8yS+TtBj1WA3k= github.com/alecthomas/kong v1.8.1 h1:6aamvWBE/REnR/BCq10EcozmcpUPc5aGI1lPAWdB0EE= github.com/alecthomas/kong v1.8.1/go.mod h1:p2vqieVMeTAnaC83txKtXe8FLke2X07aruPWXyMPQrU= github.com/alecthomas/repr v0.4.0 h1:GhI2A8MACjfegCPVq9f1FLvIBS+DrQ2KQBFZP1iFzXc= github.com/alecthomas/repr v0.4.0/go.mod h1:Fr0507jx4eOXV7AlPV6AVZLYrLIuIeSOWtW57eE/O/4= +github.com/ameshkov/dnscrypt/v2 v2.4.0 h1:if6ZG2cuQmcP2TwSY+D0+8+xbPfoatufGlOQTMNkI9o= +github.com/ameshkov/dnscrypt/v2 v2.4.0/go.mod h1:WpEFV2uhebXb8Jhes/5/fSdpmhGV8TL22RDaeWwV6hI= +github.com/ameshkov/dnsstamps v1.0.3 h1:Srzik+J9mivH1alRACTbys2xOxs0lRH9qnTA7Y1OYVo= +github.com/ameshkov/dnsstamps v1.0.3/go.mod h1:Ii3eUu73dx4Vw5O4wjzmT5+lkCwovjzaEZZ4gKyIH5A= github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI= github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI= github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/go-logr/logr v1.2.4 h1:g01GSCwiDw2xSZfjJ2/T9M+S6pFdcNtFYsp+Y43HYDQ= -github.com/go-logr/logr v1.2.4/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= +github.com/go-logr/logr v1.4.2 h1:6pFjapn8bFcIbiKo3XT4j/BhANplGihG6tvd+8rYgrY= +github.com/go-logr/logr v1.4.2/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 h1:tfuBGBXKqDEevZMzYi5KSi8KkcZtzBcTgAUUtapy0OI= github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572/go.mod h1:9Pwr4B2jHnOSGXyyzV8ROjYa2ojvAY6HCGYYfMoC3Ls= github.com/golang/protobuf v1.5.3 h1:KhyjKVUg7Usr/dYsdSqoFveMYd5ko72D+zANwlG1mmg= github.com/golang/protobuf v1.5.3/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= -github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= -github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= +github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= github.com/google/gopacket v1.1.19 h1:ves8RnFZPGiFnTS0uPQStjwru6uO6h+nlr9j6fL7kF8= github.com/google/gopacket v1.1.19/go.mod h1:iJ8V8n6KS+z2U1A8pUwu8bW5SyEMkXJB8Yo/Vo+TKTo= github.com/google/pprof v0.0.0-20210407192527-94a9f03dee38 h1:yAJXTCF9TqKcTiHJAE8dj7HMvPfh66eeA2JYW7eFpSE= @@ -25,8 +31,8 @@ github.com/google/pprof v0.0.0-20210407192527-94a9f03dee38/go.mod h1:kpwsk12EmLe github.com/hexops/gotextdiff v1.0.3 h1:gitA9+qJrrTCsiCl7+kh75nPqQt1cx4ZkudSTLoUqJM= github.com/hexops/gotextdiff v1.0.3/go.mod h1:pSWU5MAI3yDq+fZBTazCSJysOMbxWL1BSow5/V2vxeg= github.com/ianlancetaylor/demangle v0.0.0-20200824232613-28f6c0f3b639/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc= -github.com/miekg/dns v1.1.63 h1:8M5aAw6OMZfFXTT7K5V0Eu5YiiL8l7nUAkyN6C9YwaY= -github.com/miekg/dns v1.1.63/go.mod h1:6NGHfjhpmr5lt3XPLuyfDJi5AXbNIPM9PY6H6sF1Nfs= +github.com/miekg/dns v1.1.65 h1:0+tIPHzUW0GCge7IiK3guGP57VAw7hoPDfApjkMD1Fc= +github.com/miekg/dns v1.1.65/go.mod h1:Dzw9769uoKVaLuODMDZz9M6ynFU6Em65csPuoi8G0ck= github.com/onsi/ginkgo/v2 v2.9.5 h1:+6Hr4uxzP4XIUyAkg61dWBw8lb/gc4/X5luuxN/EC+Q= github.com/onsi/ginkgo/v2 v2.9.5/go.mod h1:tvAoo1QUJwNEU2ITftXTpR7R1RbCzoZUOs3RonqW57k= github.com/onsi/gomega v1.27.6 h1:ENqfyGeS5AX/rlXDd/ETokDz93u0YufY1Pgxuy/PvWE= @@ -39,43 +45,43 @@ github.com/quic-go/quic-go v0.50.0 h1:3H/ld1pa3CYhkcc20TPIyG1bNsdhn9qZBGN3b9/UyU github.com/quic-go/quic-go v0.50.0/go.mod h1:Vim6OmUvlYdwBhXP9ZVrtGmCMWa3wEqhq3NgYrI8b4E= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= -github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= -github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= +github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA= +github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= go.uber.org/mock v0.5.0 h1:KAMbZvZPyBPWgD14IrIQ38QCyjwpvVVV6K/bHl1IwQU= go.uber.org/mock v0.5.0/go.mod h1:ge71pBPLYDk7QIi1LupWxdAykm7KIEFchiOqd6z7qMM= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= -golang.org/x/crypto v0.33.0 h1:IOBPskki6Lysi0lo9qQvbxiQ+FvsCC/YWOecCHAixus= -golang.org/x/crypto v0.33.0/go.mod h1:bVdXmD7IV/4GdElGPozy6U7lWdRXA4qyRVGJV57uQ5M= -golang.org/x/exp v0.0.0-20240506185415-9bf2ced13842 h1:vr/HnozRka3pE4EsMEg1lgkXJkTFJCVUX+S/ZT6wYzM= -golang.org/x/exp v0.0.0-20240506185415-9bf2ced13842/go.mod h1:XtvwrStGgqGPLc4cjQfWqZHG1YFdYs6swckp8vpsjnc= +golang.org/x/crypto v0.37.0 h1:kJNSjF/Xp7kU0iB2Z+9viTPMW4EqqsrywMXLJOOsXSE= +golang.org/x/crypto v0.37.0/go.mod h1:vg+k43peMZ0pUMhYmVAWysMK35e6ioLh3wB8ZCAfbVc= +golang.org/x/exp v0.0.0-20250305212735-054e65f0b394 h1:nDVHiLt8aIbd/VzvPWN6kSOPE7+F/fNFDSXLVYkE/Iw= +golang.org/x/exp v0.0.0-20250305212735-054e65f0b394/go.mod h1:sIifuuw/Yco/y6yb6+bDNfyeQ/MdPUy/hKEMYQV17cM= golang.org/x/lint v0.0.0-20200302205851-738671d3881b/go.mod h1:3xt1FjdF8hUf6vQPIChWIBhFzV8gjjsPE/fR3IyQdNY= golang.org/x/mod v0.1.1-0.20191105210325-c90efee705ee/go.mod h1:QqPTAvyqsEbceGzBzNggFXnrqF1CaUcvgkdR5Ot7KZg= -golang.org/x/mod v0.18.0 h1:5+9lSbEzPSdWkH32vYPBwEpX8KwDbM52Ud9xBUvNlb0= -golang.org/x/mod v0.18.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= +golang.org/x/mod v0.24.0 h1:ZfthKaKaT4NrhGVZHO1/WDTwGES4De8KtWO0SIbNJMU= +golang.org/x/mod v0.24.0/go.mod h1:IXM97Txy2VM4PJ3gI61r1YEk/gAj6zAHN3AdZt6S9Ww= golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= -golang.org/x/net v0.35.0 h1:T5GQRQb2y08kTAByq9L4/bz8cipCdA8FbRTXewonqY8= -golang.org/x/net v0.35.0/go.mod h1:EglIi67kWsHKlRzzVMUD93VMSWGFOMSZgxFjparz1Qk= +golang.org/x/net v0.38.0 h1:vRMAPTMaeGqVhG5QyLJHqNDwecKTomGeqbnfZyKlBI8= +golang.org/x/net v0.38.0/go.mod h1:ivrbrMbzFq5J41QOQh0siUuly180yBYtLp+CKbEaFx8= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.11.0 h1:GGz8+XQP4FvTTrjZPzNKTMFtSXH80RAzG+5ghFPgK9w= -golang.org/x/sync v0.11.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= +golang.org/x/sync v0.13.0 h1:AauUjRAJ9OSnvULf/ARrrVywoJDy0YS2AwQ98I37610= +golang.org/x/sync v0.13.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191204072324-ce4227a45e2e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.30.0 h1:QjkSwP/36a20jFYWkSue1YwXzLmsV5Gfq7Eiy72C1uc= -golang.org/x/sys v0.30.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.32.0 h1:s77OFDvIQeibCmezSnk/q6iAfkdiQaJi4VzroCFrN20= +golang.org/x/sys v0.32.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= -golang.org/x/text v0.22.0 h1:bofq7m3/HAFvbF51jz3Q9wLg3jkvSPuiZu/pD1XwgtM= -golang.org/x/text v0.22.0/go.mod h1:YRoo4H8PVmsu+E3Ou7cqLVH8oXWIHVoX0jqUWALQhfY= -golang.org/x/time v0.5.0 h1:o7cqy6amK/52YcAKIPlM3a+Fpj35zvRj2TP+e1xFSfk= -golang.org/x/time v0.5.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM= +golang.org/x/text v0.24.0 h1:dd5Bzh4yt5KYA8f9CJHCP4FB4D51c2c6JvN37xJJkJ0= +golang.org/x/text v0.24.0/go.mod h1:L8rBsPeo2pSS+xqN0d5u2ikmjtmoJbDBT1b7nHvFCdU= +golang.org/x/time v0.11.0 h1:/bpjEDfN9tkoN/ryeYHnv5hcMlc8ncjMcM4XBk5NWV0= +golang.org/x/time v0.11.0/go.mod h1:CDIdPxbZBQxdj6cxyCIdrNogrJKMJ7pr37NYpMcMDSg= golang.org/x/tools v0.0.0-20200130002326-2f3ba24bd6e7/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= -golang.org/x/tools v0.22.0 h1:gqSGLZqv+AI9lIQzniJ0nZDRG5GBPsSi+DRNHWNz6yA= -golang.org/x/tools v0.22.0/go.mod h1:aCwcsjqvq7Yqt6TNyX7QMU2enbQ/Gt0bo6krSeEri+c= +golang.org/x/tools v0.31.0 h1:0EedkvKDbh+qistFTd0Bcwe/YLh4vHwWEkiI0toFIBU= +golang.org/x/tools v0.31.0/go.mod h1:naFTU+Cev749tSJRXJlna0T3WxKvb1kWEx15xA4SdmQ= golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -google.golang.org/protobuf v1.33.0 h1:uNO2rsAINq/JlFpSdYEKIZ0uKD/R9cpdv0T+yoGwGmI= -google.golang.org/protobuf v1.33.0/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos= +google.golang.org/protobuf v1.36.5 h1:tPhr+woSbjfYvY6/GPufUoYizxw1cF/yFoxJ2fmpwlM= +google.golang.org/protobuf v1.36.5/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= diff --git a/internal/qol/utils.go b/internal/qol/utils.go index df3ae4e..77468bb 100644 --- a/internal/qol/utils.go +++ b/internal/qol/utils.go @@ -1,3 +1,4 @@ +// ./internal/qol/utils.go package qol import ( @@ -15,7 +16,7 @@ func GenerateOutputPaths(outputDir, upstream string, dnssec, authDNSSEC, keepAli base := proto var flags []string - + if dnssec { if authDNSSEC { flags = append(flags, "auth") @@ -57,6 +58,7 @@ func cleanServerName(server string) string { "94.140.15.15": "adguard", "dns.adguard.com": "adguard", "dns.adguard-dns.com": "adguard", + "AQMAAAAAAAAAETk0LjE0MC4xNS4xNTo1NDQzINErR_JS3PLCu_iZEIbq95zkSV2LFsigxDIuUso_OQhzIjIuZG5zY3J5cHQuZGVmYXVsdC5uczEuYWRndWFyZC5jb20": "adguard", } serverName := "" @@ -76,11 +78,31 @@ func cleanServerName(server string) string { } func DetectProtocol(upstream string) string { + if strings.Contains(upstream, "://") { u, err := url.Parse(upstream) if err == nil && u.Scheme != "" { - return strings.ToLower(u.Scheme) + scheme := strings.ToLower(u.Scheme) + // Normalize scheme names + switch scheme { + case "udp", "doudp": + return "doudp" + case "tcp", "dotcp": + return "dotcp" + case "tls", "dot": + return "dot" + case "https", "doh": + return "doh" + case "doh3": + return "doh3" + case "doq": + return "doq" + case "sdns": + return "dnscrypt" + default: + return scheme + } } } - return "do53" + return "doudp" } diff --git a/scripts/analysis/analyze_dns_metrics.py b/scripts/analysis/analyze_dns_metrics.py index 9428b10..81e6a3b 100644 --- a/scripts/analysis/analyze_dns_metrics.py +++ b/scripts/analysis/analyze_dns_metrics.py @@ -1,289 +1,498 @@ -import csv -import os -import statistics -from collections import defaultdict +import pandas as pd +import matplotlib.pyplot as plt +import seaborn as sns +import numpy as np from pathlib import Path +from scipy import stats +import warnings -def map_server_to_resolver(server): - """Map server address/domain to resolver name""" - server_lower = server.lower() - - if '1.1.1.1' in server_lower or 'cloudflare' in server_lower: - return 'Cloudflare' - elif '8.8.8.8' in server_lower or 'google' in server_lower: - return 'Google' - elif '9.9.9.9' in server_lower or 'quad9' in server_lower: - return 'Quad9' - elif 'adguard' in server_lower: - return 'AdGuard' - else: - return server # Fallback to original server name +warnings.filterwarnings('ignore') -def extract_from_new_format(filename): - """Parse new filename format: protocol[-flags]-timestamp.csv""" - base = filename.replace('.csv', '') - parts = base.split('-') - - if len(parts) < 2: - return None, None, None, None - - protocol = parts[0] - timestamp = parts[-1] - - # Flags are everything between protocol and timestamp - flags_str = '-'.join(parts[1:-1]) - - # Determine DNSSEC status - if 'auth' in flags_str: - dnssec_status = 'auth' # Authoritative DNSSEC - elif 'trust' in flags_str: - dnssec_status = 'trust' # Trust-based DNSSEC - else: - dnssec_status = 'off' - - keepalive_status = 'on' if 'persist' in flags_str else 'off' - - return protocol, dnssec_status, keepalive_status, flags_str +# Set style for publication-quality plots +sns.set_style("whitegrid") +plt.rcParams['figure.dpi'] = 300 +plt.rcParams['savefig.dpi'] = 300 +plt.rcParams['font.size'] = 10 +plt.rcParams['figure.figsize'] = (12, 6) -def extract_server_info_from_csv(row): - """Extract DNSSEC info from CSV row data""" - dnssec = row.get('dnssec', 'false').lower() == 'true' - auth_dnssec = row.get('auth_dnssec', 'false').lower() == 'true' - keepalive = row.get('keep_alive', 'false').lower() == 'true' - - if dnssec: - if auth_dnssec: - dnssec_status = 'auth' - else: - dnssec_status = 'trust' - else: - dnssec_status = 'off' - - keepalive_status = 'on' if keepalive else 'off' - - return dnssec_status, keepalive_status - -def extract_server_info(file_path, row): - """Extract info using directory structure, filename, and CSV data""" - path = Path(file_path) - - # First try to get DNSSEC info from CSV row (most accurate) - try: - csv_dnssec_status, csv_keepalive_status = extract_server_info_from_csv(row) - protocol = row.get('protocol', '').lower() +class DNSAnalyzer: + def __init__(self, results_dir='results'): + self.results_dir = Path(results_dir) + self.df = None - # Get server from directory structure - parts = path.parts - if len(parts) >= 4: - potential_date = parts[-2] - # Check if it's a date like YYYY-MM-DD - if len(potential_date) == 10 and potential_date[4] == '-' and potential_date[7] == '-' and potential_date.replace('-', '').isdigit(): - server = parts[-3] # resolver folder (e.g., cloudflare) - return protocol, server, csv_dnssec_status, csv_keepalive_status + def load_all_data(self): + """Load all CSV files from the results directory""" + data_frames = [] - # Fallback to DNS server field - server = row.get('dns_server', '') - return protocol, server, csv_dnssec_status, csv_keepalive_status + providers = ['adguard', 'cloudflare', 'google', 'quad9'] - except (KeyError, ValueError): - pass - - # Fallback to filename parsing - filename = path.name - protocol, dnssec_status, keepalive_status, flags = extract_from_new_format(filename) - - if protocol: - # Get server from directory structure - parts = path.parts - if len(parts) >= 4: - potential_date = parts[-2] - if len(potential_date) == 10 and potential_date[4] == '-' and potential_date[7] == '-' and potential_date.replace('-', '').isdigit(): - server = parts[-3] - return protocol, server, dnssec_status, keepalive_status - - # Fallback to DNS server field - server = row.get('dns_server', '') - return protocol, server, dnssec_status, keepalive_status - - return None, None, None, None - -def get_dnssec_display_name(dnssec_status): - """Convert DNSSEC status to display name""" - if dnssec_status == 'auth': - return 'DNSSEC (Authoritative)' - elif dnssec_status == 'trust': - return 'DNSSEC (Trust-based)' - else: - return 'No DNSSEC' - -def analyze_dns_data(root_directory, output_file): - """Analyze DNS data and generate metrics""" - - # Dictionary to store measurements: {(resolver, protocol, dnssec, keepalive): [durations]} - measurements = defaultdict(list) - - # Walk through all directories - for root, dirs, files in os.walk(root_directory): - for file in files: - if file.endswith('.csv'): - file_path = os.path.join(root, file) - print(f"Processing: {file_path}") + for provider in providers: + provider_path = self.results_dir / provider + if not provider_path.exists(): + continue + for csv_file in provider_path.glob('*.csv'): try: - with open(file_path, 'r', newline='') as csvfile: - reader = csv.DictReader(csvfile) - - for row_num, row in enumerate(reader, 2): # Start at 2 since header is row 1 - try: - protocol, server, dnssec_status, keepalive_status = extract_server_info(file_path, row) - - if protocol and server: - resolver = map_server_to_resolver(server) - duration_ms = float(row.get('duration_ms', 0)) - - # Only include successful queries - if row.get('response_code', '') in ['NOERROR', '']: - key = (resolver, protocol, dnssec_status, keepalive_status) - measurements[key].append(duration_ms) - - except (ValueError, TypeError) as e: - print(f"Data parse error in {file_path} row {row_num}: {e}") - continue - + df = pd.read_csv(csv_file) + df['provider'] = provider + df['test_config'] = csv_file.stem + data_frames.append(df) except Exception as e: - print(f"Error processing file {file_path}: {e}") - continue - - # Calculate statistics grouped by resolver first, then by configuration - resolver_results = defaultdict(lambda: defaultdict(lambda: defaultdict(list))) - - for (resolver, protocol, dnssec, keepalive), durations in measurements.items(): - if durations: - stats = { - 'protocol': protocol.upper(), - 'dnssec': dnssec, - 'keepalive': keepalive, - 'total_queries': len(durations), - 'avg_latency_ms': round(statistics.mean(durations), 3), - 'median_latency_ms': round(statistics.median(durations), 3), - 'min_latency_ms': round(min(durations), 3), - 'max_latency_ms': round(max(durations), 3), - 'std_dev_ms': round(statistics.stdev(durations) if len(durations) > 1 else 0, 3), - 'p95_latency_ms': round(statistics.quantiles(durations, n=20)[18], 3) if len(durations) >= 20 else round(max(durations), 3), - 'p99_latency_ms': round(statistics.quantiles(durations, n=100)[98], 3) if len(durations) >= 100 else round(max(durations), 3) - } - # Group by resolver -> dnssec -> keepalive -> protocol - resolver_results[resolver][dnssec][keepalive].append(stats) - - # Sort each configuration's results by average latency - for resolver in resolver_results: - for dnssec in resolver_results[resolver]: - for keepalive in resolver_results[resolver][dnssec]: - resolver_results[resolver][dnssec][keepalive].sort(key=lambda x: x['avg_latency_ms']) - - # Write to CSV with all data - all_results = [] - for resolver in resolver_results: - for dnssec in resolver_results[resolver]: - for keepalive in resolver_results[resolver][dnssec]: - for result in resolver_results[resolver][dnssec][keepalive]: - result['resolver'] = resolver - all_results.append(result) - - with open(output_file, 'w', newline='') as csvfile: - fieldnames = [ - 'resolver', 'protocol', 'dnssec', 'keepalive', 'total_queries', - 'avg_latency_ms', 'median_latency_ms', 'min_latency_ms', - 'max_latency_ms', 'std_dev_ms', 'p95_latency_ms', 'p99_latency_ms' - ] + print(f"Error loading {csv_file}: {e}") - writer = csv.DictWriter(csvfile, fieldnames=fieldnames) - writer.writeheader() - writer.writerows(all_results) - - print(f"\nAnalysis complete! Full results written to {output_file}") - print(f"Total measurements: {sum(len(durations) for durations in measurements.values())}") - - def print_configuration_table(resolver, dnssec_status, keepalive_status, results): - """Print a formatted table for a specific configuration""" - ka_indicator = "PERSISTENT" if keepalive_status == 'on' else "NEW CONN" - dnssec_display = get_dnssec_display_name(dnssec_status) + self.df = pd.concat(data_frames, ignore_index=True) + self._clean_and_enrich_data() + print(f"Loaded {len(self.df)} DNS queries across {len(data_frames)} test configurations") - print(f"\n {dnssec_display} - {ka_indicator}") - print(" " + "-" * 90) - print(f" {'Protocol':<12} {'Queries':<8} {'Avg(ms)':<10} {'Median(ms)':<12} {'Min(ms)':<10} {'Max(ms)':<10} {'P95(ms)':<10}") - print(" " + "-" * 90) + def _clean_and_enrich_data(self): + """Clean data and add useful columns""" + # Remove failed queries + self.df = self.df[self.df['error'].isna()] - for result in results: - print(f" {result['protocol']:<12} {result['total_queries']:<8} " - f"{result['avg_latency_ms']:<10} {result['median_latency_ms']:<12} " - f"{result['min_latency_ms']:<10} {result['max_latency_ms']:<10} " - f"{result['p95_latency_ms']:<10}") - - # Print results grouped by resolver first - print(f"\n{'=' * 100}") - print("DNS RESOLVER PERFORMANCE COMPARISON") - print(f"{'=' * 100}") - - for resolver in sorted(resolver_results.keys()): - print(f"\n{resolver} DNS Resolver") - print("=" * 100) + # Extract protocol base (remove -auth, -trust suffixes) + self.df['protocol_base'] = self.df['protocol'].str.replace('-auth|-trust', '', regex=True) - # Order configurations logically - config_order = [ - ('off', 'off'), # No DNSSEC, New connections - ('off', 'on'), # No DNSSEC, Persistent - ('trust', 'off'), # Trust DNSSEC, New connections - ('trust', 'on'), # Trust DNSSEC, Persistent - ('auth', 'off'), # Auth DNSSEC, New connections - ('auth', 'on'), # Auth DNSSEC, Persistent - ] + # DNSSEC configuration + self.df['dnssec_mode'] = 'none' + self.df.loc[self.df['auth_dnssec'] == True, 'dnssec_mode'] = 'auth' + self.df.loc[(self.df['dnssec'] == True) & (self.df['auth_dnssec'] == False), 'dnssec_mode'] = 'trust' - for dnssec_status, keepalive_status in config_order: - if dnssec_status in resolver_results[resolver] and keepalive_status in resolver_results[resolver][dnssec_status]: - results = resolver_results[resolver][dnssec_status][keepalive_status] - if results: # Only print if there are results - print_configuration_table(resolver, dnssec_status, keepalive_status, results) - - # Summary comparison across resolvers - print(f"\n{'=' * 100}") - print("CROSS-RESOLVER PROTOCOL COMPARISON") - print(f"{'=' * 100}") - - # Group by protocol and configuration for cross-resolver comparison - protocol_comparison = defaultdict(lambda: defaultdict(list)) - - for resolver in resolver_results: - for dnssec in resolver_results[resolver]: - for keepalive in resolver_results[resolver][dnssec]: - for result in resolver_results[resolver][dnssec][keepalive]: - config_key = f"{get_dnssec_display_name(dnssec)} - {'PERSISTENT' if keepalive == 'on' else 'NEW CONN'}" - protocol_comparison[result['protocol']][config_key].append({ - 'resolver': resolver, - 'avg_latency_ms': result['avg_latency_ms'], - 'total_queries': result['total_queries'] - }) - - for protocol in sorted(protocol_comparison.keys()): - print(f"\n{protocol} Protocol Comparison") - print("-" * 100) + # Protocol categories + self.df['protocol_category'] = self.df['protocol_base'].map({ + 'udp': 'Plain DNS', + 'tls': 'DoT', + 'https': 'DoH', + 'doh3': 'DoH/3', + 'doq': 'DoQ' + }) - for config in sorted(protocol_comparison[protocol].keys()): - resolvers_data = protocol_comparison[protocol][config] - if resolvers_data: - print(f"\n {config}") - print(" " + "-" * 60) - print(f" {'Resolver':<15} {'Avg Latency (ms)':<20} {'Queries':<10}") - print(" " + "-" * 60) - - # Sort by average latency - resolvers_data.sort(key=lambda x: x['avg_latency_ms']) - - for data in resolvers_data: - print(f" {data['resolver']:<15} {data['avg_latency_ms']:<20} {data['total_queries']:<10}") + # Connection persistence + self.df['persistence'] = self.df['keep_alive'].fillna(False) + + def generate_summary_statistics(self): + """Generate comprehensive summary statistics""" + print("\n" + "="*80) + print("SUMMARY STATISTICS") + print("="*80) + + # Overall statistics + print("\n--- Overall Performance ---") + print(f"Total queries: {len(self.df)}") + print(f"Mean latency: {self.df['duration_ms'].mean():.2f} ms") + print(f"Median latency: {self.df['duration_ms'].median():.2f} ms") + print(f"95th percentile: {self.df['duration_ms'].quantile(0.95):.2f} ms") + print(f"99th percentile: {self.df['duration_ms'].quantile(0.99):.2f} ms") + + # By protocol + print("\n--- Performance by Protocol ---") + protocol_stats = self.df.groupby('protocol_category')['duration_ms'].agg([ + ('count', 'count'), + ('mean', 'mean'), + ('median', 'median'), + ('std', 'std'), + ('p95', lambda x: x.quantile(0.95)), + ('p99', lambda x: x.quantile(0.99)) + ]).round(2) + print(protocol_stats) + + # By provider + print("\n--- Performance by Provider ---") + provider_stats = self.df.groupby('provider')['duration_ms'].agg([ + ('count', 'count'), + ('mean', 'mean'), + ('median', 'median'), + ('std', 'std'), + ('p95', lambda x: x.quantile(0.95)) + ]).round(2) + print(provider_stats) + + # DNSSEC impact + print("\n--- DNSSEC Validation Impact ---") + dnssec_stats = self.df.groupby('dnssec_mode')['duration_ms'].agg([ + ('count', 'count'), + ('mean', 'mean'), + ('median', 'median'), + ('overhead_vs_none', lambda x: x.mean()) + ]).round(2) + + # Calculate overhead percentage + baseline = dnssec_stats.loc['none', 'mean'] if 'none' in dnssec_stats.index else 0 + if baseline > 0: + dnssec_stats['overhead_pct'] = ((dnssec_stats['overhead_vs_none'] - baseline) / baseline * 100).round(1) + print(dnssec_stats) + + # Bandwidth analysis + print("\n--- Bandwidth Usage ---") + bandwidth_stats = self.df.groupby('protocol_category').agg({ + 'request_size_bytes': ['mean', 'median'], + 'response_size_bytes': ['mean', 'median'] + }).round(2) + print(bandwidth_stats) + + # Persistence impact (where applicable) + print("\n--- Connection Persistence Impact ---") + persist_protocols = self.df[self.df['protocol_base'].isin(['tls', 'https'])] + if len(persist_protocols) > 0: + persist_stats = persist_protocols.groupby(['protocol_base', 'persistence'])['duration_ms'].agg([ + ('mean', 'mean'), + ('median', 'median') + ]).round(2) + print(persist_stats) + + return { + 'protocol': protocol_stats, + 'provider': provider_stats, + 'dnssec': dnssec_stats, + 'bandwidth': bandwidth_stats + } + + def plot_latency_by_protocol(self, output_dir='plots'): + """Violin plot of latency distribution by protocol""" + Path(output_dir).mkdir(exist_ok=True) + + plt.figure(figsize=(14, 7)) + + # Order protocols logically + protocol_order = ['Plain DNS', 'DoT', 'DoH', 'DoH/3', 'DoQ'] + available_protocols = [p for p in protocol_order if p in self.df['protocol_category'].values] + + sns.violinplot(data=self.df, x='protocol_category', y='duration_ms', + order=available_protocols, inner='box', cut=0) + + plt.title('DNS Query Latency Distribution by Protocol', fontsize=14, fontweight='bold') + plt.xlabel('Protocol', fontsize=12) + plt.ylabel('Response Time (ms)', fontsize=12) + plt.xticks(rotation=0) + + # Add mean values as annotations + for i, protocol in enumerate(available_protocols): + mean_val = self.df[self.df['protocol_category'] == protocol]['duration_ms'].mean() + plt.text(i, mean_val, f'{mean_val:.1f}', ha='center', va='bottom', fontweight='bold') + + plt.tight_layout() + plt.savefig(f'{output_dir}/latency_by_protocol.png', bbox_inches='tight') + plt.close() + print(f"✓ Saved: latency_by_protocol.png") + + def plot_provider_comparison(self, output_dir='plots'): + """Box plot comparing providers across protocols""" + Path(output_dir).mkdir(exist_ok=True) + + fig, axes = plt.subplots(2, 2, figsize=(16, 12)) + fig.suptitle('Provider Performance Comparison by Protocol', fontsize=16, fontweight='bold') + + protocols = self.df['protocol_category'].unique() + protocols = [p for p in ['Plain DNS', 'DoT', 'DoH', 'DoH/3'] if p in protocols] + + for idx, protocol in enumerate(protocols[:4]): + ax = axes[idx // 2, idx % 2] + data = self.df[self.df['protocol_category'] == protocol] + + if len(data) > 0: + sns.boxplot(data=data, x='provider', y='duration_ms', ax=ax) + ax.set_title(f'{protocol}', fontsize=12, fontweight='bold') + ax.set_xlabel('Provider', fontsize=10) + ax.set_ylabel('Response Time (ms)', fontsize=10) + ax.tick_params(axis='x', rotation=45) + + plt.tight_layout() + plt.savefig(f'{output_dir}/provider_comparison.png', bbox_inches='tight') + plt.close() + print(f"✓ Saved: provider_comparison.png") + + def plot_dnssec_impact(self, output_dir='plots'): + """Compare DNSSEC validation methods (trust vs auth)""" + Path(output_dir).mkdir(exist_ok=True) + + # Filter for protocols that have DNSSEC variations + dnssec_data = self.df[self.df['dnssec_mode'] != 'none'].copy() + + if len(dnssec_data) == 0: + print("⚠ No DNSSEC data available") + return + + fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 6)) + + # Plot 1: Overall DNSSEC impact + protocol_order = ['Plain DNS', 'DoT', 'DoH', 'DoH/3', 'DoQ'] + available = [p for p in protocol_order if p in self.df['protocol_category'].values] + + sns.barplot(data=self.df, x='protocol_category', y='duration_ms', + hue='dnssec_mode', order=available, ax=ax1, ci=95) + ax1.set_title('DNSSEC Validation Overhead by Protocol', fontsize=12, fontweight='bold') + ax1.set_xlabel('Protocol', fontsize=10) + ax1.set_ylabel('Mean Response Time (ms)', fontsize=10) + ax1.legend(title='DNSSEC Mode', labels=['No DNSSEC', 'Auth (Full)', 'Trust (Resolver)']) + ax1.tick_params(axis='x', rotation=0) + + # Plot 2: Trust vs Auth comparison + comparison_data = dnssec_data.groupby(['protocol_category', 'dnssec_mode'])['duration_ms'].mean().reset_index() + pivot_data = comparison_data.pivot(index='protocol_category', columns='dnssec_mode', values='duration_ms') + + if 'auth' in pivot_data.columns and 'trust' in pivot_data.columns: + pivot_data['overhead_pct'] = ((pivot_data['auth'] - pivot_data['trust']) / pivot_data['trust'] * 100) + pivot_data['overhead_pct'].plot(kind='bar', ax=ax2, color='coral') + ax2.set_title('Auth vs Trust: Additional Overhead (%)', fontsize=12, fontweight='bold') + ax2.set_xlabel('Protocol', fontsize=10) + ax2.set_ylabel('Additional Overhead (%)', fontsize=10) + ax2.axhline(y=0, color='black', linestyle='--', linewidth=0.8) + ax2.tick_params(axis='x', rotation=45) + ax2.grid(axis='y', alpha=0.3) + + plt.tight_layout() + plt.savefig(f'{output_dir}/dnssec_impact.png', bbox_inches='tight') + plt.close() + print(f"✓ Saved: dnssec_impact.png") + + def plot_persistence_impact(self, output_dir='plots'): + """Analyze impact of connection persistence""" + Path(output_dir).mkdir(exist_ok=True) + + persist_data = self.df[self.df['protocol_base'].isin(['tls', 'https'])].copy() + + if len(persist_data) == 0: + print("⚠ No persistence data available") + return + + plt.figure(figsize=(12, 6)) + + sns.barplot(data=persist_data, x='protocol_base', y='duration_ms', + hue='persistence', ci=95) + + plt.title('Impact of Connection Persistence on Latency', fontsize=14, fontweight='bold') + plt.xlabel('Protocol', fontsize=12) + plt.ylabel('Mean Response Time (ms)', fontsize=12) + plt.legend(title='Keep-Alive', labels=['Disabled', 'Enabled']) + + # Calculate and annotate overhead reduction + for protocol in persist_data['protocol_base'].unique(): + protocol_data = persist_data[persist_data['protocol_base'] == protocol] + + no_persist = protocol_data[protocol_data['persistence'] == False]['duration_ms'].mean() + with_persist = protocol_data[protocol_data['persistence'] == True]['duration_ms'].mean() + + if not np.isnan(no_persist) and not np.isnan(with_persist): + reduction = ((no_persist - with_persist) / no_persist * 100) + print(f"{protocol}: {reduction:.1f}% reduction with persistence") + + plt.tight_layout() + plt.savefig(f'{output_dir}/persistence_impact.png', bbox_inches='tight') + plt.close() + print(f"✓ Saved: persistence_impact.png") + + def plot_bandwidth_overhead(self, output_dir='plots'): + """Visualize bandwidth usage by protocol""" + Path(output_dir).mkdir(exist_ok=True) + + bandwidth_data = self.df.groupby('protocol_category').agg({ + 'request_size_bytes': 'mean', + 'response_size_bytes': 'mean' + }).reset_index() + + bandwidth_data['total_bytes'] = (bandwidth_data['request_size_bytes'] + + bandwidth_data['response_size_bytes']) + + fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 6)) + + # Plot 1: Request vs Response sizes + x = np.arange(len(bandwidth_data)) + width = 0.35 + + ax1.bar(x - width/2, bandwidth_data['request_size_bytes'], width, + label='Request', alpha=0.8) + ax1.bar(x + width/2, bandwidth_data['response_size_bytes'], width, + label='Response', alpha=0.8) + + ax1.set_xlabel('Protocol', fontsize=12) + ax1.set_ylabel('Bytes', fontsize=12) + ax1.set_title('Average Request/Response Sizes', fontsize=12, fontweight='bold') + ax1.set_xticks(x) + ax1.set_xticklabels(bandwidth_data['protocol_category']) + ax1.legend() + ax1.grid(axis='y', alpha=0.3) + + # Plot 2: Total bandwidth overhead vs UDP baseline + udp_total = bandwidth_data[bandwidth_data['protocol_category'] == 'Plain DNS']['total_bytes'].values + if len(udp_total) > 0: + bandwidth_data['overhead_vs_udp'] = ((bandwidth_data['total_bytes'] - udp_total[0]) / udp_total[0] * 100) + + colors = ['green' if x < 0 else 'red' for x in bandwidth_data['overhead_vs_udp']] + ax2.bar(bandwidth_data['protocol_category'], bandwidth_data['overhead_vs_udp'], + color=colors, alpha=0.7) + ax2.axhline(y=0, color='black', linestyle='--', linewidth=0.8) + ax2.set_xlabel('Protocol', fontsize=12) + ax2.set_ylabel('Overhead vs Plain DNS (%)', fontsize=12) + ax2.set_title('Bandwidth Overhead', fontsize=12, fontweight='bold') + ax2.grid(axis='y', alpha=0.3) + + plt.tight_layout() + plt.savefig(f'{output_dir}/bandwidth_overhead.png', bbox_inches='tight') + plt.close() + print(f"✓ Saved: bandwidth_overhead.png") + + def plot_heatmap(self, output_dir='plots'): + """Heatmap of provider-protocol performance""" + Path(output_dir).mkdir(exist_ok=True) + + # Create pivot table + heatmap_data = self.df.groupby(['provider', 'protocol_category'])['duration_ms'].median().unstack() + + plt.figure(figsize=(12, 8)) + sns.heatmap(heatmap_data, annot=True, fmt='.1f', cmap='RdYlGn_r', + cbar_kws={'label': 'Median Latency (ms)'}) + + plt.title('DNS Provider-Protocol Performance Matrix', fontsize=14, fontweight='bold') + plt.xlabel('Protocol', fontsize=12) + plt.ylabel('Provider', fontsize=12) + + plt.tight_layout() + plt.savefig(f'{output_dir}/provider_protocol_heatmap.png', bbox_inches='tight') + plt.close() + print(f"✓ Saved: provider_protocol_heatmap.png") + + def plot_percentile_comparison(self, output_dir='plots'): + """Plot percentile comparison across protocols""" + Path(output_dir).mkdir(exist_ok=True) + + percentiles = [50, 75, 90, 95, 99] + protocol_order = ['Plain DNS', 'DoT', 'DoH', 'DoH/3', 'DoQ'] + available = [p for p in protocol_order if p in self.df['protocol_category'].values] + + percentile_data = [] + for protocol in available: + data = self.df[self.df['protocol_category'] == protocol]['duration_ms'] + for p in percentiles: + percentile_data.append({ + 'protocol': protocol, + 'percentile': f'P{p}', + 'latency': np.percentile(data, p) + }) + + percentile_df = pd.DataFrame(percentile_data) + + plt.figure(figsize=(14, 7)) + sns.barplot(data=percentile_df, x='protocol', y='latency', hue='percentile', order=available) + + plt.title('Latency Percentiles by Protocol', fontsize=14, fontweight='bold') + plt.xlabel('Protocol', fontsize=12) + plt.ylabel('Response Time (ms)', fontsize=12) + plt.legend(title='Percentile', bbox_to_anchor=(1.05, 1), loc='upper left') + + plt.tight_layout() + plt.savefig(f'{output_dir}/percentile_comparison.png', bbox_inches='tight') + plt.close() + print(f"✓ Saved: percentile_comparison.png") + + def statistical_tests(self): + """Perform statistical significance tests""" + print("\n" + "="*80) + print("STATISTICAL TESTS") + print("="*80) + + # Test 1: Protocol differences (Kruskal-Wallis) + protocols = self.df['protocol_category'].unique() + if len(protocols) > 2: + groups = [self.df[self.df['protocol_category'] == p]['duration_ms'].values + for p in protocols] + h_stat, p_value = stats.kruskal(*groups) + print(f"\n--- Kruskal-Wallis Test (Protocol Differences) ---") + print(f"H-statistic: {h_stat:.4f}") + print(f"p-value: {p_value:.4e}") + print(f"Result: {'Significant' if p_value < 0.05 else 'Not significant'} differences between protocols") + + # Test 2: DNSSEC impact (Mann-Whitney U) + if 'none' in self.df['dnssec_mode'].values and 'auth' in self.df['dnssec_mode'].values: + none_data = self.df[self.df['dnssec_mode'] == 'none']['duration_ms'] + auth_data = self.df[self.df['dnssec_mode'] == 'auth']['duration_ms'] + + u_stat, p_value = stats.mannwhitneyu(none_data, auth_data, alternative='two-sided') + print(f"\n--- Mann-Whitney U Test (No DNSSEC vs Auth) ---") + print(f"U-statistic: {u_stat:.4f}") + print(f"p-value: {p_value:.4e}") + print(f"Result: {'Significant' if p_value < 0.05 else 'Not significant'} difference") + + # Test 3: Trust vs Auth comparison + if 'trust' in self.df['dnssec_mode'].values and 'auth' in self.df['dnssec_mode'].values: + trust_data = self.df[self.df['dnssec_mode'] == 'trust']['duration_ms'] + auth_data = self.df[self.df['dnssec_mode'] == 'auth']['duration_ms'] + + u_stat, p_value = stats.mannwhitneyu(trust_data, auth_data, alternative='two-sided') + print(f"\n--- Mann-Whitney U Test (Trust vs Auth) ---") + print(f"U-statistic: {u_stat:.4f}") + print(f"p-value: {p_value:.4e}") + print(f"Result: Auth is {'significantly' if p_value < 0.05 else 'not significantly'} slower than Trust") + + def generate_latex_table(self, output_dir='plots'): + """Generate LaTeX table for thesis""" + Path(output_dir).mkdir(exist_ok=True) + + # Summary table by protocol + summary = self.df.groupby('protocol_category')['duration_ms'].agg([ + ('Mean', 'mean'), + ('Median', 'median'), + ('Std Dev', 'std'), + ('P95', lambda x: x.quantile(0.95)), + ('P99', lambda x: x.quantile(0.99)) + ]).round(2) + + latex_code = summary.to_latex(float_format="%.2f") + + with open(f'{output_dir}/summary_table.tex', 'w') as f: + f.write(latex_code) + + print(f"✓ Saved: summary_table.tex") + print("\nLaTeX Table Preview:") + print(latex_code) + + def run_full_analysis(self): + """Run complete analysis pipeline""" + print("="*80) + print("DNS QoS Analysis - Starting Full Analysis") + print("="*80) + + # Load data + print("\n[1/10] Loading data...") + self.load_all_data() + + # Generate statistics + print("\n[2/10] Generating summary statistics...") + self.generate_summary_statistics() + + # Statistical tests + print("\n[3/10] Running statistical tests...") + self.statistical_tests() + + # Generate plots + print("\n[4/10] Creating latency by protocol plot...") + self.plot_latency_by_protocol() + + print("\n[5/10] Creating provider comparison plot...") + self.plot_provider_comparison() + + print("\n[6/10] Creating DNSSEC impact plot...") + self.plot_dnssec_impact() + + print("\n[7/10] Creating persistence impact plot...") + self.plot_persistence_impact() + + print("\n[8/10] Creating bandwidth overhead plot...") + self.plot_bandwidth_overhead() + + print("\n[9/10] Creating heatmap...") + self.plot_heatmap() + + print("\n[10/10] Creating percentile comparison...") + self.plot_percentile_comparison() + + # Generate LaTeX table + print("\n[Bonus] Generating LaTeX table...") + self.generate_latex_table() + + print("\n" + "="*80) + print("✓ Analysis Complete! Check the 'plots' directory for all visualizations.") + print("="*80) + if __name__ == "__main__": - root_dir = "." - output_file = "dns_metrics.csv" - - analyze_dns_data(root_dir, output_file) + analyzer = DNSAnalyzer(results_dir='results') + analyzer.run_full_analysis() diff --git a/scripts/analysis/analyze_simple.py b/scripts/analysis/analyze_simple.py new file mode 100644 index 0000000..a82e024 --- /dev/null +++ b/scripts/analysis/analyze_simple.py @@ -0,0 +1,536 @@ +import pandas as pd +import matplotlib.pyplot as plt +import seaborn as sns +import numpy as np +from pathlib import Path +import datetime +from dateutil import parser as date_parser +import dpkt + +# Set style +sns.set_style("whitegrid") +plt.rcParams['figure.dpi'] = 300 +plt.rcParams['savefig.dpi'] = 300 +plt.rcParams['font.size'] = 10 + +class FastDNSAnalyzer: + def __init__(self, results_dir='results'): + self.results_dir = Path(results_dir) + self.all_data = [] + + def should_include_file(self, filename): + """Filter out DNSSEC and non-persist files""" + name = filename.stem + if 'auth' in name or 'trust' in name: + return False + if name in ['tls', 'https']: + return False + return True + + def parse_rfc3339_nano(self, timestamp_str): + """Parse RFC3339Nano timestamp with timezone""" + try: + dt = date_parser.parse(timestamp_str) + return dt.astimezone(datetime.timezone.utc).timestamp() + except Exception as e: + print(f" Error parsing timestamp {timestamp_str}: {e}") + return None + + def extract_bandwidth_from_pcap_fast(self, pcap_file, csv_data): + """Fast bandwidth extraction using dpkt""" + print(f" Analyzing pcap: {pcap_file.name}") + + try: + with open(pcap_file, 'rb') as f: + pcap = dpkt.pcap.Reader(f) + + # Build query time windows + query_windows = [] + for idx, row in csv_data.iterrows(): + start_time = self.parse_rfc3339_nano(row['timestamp']) + if start_time is None: + continue + + duration_seconds = row['duration_ns'] / 1_000_000_000 + end_time = start_time + duration_seconds + + query_windows.append({ + 'index': idx, + 'start': start_time, + 'end': end_time, + 'bytes_sent': 0, + 'bytes_received': 0, + 'packets_sent': 0, + 'packets_received': 0 + }) + + if not query_windows: + print(" ✗ No valid query windows") + return None + + # Sort windows for faster matching + query_windows.sort(key=lambda x: x['start']) + + # Process packets + packet_count = 0 + matched_count = 0 + + for timestamp, buf in pcap: + packet_count += 1 + packet_size = len(buf) + + # Quick parse to determine direction + try: + eth = dpkt.ethernet.Ethernet(buf) + + # Get IP layer + if isinstance(eth.data, dpkt.ip.IP): + ip = eth.data + elif isinstance(eth.data, dpkt.ip6.IP6): + ip = eth.data + else: + continue + + # Get transport layer + if isinstance(ip.data, dpkt.udp.UDP): + transport = ip.data + src_port = transport.sport + dst_port = transport.dport + elif isinstance(ip.data, dpkt.tcp.TCP): + transport = ip.data + src_port = transport.sport + dst_port = transport.dport + else: + continue + + # Determine direction (client port usually higher) + is_outbound = src_port > dst_port + + # Binary search for matching window + for window in query_windows: + if window['start'] <= timestamp <= window['end']: + if is_outbound: + window['bytes_sent'] += packet_size + window['packets_sent'] += 1 + else: + window['bytes_received'] += packet_size + window['packets_received'] += 1 + matched_count += 1 + break + elif timestamp < window['start']: + break # No more windows to check + + except Exception: + continue + + print(f" ✓ Processed {packet_count} packets, matched {matched_count}") + + # Convert to DataFrame + bandwidth_df = pd.DataFrame(query_windows) + return bandwidth_df[['index', 'bytes_sent', 'bytes_received', + 'packets_sent', 'packets_received']] + + except Exception as e: + print(f" ✗ Error reading pcap: {e}") + return None + + def load_data(self): + """Load all relevant CSV files and extract bandwidth from pcaps""" + print("Loading data and analyzing bandwidth...") + + for provider_dir in self.results_dir.iterdir(): + if not provider_dir.is_dir(): + continue + + provider = provider_dir.name + + for csv_file in provider_dir.glob('*.csv'): + if not self.should_include_file(csv_file): + continue + + try: + df = pd.read_csv(csv_file) + df['provider'] = provider + df['test_file'] = csv_file.stem + df['csv_path'] = str(csv_file) + + # Find corresponding pcap file + pcap_file = csv_file.with_suffix('.pcap') + if pcap_file.exists(): + print(f" Processing: {provider}/{csv_file.name}") + bandwidth_data = self.extract_bandwidth_from_pcap_fast(pcap_file, df) + + if bandwidth_data is not None and len(bandwidth_data) > 0: + # Merge bandwidth data + df = df.reset_index(drop=True) + for col in ['bytes_sent', 'bytes_received', 'packets_sent', 'packets_received']: + df[col] = 0 + + for _, row in bandwidth_data.iterrows(): + idx = int(row['index']) + if idx < len(df): + df.at[idx, 'bytes_sent'] = row['bytes_sent'] + df.at[idx, 'bytes_received'] = row['bytes_received'] + df.at[idx, 'packets_sent'] = row['packets_sent'] + df.at[idx, 'packets_received'] = row['packets_received'] + + df['total_bytes'] = df['bytes_sent'] + df['bytes_received'] + + print(f" ✓ Extracted bandwidth for {len(df)} queries") + else: + print(f" ⚠ Could not extract bandwidth data") + else: + print(f" ⚠ No pcap found for {csv_file.name}") + + self.all_data.append(df) + + except Exception as e: + print(f" ✗ Error loading {csv_file}: {e}") + import traceback + traceback.print_exc() + + print(f"\nTotal files loaded: {len(self.all_data)}") + + def create_line_graphs(self, output_dir='output/line_graphs'): + """Create line graphs for latency and bandwidth""" + Path(output_dir).mkdir(parents=True, exist_ok=True) + + print("\nGenerating line graphs...") + + for df in self.all_data: + provider = df['provider'].iloc[0] + test_name = df['test_file'].iloc[0] + + df['query_index'] = range(1, len(df) + 1) + + # Create figure with 2 subplots + fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(14, 10)) + + # Plot 1: Latency + ax1.plot(df['query_index'], df['duration_ms'], marker='o', + markersize=4, linewidth=1, alpha=0.7, color='steelblue') + mean_latency = df['duration_ms'].mean() + ax1.axhline(y=mean_latency, color='r', linestyle='--', + label=f'Mean: {mean_latency:.2f} ms', linewidth=2) + ax1.set_xlabel('Query Number', fontsize=12) + ax1.set_ylabel('Latency (ms)', fontsize=12) + ax1.set_title('Latency Over Time', fontsize=12, fontweight='bold') + ax1.legend() + ax1.grid(True, alpha=0.3) + + # Plot 2: Bandwidth + if 'total_bytes' in df.columns and df['total_bytes'].sum() > 0: + ax2.plot(df['query_index'], df['bytes_sent'], marker='s', + markersize=4, linewidth=1, alpha=0.7, + color='orange', label='Sent') + ax2.plot(df['query_index'], df['bytes_received'], marker='^', + markersize=4, linewidth=1, alpha=0.7, + color='green', label='Received') + + mean_sent = df['bytes_sent'].mean() + mean_received = df['bytes_received'].mean() + ax2.axhline(y=mean_sent, color='orange', linestyle='--', + linewidth=1.5, alpha=0.5) + ax2.axhline(y=mean_received, color='green', linestyle='--', + linewidth=1.5, alpha=0.5) + + ax2.set_xlabel('Query Number', fontsize=12) + ax2.set_ylabel('Bytes', fontsize=12) + ax2.set_title(f'Bandwidth Over Time (Mean: ↑{mean_sent:.0f}B ↓{mean_received:.0f}B)', + fontsize=12, fontweight='bold') + ax2.legend() + ax2.grid(True, alpha=0.3) + + fig.suptitle(f'{provider.upper()} - {test_name}', + fontsize=14, fontweight='bold') + plt.tight_layout() + + filename = f"{provider}_{test_name}.png" + plt.savefig(f'{output_dir}/{filename}', bbox_inches='tight') + plt.close() + + print(f" ✓ Created: {filename}") + + def get_protocol_name(self, test_file): + """Extract clean protocol name""" + name = test_file.replace('-persist', '') + + protocol_map = { + 'udp': 'Plain DNS (UDP)', + 'tls': 'DoT (DNS over TLS)', + 'https': 'DoH (DNS over HTTPS)', + 'doh3': 'DoH/3 (DNS over HTTP/3)', + 'doq': 'DoQ (DNS over QUIC)' + } + + return protocol_map.get(name, name.upper()) + + def create_resolver_comparison_bars(self, output_dir='output/comparisons'): + """Create bar graphs comparing resolvers for latency and bandwidth""" + Path(output_dir).mkdir(parents=True, exist_ok=True) + + print("\nGenerating resolver comparison graphs...") + + combined_df = pd.concat(self.all_data, ignore_index=True) + protocols = combined_df['test_file'].unique() + + for protocol in protocols: + protocol_data = combined_df[combined_df['test_file'] == protocol] + protocol_name = self.get_protocol_name(protocol) + + # Latency stats + latency_stats = protocol_data.groupby('provider')['duration_ms'].agg([ + ('mean', 'mean'), + ('median', 'median'), + ('std', 'std') + ]).reset_index() + + # Create latency comparison + fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 6)) + fig.suptitle(f'{protocol_name} - Latency Comparison', + fontsize=16, fontweight='bold') + + # Mean latency + bars1 = ax1.bar(latency_stats['provider'], latency_stats['mean'], + color='steelblue', alpha=0.8, edgecolor='black') + ax1.errorbar(latency_stats['provider'], latency_stats['mean'], + yerr=latency_stats['std'], fmt='none', color='black', + capsize=5, alpha=0.6) + + for bar in bars1: + height = bar.get_height() + ax1.text(bar.get_x() + bar.get_width()/2., height, + f'{height:.2f}', + ha='center', va='bottom', fontweight='bold') + + ax1.set_xlabel('Resolver', fontsize=12) + ax1.set_ylabel('Mean Latency (ms)', fontsize=12) + ax1.set_title('Mean Latency', fontsize=12) + ax1.grid(axis='y', alpha=0.3) + + # Median latency + bars2 = ax2.bar(latency_stats['provider'], latency_stats['median'], + color='coral', alpha=0.8, edgecolor='black') + + for bar in bars2: + height = bar.get_height() + ax2.text(bar.get_x() + bar.get_width()/2., height, + f'{height:.2f}', + ha='center', va='bottom', fontweight='bold') + + ax2.set_xlabel('Resolver', fontsize=12) + ax2.set_ylabel('Median Latency (ms)', fontsize=12) + ax2.set_title('Median Latency', fontsize=12) + ax2.grid(axis='y', alpha=0.3) + + plt.tight_layout() + plt.savefig(f'{output_dir}/latency_{protocol}.png', bbox_inches='tight') + plt.close() + print(f" ✓ Created: latency_{protocol}.png") + + # Bandwidth comparison + if 'total_bytes' in protocol_data.columns and protocol_data['total_bytes'].sum() > 0: + bandwidth_stats = protocol_data.groupby('provider').agg({ + 'bytes_sent': 'mean', + 'bytes_received': 'mean', + 'total_bytes': 'mean' + }).reset_index() + + fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 6)) + fig.suptitle(f'{protocol_name} - Bandwidth Comparison', + fontsize=16, fontweight='bold') + + # Sent vs Received + x = np.arange(len(bandwidth_stats)) + width = 0.35 + + bars1 = ax1.bar(x - width/2, bandwidth_stats['bytes_sent'], width, + label='Sent', color='orange', alpha=0.8, edgecolor='black') + bars2 = ax1.bar(x + width/2, bandwidth_stats['bytes_received'], width, + label='Received', color='green', alpha=0.8, edgecolor='black') + + ax1.set_xlabel('Resolver', fontsize=12) + ax1.set_ylabel('Bytes per Query', fontsize=12) + ax1.set_title('Average Bandwidth per Query', fontsize=12) + ax1.set_xticks(x) + ax1.set_xticklabels(bandwidth_stats['provider']) + ax1.legend() + ax1.grid(axis='y', alpha=0.3) + + # Total bandwidth + bars3 = ax2.bar(bandwidth_stats['provider'], bandwidth_stats['total_bytes'], + color='purple', alpha=0.8, edgecolor='black') + + for bar in bars3: + height = bar.get_height() + ax2.text(bar.get_x() + bar.get_width()/2., height, + f'{height:.0f}', + ha='center', va='bottom', fontweight='bold') + + ax2.set_xlabel('Resolver', fontsize=12) + ax2.set_ylabel('Total Bytes per Query', fontsize=12) + ax2.set_title('Total Bandwidth per Query', fontsize=12) + ax2.grid(axis='y', alpha=0.3) + + plt.tight_layout() + plt.savefig(f'{output_dir}/bandwidth_{protocol}.png', bbox_inches='tight') + plt.close() + print(f" ✓ Created: bandwidth_{protocol}.png") + + def generate_latex_tables(self, output_dir='output/tables'): + """Generate LaTeX tables with latency and bandwidth statistics""" + Path(output_dir).mkdir(parents=True, exist_ok=True) + + print("\nGenerating LaTeX tables...") + + combined_df = pd.concat(self.all_data, ignore_index=True) + + # Generate latency table for each resolver + for provider in combined_df['provider'].unique(): + provider_data = combined_df[combined_df['provider'] == provider] + + stats = provider_data.groupby('test_file')['duration_ms'].agg([ + ('Mean', 'mean'), + ('Median', 'median'), + ('Std Dev', 'std'), + ('P95', lambda x: x.quantile(0.95)), + ('P99', lambda x: x.quantile(0.99)) + ]).round(2) + + stats.index = stats.index.map(self.get_protocol_name) + stats.index.name = 'Protocol' + + latex_code = stats.to_latex( + caption=f'{provider.upper()} - Latency Statistics (ms)', + label=f'tab:{provider}_latency', + float_format="%.2f" + ) + + with open(f'{output_dir}/{provider}_latency.tex', 'w') as f: + f.write(latex_code) + + print(f" ✓ Created: {provider}_latency.tex") + + # Generate bandwidth table for each resolver + for provider in combined_df['provider'].unique(): + provider_data = combined_df[combined_df['provider'] == provider] + + if 'total_bytes' not in provider_data.columns or provider_data['total_bytes'].sum() == 0: + continue + + bandwidth_stats = provider_data.groupby('test_file').agg({ + 'bytes_sent': 'mean', + 'bytes_received': 'mean', + 'total_bytes': 'mean' + }).round(2) + + bandwidth_stats.columns = ['Avg Sent (B)', 'Avg Received (B)', 'Avg Total (B)'] + bandwidth_stats.index = bandwidth_stats.index.map(self.get_protocol_name) + bandwidth_stats.index.name = 'Protocol' + + latex_code = bandwidth_stats.to_latex( + caption=f'{provider.upper()} - Bandwidth Statistics', + label=f'tab:{provider}_bandwidth', + float_format="%.2f" + ) + + with open(f'{output_dir}/{provider}_bandwidth.tex', 'w') as f: + f.write(latex_code) + + print(f" ✓ Created: {provider}_bandwidth.tex") + + # Generate protocol efficiency table + print("\nGenerating protocol efficiency table...") + + if 'total_bytes' in combined_df.columns and combined_df['total_bytes'].sum() > 0: + protocol_bandwidth = combined_df.groupby('test_file').agg({ + 'bytes_sent': 'mean', + 'bytes_received': 'mean', + 'total_bytes': 'mean' + }).round(2) + + # Find UDP baseline + udp_baseline = None + for protocol in protocol_bandwidth.index: + if 'udp' in protocol: + udp_baseline = protocol_bandwidth.loc[protocol, 'total_bytes'] + break + + if udp_baseline and udp_baseline > 0: + protocol_bandwidth['Overhead vs UDP (%)'] = ( + (protocol_bandwidth['total_bytes'] - udp_baseline) / udp_baseline * 100 + ).round(1) + protocol_bandwidth['Efficiency (%)'] = ( + 100 / (1 + protocol_bandwidth['Overhead vs UDP (%)'] / 100) + ).round(1) + + protocol_bandwidth.columns = ['Avg Sent (B)', 'Avg Received (B)', + 'Avg Total (B)', 'Overhead (%)', 'Efficiency (%)'] + protocol_bandwidth.index = protocol_bandwidth.index.map(self.get_protocol_name) + protocol_bandwidth.index.name = 'Protocol' + + latex_code = protocol_bandwidth.to_latex( + caption='Protocol Bandwidth Efficiency Comparison', + label='tab:protocol_efficiency', + float_format="%.2f" + ) + + with open(f'{output_dir}/protocol_efficiency.tex', 'w') as f: + f.write(latex_code) + + print(f" ✓ Created: protocol_efficiency.tex") + print("\n--- Protocol Efficiency ---") + print(protocol_bandwidth.to_string()) + + # Generate combined comparison tables + for metric in ['Mean', 'Median', 'P95']: + comparison_stats = combined_df.groupby(['provider', 'test_file'])['duration_ms'].agg([ + ('Mean', 'mean'), + ('Median', 'median'), + ('P95', lambda x: x.quantile(0.95)) + ]).round(2) + + pivot_table = comparison_stats[metric].unstack(level=0) + pivot_table.index = pivot_table.index.map(self.get_protocol_name) + pivot_table.index.name = 'Protocol' + + latex_code = pivot_table.to_latex( + caption=f'Resolver Latency Comparison - {metric} (ms)', + label=f'tab:comparison_{metric.lower()}', + float_format="%.2f" + ) + + with open(f'{output_dir}/comparison_{metric.lower()}.tex', 'w') as f: + f.write(latex_code) + + print(f" ✓ Created: comparison_{metric.lower()}.tex") + + def run_analysis(self): + """Run the complete analysis""" + print("="*80) + print("Fast DNS QoS Analysis with Bandwidth") + print("="*80) + + self.load_data() + + if not self.all_data: + print("\n⚠ No data loaded.") + return + + print("\n" + "="*80) + self.create_line_graphs() + + print("\n" + "="*80) + self.create_resolver_comparison_bars() + + print("\n" + "="*80) + self.generate_latex_tables() + + print("\n" + "="*80) + print("✓ Analysis Complete!") + print("="*80) + + +if __name__ == "__main__": + analyzer = FastDNSAnalyzer(results_dir='results') + analyzer.run_analysis() diff --git a/scripts/tools/add_extra_metrics_to_csv.go b/scripts/tools/add_extra_metrics_to_csv.go new file mode 100644 index 0000000..1b79038 --- /dev/null +++ b/scripts/tools/add_extra_metrics_to_csv.go @@ -0,0 +1,369 @@ +package main + +import ( + "encoding/csv" + "fmt" + "log" + "os" + "path/filepath" + "strconv" + "strings" + "time" + + "github.com/google/gopacket" + "github.com/google/gopacket/layers" + "github.com/google/gopacket/pcapgo" +) + +type QueryRecord struct { + Domain string + QueryType string + Protocol string + DNSSec string + AuthDNSSec string + KeepAlive string + DNSServer string + Timestamp string + DurationNs int64 + DurationMs float64 + RequestSizeBytes int + ResponseSizeBytes int + ResponseCode string + Error string + BytesSent int64 + BytesReceived int64 + PacketsSent int64 + PacketsReceived int64 + TotalBytes int64 +} + +func parseRFC3339Nano(ts string) (time.Time, error) { + return time.Parse(time.RFC3339Nano, ts) +} + +func processProviderFolder(providerPath string) error { + providerName := filepath.Base(providerPath) + fmt.Printf("\n=== Processing provider: %s ===\n", providerName) + + files, err := os.ReadDir(providerPath) + if err != nil { + return err + } + + processed := 0 + skipped := 0 + errors := 0 + + for _, file := range files { + if !strings.HasSuffix(file.Name(), ".csv") { + continue + } + + csvPath := filepath.Join(providerPath, file.Name()) + pcapPath := strings.Replace(csvPath, ".csv", ".pcap", 1) + + // Check if PCAP exists + if _, err := os.Stat(pcapPath); os.IsNotExist(err) { + fmt.Printf(" ⊗ Skipping: %s (no matching PCAP)\n", file.Name()) + skipped++ + continue + } + + // Check if already processed (has backup) + backupPath := csvPath + ".bak" + if _, err := os.Stat(backupPath); err == nil { + fmt.Printf(" ⊙ Skipping: %s (already processed, backup exists)\n", file.Name()) + skipped++ + continue + } + + fmt.Printf(" ↻ Processing: %s ... ", file.Name()) + if err := processPair(csvPath, pcapPath); err != nil { + fmt.Printf("ERROR\n") + log.Printf(" Error: %v\n", err) + errors++ + } else { + fmt.Printf("✓\n") + processed++ + } + } + + fmt.Printf(" Summary: %d processed, %d skipped, %d errors\n", processed, skipped, errors) + return nil +} + +func processPair(csvPath, pcapPath string) error { + // Create backup + backupPath := csvPath + ".bak" + input, err := os.ReadFile(csvPath) + if err != nil { + return fmt.Errorf("backup read failed: %w", err) + } + if err := os.WriteFile(backupPath, input, 0644); err != nil { + return fmt.Errorf("backup write failed: %w", err) + } + + // Read CSV records + records, err := readCSV(csvPath) + if err != nil { + return fmt.Errorf("CSV read failed: %w", err) + } + + if len(records) == 0 { + return fmt.Errorf("no records in CSV") + } + + // Read and parse PCAP + packets, err := readPCAPGo(pcapPath) + if err != nil { + return fmt.Errorf("PCAP read failed: %w", err) + } + + // Enrich records with bandwidth data + enrichRecords(records, packets) + + // Write enriched CSV + if err := writeCSV(csvPath, records); err != nil { + return fmt.Errorf("CSV write failed: %w", err) + } + + return nil +} + +func readCSV(path string) ([]*QueryRecord, error) { + f, err := os.Open(path) + if err != nil { + return nil, err + } + defer f.Close() + + r := csv.NewReader(f) + rows, err := r.ReadAll() + if err != nil { + return nil, err + } + + if len(rows) < 2 { + return nil, fmt.Errorf("CSV has no data rows") + } + + records := make([]*QueryRecord, 0, len(rows)-1) + for i := 1; i < len(rows); i++ { + row := rows[i] + if len(row) < 14 { + log.Printf(" Warning: Skipping malformed row %d", i+1) + continue + } + + durationNs, _ := strconv.ParseInt(row[8], 10, 64) + durationMs, _ := strconv.ParseFloat(row[9], 64) + reqSize, _ := strconv.Atoi(row[10]) + respSize, _ := strconv.Atoi(row[11]) + + records = append(records, &QueryRecord{ + Domain: row[0], + QueryType: row[1], + Protocol: row[2], + DNSSec: row[3], + AuthDNSSec: row[4], + KeepAlive: row[5], + DNSServer: row[6], + Timestamp: row[7], + DurationNs: durationNs, + DurationMs: durationMs, + RequestSizeBytes: reqSize, + ResponseSizeBytes: respSize, + ResponseCode: row[12], + Error: row[13], + }) + } + + return records, nil +} + +type PacketInfo struct { + Timestamp time.Time + Size int + IsSent bool +} + +func readPCAPGo(path string) ([]PacketInfo, error) { + f, err := os.Open(path) + if err != nil { + return nil, err + } + defer f.Close() + + reader, err := pcapgo.NewReader(f) + if err != nil { + return nil, err + } + + var packets []PacketInfo + packetSource := gopacket.NewPacketSource(reader, reader.LinkType()) + + for packet := range packetSource.Packets() { + if packet.NetworkLayer() == nil { + continue + } + + isDNS := false + isSent := false + + // Check UDP layer (DNS, DoQ, DoH3) + if udpLayer := packet.Layer(layers.LayerTypeUDP); udpLayer != nil { + udp := udpLayer.(*layers.UDP) + isDNS = udp.SrcPort == 53 || udp.DstPort == 53 || + udp.SrcPort == 853 || udp.DstPort == 853 || + udp.SrcPort == 443 || udp.DstPort == 443 + isSent = udp.DstPort == 53 || udp.DstPort == 853 || udp.DstPort == 443 + } + + // Check TCP layer (DoT, DoH) + if tcpLayer := packet.Layer(layers.LayerTypeTCP); tcpLayer != nil { + tcp := tcpLayer.(*layers.TCP) + isDNS = tcp.SrcPort == 53 || tcp.DstPort == 53 || + tcp.SrcPort == 853 || tcp.DstPort == 853 || + tcp.SrcPort == 443 || tcp.DstPort == 443 + isSent = tcp.DstPort == 53 || tcp.DstPort == 853 || tcp.DstPort == 443 + } + + if isDNS { + packets = append(packets, PacketInfo{ + Timestamp: packet.Metadata().Timestamp, + Size: len(packet.Data()), + IsSent: isSent, + }) + } + } + + return packets, nil +} + +func enrichRecords(records []*QueryRecord, packets []PacketInfo) { + for _, rec := range records { + ts, err := parseRFC3339Nano(rec.Timestamp) + if err != nil { + log.Printf(" Warning: Failed to parse timestamp: %s", rec.Timestamp) + continue + } + + // Define time window for this query + windowStart := ts + windowEnd := ts.Add(time.Duration(rec.DurationNs)) + + var sent, recv, pktSent, pktRecv int64 + + // Match packets within the time window + for _, pkt := range packets { + if (pkt.Timestamp.Equal(windowStart) || pkt.Timestamp.After(windowStart)) && + pkt.Timestamp.Before(windowEnd) { + if pkt.IsSent { + sent += int64(pkt.Size) + pktSent++ + } else { + recv += int64(pkt.Size) + pktRecv++ + } + } + } + + rec.BytesSent = sent + rec.BytesReceived = recv + rec.PacketsSent = pktSent + rec.PacketsReceived = pktRecv + rec.TotalBytes = sent + recv + } +} + +func writeCSV(path string, records []*QueryRecord) error { + f, err := os.Create(path) + if err != nil { + return err + } + defer f.Close() + + w := csv.NewWriter(f) + defer w.Flush() + + // Write header + header := []string{ + "domain", "query_type", "protocol", "dnssec", "auth_dnssec", + "keep_alive", "dns_server", "timestamp", "duration_ns", "duration_ms", + "request_size_bytes", "response_size_bytes", "response_code", "error", + "bytes_sent", "bytes_received", "packets_sent", "packets_received", "total_bytes", + } + if err := w.Write(header); err != nil { + return err + } + + // Write data rows + for _, rec := range records { + row := []string{ + rec.Domain, + rec.QueryType, + rec.Protocol, + rec.DNSSec, + rec.AuthDNSSec, + rec.KeepAlive, + rec.DNSServer, + rec.Timestamp, + strconv.FormatInt(rec.DurationNs, 10), + strconv.FormatFloat(rec.DurationMs, 'f', -1, 64), + strconv.Itoa(rec.RequestSizeBytes), + strconv.Itoa(rec.ResponseSizeBytes), + rec.ResponseCode, + rec.Error, + strconv.FormatInt(rec.BytesSent, 10), + strconv.FormatInt(rec.BytesReceived, 10), + strconv.FormatInt(rec.PacketsSent, 10), + strconv.FormatInt(rec.PacketsReceived, 10), + strconv.FormatInt(rec.TotalBytes, 10), + } + if err := w.Write(row); err != nil { + return err + } + } + + return nil +} + +func main() { + resultsDir := "results" + providers := []string{"adguard", "cloudflare", "google", "quad9"} + + fmt.Println("╔═══════════════════════════════════════════════╗") + fmt.Println("║ DNS PCAP Preprocessor v1.0 ║") + fmt.Println("║ Enriching ALL CSVs with bandwidth metrics ║") + fmt.Println("╚═══════════════════════════════════════════════╝") + + totalProcessed := 0 + totalSkipped := 0 + totalErrors := 0 + + for _, provider := range providers { + providerPath := filepath.Join(resultsDir, provider) + if _, err := os.Stat(providerPath); os.IsNotExist(err) { + fmt.Printf("\n⚠ Provider folder not found: %s\n", provider) + continue + } + + if err := processProviderFolder(providerPath); err != nil { + log.Printf("Error processing %s: %v\n", provider, err) + totalErrors++ + } + } + + fmt.Println("\n╔═══════════════════════════════════════════════╗") + fmt.Println("║ Preprocessing Complete! ║") + fmt.Println("╚═══════════════════════════════════════════════╝") + fmt.Printf("\nAll CSV files now have 5 additional columns:\n") + fmt.Printf(" • bytes_sent - Total bytes sent to DNS server\n") + fmt.Printf(" • bytes_received - Total bytes received from DNS server\n") + fmt.Printf(" • packets_sent - Number of packets sent\n") + fmt.Printf(" • packets_received - Number of packets received\n") + fmt.Printf(" • total_bytes - Sum of sent + received bytes\n") + fmt.Printf("\n📁 Backups saved as: *.csv.bak\n") + fmt.Printf("\n💡 Tip: The analysis script will filter which files to visualize,\n") + fmt.Printf(" but all files now have complete bandwidth metrics!\n") +} diff --git a/scripts/tools/add_extra_metrics_to_csv.py b/scripts/tools/add_extra_metrics_to_csv.py index f537e66..ed09cba 100644 --- a/scripts/tools/add_extra_metrics_to_csv.py +++ b/scripts/tools/add_extra_metrics_to_csv.py @@ -1,250 +1,362 @@ #!/usr/bin/env python3 """ -Add network metrics from PCAP files to DNS CSV files. -Adds: raw_bytes_total, raw_packet_count, overhead_bytes, efficiency_percent +Fast PCAP Preprocessor for DNS QoS Analysis +Loads PCAP into memory first, then uses binary search for matching. +Uses LAN IP to determine direction (LAN = sent, non-LAN = received). """ import csv -import os -import argparse -import re +import shutil from pathlib import Path -from datetime import datetime, timezone -from scapy.all import rdpcap +from typing import Dict, List, NamedTuple +import time -def parse_timestamp(ts_str): - """Parse timestamp with timezone and nanoseconds (RFC3339Nano).""" - match = re.match( - r'(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2})\.(\d+)([\+\-]\d{2}:\d{2})', - ts_str - ) - - if not match: - raise ValueError(f"Invalid timestamp format: {ts_str}") - - base, nanos, tz = match.groups() - micros = nanos[:6].ljust(6, '0') - iso_str = f"{base}.{micros}{tz}" - dt = datetime.fromisoformat(iso_str) - full_nanos = int(nanos.ljust(9, '0')) - - return dt, full_nanos +import dpkt +from dateutil import parser as date_parser -def read_pcap(pcap_path): - """Read PCAP and return list of (timestamp_epoch, size).""" + +class Packet(NamedTuple): + """Lightweight packet representation.""" + timestamp: float + size: int + is_outbound: bool # True if from LAN, False if from internet + + +class QueryWindow: + """Efficient query window representation.""" + __slots__ = ['index', 'start', 'end', 'sent', 'received', 'pkts_sent', 'pkts_received'] + + def __init__(self, index: int, start: float, end: float): + self.index = index + self.start = start + self.end = end + self.sent = 0 + self.received = 0 + self.pkts_sent = 0 + self.pkts_received = 0 + + +def parse_csv_timestamp(ts_str: str) -> float: + """Convert RFC3339Nano timestamp to Unix epoch (seconds).""" + dt = date_parser.isoparse(ts_str) + return dt.timestamp() + + +def is_lan_ip(ip_bytes: bytes) -> bool: + """Check if IP is a private/LAN address.""" + if len(ip_bytes) != 4: + return False + + first = ip_bytes[0] + second = ip_bytes[1] + + # 10.0.0.0/8 + if first == 10: + return True + + # 172.16.0.0/12 + if first == 172 and 16 <= second <= 31: + return True + + # 192.168.0.0/16 + if first == 192 and second == 168: + return True + + # 127.0.0.0/8 (localhost) + if first == 127: + return True + + return False + + +def load_pcap_into_memory(pcap_path: Path) -> List[Packet]: + """Load all packets from PCAP into memory with minimal data.""" packets = [] + + print(f" Loading PCAP into memory...") + start_time = time.time() + try: - pkts = rdpcap(str(pcap_path)) - for pkt in pkts: - timestamp = float(pkt.time) - length = len(pkt) - packets.append((timestamp, length)) + with open(pcap_path, 'rb') as f: + try: + pcap = dpkt.pcap.Reader(f) + except: + # Try pcapng format + f.seek(0) + pcap = dpkt.pcapng.Reader(f) + + for ts, buf in pcap: + try: + packet_time = float(ts) + packet_size = len(buf) + + # Parse to get source IP + eth = dpkt.ethernet.Ethernet(buf) + + # Default to outbound if we can't determine + is_outbound = True + + if isinstance(eth.data, dpkt.ip.IP): + ip = eth.data + src_ip = ip.src + is_outbound = is_lan_ip(src_ip) + + packets.append(Packet( + timestamp=packet_time, + size=packet_size, + is_outbound=is_outbound + )) + + except (dpkt.dpkt.NeedData, dpkt.dpkt.UnpackError, AttributeError): + continue + except Exception as e: - print(f" ❌ Error reading PCAP: {e}") + print(f" Error reading PCAP: {e}") return [] + elapsed = time.time() - start_time + print(f" Loaded {len(packets):,} packets in {elapsed:.2f}s") + + # Sort by timestamp for binary search + packets.sort(key=lambda p: p.timestamp) + return packets -def find_packets_in_window(packets, start_ts, start_nanos, duration_ns): - """Find packets within exact time window.""" - start_epoch = start_ts.timestamp() - start_epoch += (start_nanos % 1_000_000) / 1_000_000_000 - end_epoch = start_epoch + (duration_ns / 1_000_000_000) - - total_bytes = 0 - packet_count = 0 - - for pkt_ts, pkt_len in packets: - if start_epoch <= pkt_ts <= end_epoch: - total_bytes += pkt_len - packet_count += 1 - - return total_bytes, packet_count -def enhance_csv(csv_path, pcap_path, output_path, debug=False): - """Add PCAP metrics to CSV.""" - if not os.path.exists(pcap_path): - print(f"⚠️ PCAP not found: {pcap_path}") - return False - - print(f"Processing: {os.path.basename(csv_path)}") - - # Read PCAP - packets = read_pcap(pcap_path) - print(f" Loaded {len(packets)} packets") - +def find_packets_in_window( + packets: List[Packet], + start_time: float, + end_time: float, + left_hint: int = 0 +) -> tuple[List[Packet], int]: + """ + Binary search to find all packets within time window. + Returns (matching_packets, left_index_hint_for_next_search). + """ if not packets: - print(" ❌ No packets found") - return False + return [], 0 - if packets and debug: - first_pcap = packets[0][0] - last_pcap = packets[-1][0] - print(f" First PCAP packet: {first_pcap:.6f}") - print(f" Last PCAP packet: {last_pcap:.6f}") - print(f" PCAP duration: {(last_pcap - first_pcap):.3f}s") + # Binary search for first packet >= start_time + left, right = left_hint, len(packets) - 1 + first_idx = len(packets) - # Read CSV - with open(csv_path, 'r', newline='') as f: - reader = csv.DictReader(f) - fieldnames = list(reader.fieldnames) + [ - 'raw_bytes_total', - 'raw_packet_count', - 'overhead_bytes', - 'efficiency_percent' - ] - rows = list(reader) + while left <= right: + mid = (left + right) // 2 + if packets[mid].timestamp >= start_time: + first_idx = mid + right = mid - 1 + else: + left = mid + 1 - if rows and debug: - try: - first_ts, _ = parse_timestamp(rows[0]['timestamp']) - last_ts, _ = parse_timestamp(rows[-1]['timestamp']) - print(f" First CSV query: {first_ts.timestamp():.6f}") - print(f" Last CSV query: {last_ts.timestamp():.6f}") - offset = packets[0][0] - first_ts.timestamp() - print(f" Time offset (PCAP - CSV): {offset:.3f}s") - except: - pass + # No packets in range + if first_idx >= len(packets) or packets[first_idx].timestamp > end_time: + return [], first_idx - # Enhance rows - enhanced = [] - matched = 0 + # Collect all packets in window + matching = [] + idx = first_idx + while idx < len(packets) and packets[idx].timestamp <= end_time: + matching.append(packets[idx]) + idx += 1 - for i, row in enumerate(rows): - try: - timestamp, nanos = parse_timestamp(row['timestamp']) - duration_ns = int(row['duration_ns']) - - raw_bytes, packet_count = find_packets_in_window( - packets, timestamp, nanos, duration_ns - ) - - useful_bytes = ( - int(row['request_size_bytes']) + - int(row['response_size_bytes']) - ) - overhead = raw_bytes - useful_bytes - efficiency = ( - (useful_bytes / raw_bytes * 100) - if raw_bytes > 0 else 0 - ) - - row['raw_bytes_total'] = raw_bytes - row['raw_packet_count'] = packet_count - row['overhead_bytes'] = overhead - row['efficiency_percent'] = f"{efficiency:.2f}" - - if raw_bytes > 0: - matched += 1 - - # Debug first few queries - if debug and i < 3: - print(f" Query {i}: {row['domain']}") - print(f" Duration: {duration_ns / 1e6:.3f}ms") - print(f" Matched packets: {packet_count}") - print(f" Raw bytes: {raw_bytes}") - print(f" Useful bytes: {useful_bytes}") - print(f" Efficiency: {efficiency:.2f}%") - - except (ValueError, KeyError) as e: - if debug: - print(f" Error processing row {i}: {e}") - row['raw_bytes_total'] = 0 - row['raw_packet_count'] = 0 - row['overhead_bytes'] = 0 - row['efficiency_percent'] = "0.00" - - enhanced.append(row) - - print(f" Matched: {matched}/{len(rows)} queries") - - if matched == 0: - print(" ⚠️ WARNING: No queries matched any packets!") - print(" This might indicate timestamp misalignment.") - - # Write output - os.makedirs(os.path.dirname(output_path), exist_ok=True) - with open(output_path, 'w', newline='') as f: - writer = csv.DictWriter(f, fieldnames=fieldnames, extrasaction='ignore') - writer.writeheader() - writer.writerows(enhanced) - - print(f" ✓ Saved: {output_path}") - return True + return matching, first_idx -def main(): - parser = argparse.ArgumentParser( - description='Add PCAP network metrics to DNS CSV files' - ) - parser.add_argument('input_dir', help='Input directory (e.g., results)') - parser.add_argument( - '--output', - default='./results_enriched', - help='Output directory (default: ./results_enriched)' - ) - parser.add_argument( - '--dry-run', - action='store_true', - help='Preview files without processing' - ) - parser.add_argument( - '--debug', - action='store_true', - help='Show detailed timing information' - ) + +def load_csv_queries(csv_path: Path) -> List[Dict]: + """Load CSV and create query data structures.""" + queries = [] + with open(csv_path, 'r', encoding='utf-8') as f: + reader = csv.DictReader(f) + for row in reader: + try: + ts_epoch = parse_csv_timestamp(row['timestamp']) + duration_s = float(row['duration_ns']) / 1e9 + queries.append({ + 'data': row, + 'start_time': ts_epoch, + 'end_time': ts_epoch + duration_s, + }) + except Exception as e: + print(f" Warning: Skipping row - {e}") + continue + return queries + + +def match_packets_to_queries( + packets: List[Packet], + queries: List[Dict] +) -> List[Dict]: + """Match packets to query windows using binary search.""" + if not queries or not packets: + return queries - args = parser.parse_args() + print(f" Matching packets to queries...") + start_time = time.time() - print("=" * 60) - print("ENHANCE DNS CSVs WITH PCAP METRICS") - print("=" * 60) - print(f"Input: {args.input_dir}") - print(f"Output: {args.output}") - if args.debug: - print("Debug: ENABLED") - print() + # Initialize metrics + for q in queries: + q['bytes_sent'] = 0 + q['bytes_received'] = 0 + q['packets_sent'] = 0 + q['packets_received'] = 0 + q['total_bytes'] = 0 - # Find CSV files - csv_files = list(Path(args.input_dir).rglob('*.csv')) + # Sort queries by start time for sequential processing + queries_sorted = sorted(enumerate(queries), key=lambda x: x[1]['start_time']) - if not csv_files: - print("❌ No CSV files found") - return 1 + matched_packets = 0 + left_hint = 0 # Optimization: start next search from here - print(f"Found {len(csv_files)} CSV files\n") + for original_idx, q in queries_sorted: + matching, left_hint = find_packets_in_window( + packets, + q['start_time'], + q['end_time'], + left_hint + ) + + for pkt in matching: + matched_packets += 1 + if pkt.is_outbound: + q['bytes_sent'] += pkt.size + q['packets_sent'] += 1 + else: + q['bytes_received'] += pkt.size + q['packets_received'] += 1 + + q['total_bytes'] = q['bytes_sent'] + q['bytes_received'] - if args.dry_run: - print("DRY RUN - would process:") - for csv_path in csv_files: - pcap_path = csv_path.with_suffix('.pcap') - print(f" {csv_path.relative_to(args.input_dir)}") - print(f" PCAP: {'✓' if pcap_path.exists() else '✗'}") - return 0 + elapsed = time.time() - start_time + print(f" Matched {matched_packets:,} packets in {elapsed:.2f}s") - # Process files - success = 0 - failed = 0 + # Statistics + total_sent = sum(q['bytes_sent'] for q in queries) + total_recv = sum(q['bytes_received'] for q in queries) + queries_with_data = sum(1 for q in queries if q['total_bytes'] > 0) + print(f" Total: {total_sent:,} bytes sent, {total_recv:,} bytes received") + print(f" Queries with data: {queries_with_data}/{len(queries)}") + + return queries + + +def write_enriched_csv( + csv_path: Path, queries: List[Dict], backup: bool = True +): + """Write enriched CSV with bandwidth columns.""" + if backup and csv_path.exists(): + backup_path = csv_path.with_suffix('.csv.bak') + if not backup_path.exists(): # Don't overwrite existing backup + shutil.copy2(csv_path, backup_path) + print(f" Backup: {backup_path.name}") + + # Get fieldnames + original_fields = list(queries[0]['data'].keys()) + new_fields = [ + 'bytes_sent', + 'bytes_received', + 'packets_sent', + 'packets_received', + 'total_bytes', + ] + fieldnames = original_fields + new_fields + + with open(csv_path, 'w', encoding='utf-8', newline='') as f: + writer = csv.DictWriter(f, fieldnames=fieldnames) + writer.writeheader() + + for q in queries: + row = q['data'].copy() + for field in new_fields: + row[field] = q[field] + writer.writerow(row) + + print(f" Written: {csv_path.name}") + + +def process_provider_directory(provider_path: Path): + """Process all CSV/PCAP pairs in a provider directory.""" + print(f"\n{'='*60}") + print(f"Processing: {provider_path.name.upper()}") + print(f"{'='*60}") + + csv_files = sorted(provider_path.glob('*.csv')) + processed = 0 + total_time = 0 for csv_path in csv_files: - pcap_path = csv_path.with_suffix('.pcap') - rel_path = csv_path.relative_to(args.input_dir) - output_path = Path(args.output) / rel_path + # Skip backup files + if '.bak' in csv_path.name: + continue - if enhance_csv(str(csv_path), str(pcap_path), str(output_path), - args.debug): - success += 1 - else: - failed += 1 - print() + pcap_path = csv_path.with_suffix('.pcap') + + if not pcap_path.exists(): + print(f"\n ⚠ Skipping {csv_path.name} - no matching PCAP") + continue + + print(f"\n 📁 {csv_path.name}") + file_start = time.time() + + # Load PCAP into memory first + packets = load_pcap_into_memory(pcap_path) + if not packets: + print(f" ⚠ No packets found in PCAP") + continue + + # Load CSV queries + queries = load_csv_queries(csv_path) + if not queries: + print(f" ⚠ No valid queries found") + continue + + print(f" Loaded {len(queries):,} queries") + + # Match packets to queries + enriched_queries = match_packets_to_queries(packets, queries) + + # Write enriched CSV + write_enriched_csv(csv_path, enriched_queries) + + file_time = time.time() - file_start + total_time += file_time + processed += 1 + print(f" ✓ Completed in {file_time:.2f}s") - # Summary - print("=" * 60) - print(f"✓ Success: {success}") - print(f"✗ Failed: {failed}") - print(f"Total: {len(csv_files)}") - print(f"\nOutput: {args.output}") - - return 0 if failed == 0 else 1 + print(f"\n {'='*58}") + print(f" {provider_path.name}: {processed} files in {total_time:.2f}s") + print(f" {'='*58}") -if __name__ == "__main__": - exit(main()) + +def main(): + """Main preprocessing pipeline.""" + overall_start = time.time() + + print("\n" + "="*60) + print("DNS PCAP PREPROCESSOR - Memory-Optimized Edition") + print("="*60) + + results_dir = Path('results') + + if not results_dir.exists(): + print(f"\n❌ Error: '{results_dir}' directory not found") + return + + providers = ['adguard', 'cloudflare', 'google', 'quad9'] + + for provider in providers: + provider_path = results_dir / provider + if provider_path.exists(): + process_provider_directory(provider_path) + else: + print(f"\n⚠ Warning: Provider directory not found: {provider}") + + overall_time = time.time() - overall_start + + print("\n" + "="*60) + print(f"✓ PREPROCESSING COMPLETE") + print(f" Total time: {overall_time:.2f}s ({overall_time/60:.1f} minutes)") + print("="*60 + "\n") + + +if __name__ == '__main__': + main() diff --git a/scripts/tools/csvs_to_sqlite.py b/scripts/tools/csvs_to_sqlite.py new file mode 100644 index 0000000..e501de9 --- /dev/null +++ b/scripts/tools/csvs_to_sqlite.py @@ -0,0 +1,426 @@ +#!/usr/bin/env python3 +""" +Convert DNS CSV files to SQLite database. +Creates a single normalized table with unified DNSSEC handling. +""" + +import sqlite3 +import csv +from pathlib import Path +from dateutil import parser as date_parser + + +def create_database_schema(conn: sqlite3.Connection): + """Create the database schema with indexes.""" + cursor = conn.cursor() + + # Main queries table + cursor.execute(""" + CREATE TABLE IF NOT EXISTS dns_queries ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + + -- Metadata + provider TEXT NOT NULL, + protocol TEXT NOT NULL, + dnssec_mode TEXT NOT NULL CHECK(dnssec_mode IN ('off', 'auth', 'trust')), + + -- Query details + domain TEXT NOT NULL, + query_type TEXT NOT NULL, + keep_alive BOOLEAN NOT NULL, + dns_server TEXT NOT NULL, + + -- Timing + timestamp TEXT NOT NULL, + timestamp_unix REAL NOT NULL, + duration_ns INTEGER NOT NULL, + duration_ms REAL NOT NULL, + + -- Size metrics + request_size_bytes INTEGER, + response_size_bytes INTEGER, + + -- Network metrics (from PCAP) + bytes_sent INTEGER DEFAULT 0, + bytes_received INTEGER DEFAULT 0, + packets_sent INTEGER DEFAULT 0, + packets_received INTEGER DEFAULT 0, + total_bytes INTEGER DEFAULT 0, + + -- Response + response_code TEXT, + error TEXT + ) + """) + + # Create indexes for common queries + cursor.execute(""" + CREATE INDEX IF NOT EXISTS idx_provider + ON dns_queries(provider) + """) + + cursor.execute(""" + CREATE INDEX IF NOT EXISTS idx_protocol + ON dns_queries(protocol) + """) + + cursor.execute(""" + CREATE INDEX IF NOT EXISTS idx_dnssec_mode + ON dns_queries(dnssec_mode) + """) + + cursor.execute(""" + CREATE INDEX IF NOT EXISTS idx_keep_alive + ON dns_queries(keep_alive) + """) + + cursor.execute(""" + CREATE INDEX IF NOT EXISTS idx_provider_protocol_dnssec + ON dns_queries(provider, protocol, dnssec_mode) + """) + + cursor.execute(""" + CREATE INDEX IF NOT EXISTS idx_timestamp + ON dns_queries(timestamp_unix) + """) + + cursor.execute(""" + CREATE INDEX IF NOT EXISTS idx_domain + ON dns_queries(domain) + """) + + conn.commit() + + +def parse_protocol_and_dnssec(filename: str) -> tuple[str, str, bool]: + """ + Extract base protocol, DNSSEC mode, and keep_alive from filename. + Returns (base_protocol, dnssec_mode, keep_alive) + + Examples: + 'udp.csv' -> ('udp', 'off', False) + 'udp-auth.csv' -> ('udp', 'auth', False) + 'tls.csv' -> ('tls', 'off', False) + 'tls-persist.csv' -> ('tls', 'off', True) + 'https-persist.csv' -> ('https', 'off', True) + 'https-auth-persist.csv' -> ('https', 'auth', True) + 'https-trust-persist.csv' -> ('https', 'trust', True) + 'doh3-auth.csv' -> ('doh3', 'auth', False) + 'doq.csv' -> ('doq', 'off', False) + """ + name = filename.replace('.csv', '') + + # Check for persist suffix (keep_alive) + keep_alive = False + if name.endswith('-persist'): + keep_alive = True + name = name.replace('-persist', '') + + # Check for DNSSEC suffix + dnssec_mode = 'off' + if name.endswith('-auth'): + dnssec_mode = 'auth' + name = name.replace('-auth', '') + elif name.endswith('-trust'): + dnssec_mode = 'trust' + name = name.replace('-trust', '') + + # For UDP, DoH3, and DoQ, keep_alive doesn't apply (connectionless) + if name in ['udp', 'doh3', 'doq']: + keep_alive = False + + return (name, dnssec_mode, keep_alive) + + +def str_to_bool(value: str) -> bool: + """Convert string boolean to Python bool.""" + return value.lower() in ('true', '1', 'yes') + + +def import_csv_to_db( + csv_path: Path, + provider: str, + conn: sqlite3.Connection +) -> int: + """Import a CSV file into the database.""" + protocol, dnssec_mode, keep_alive_from_filename = parse_protocol_and_dnssec(csv_path.name) + + cursor = conn.cursor() + rows_imported = 0 + + with open(csv_path, 'r', encoding='utf-8') as f: + reader = csv.DictReader(f) + + for row in reader: + try: + # Parse timestamp to Unix epoch + dt = date_parser.isoparse(row['timestamp']) + timestamp_unix = dt.timestamp() + + # Use keep_alive from filename (more reliable than CSV) + keep_alive = keep_alive_from_filename + + # Handle optional fields (may not exist in older CSVs) + bytes_sent = int(row.get('bytes_sent', 0) or 0) + bytes_received = int(row.get('bytes_received', 0) or 0) + packets_sent = int(row.get('packets_sent', 0) or 0) + packets_received = int(row.get('packets_received', 0) or 0) + total_bytes = int(row.get('total_bytes', 0) or 0) + + cursor.execute(""" + INSERT INTO dns_queries ( + provider, protocol, dnssec_mode, + domain, query_type, keep_alive, + dns_server, timestamp, timestamp_unix, + duration_ns, duration_ms, + request_size_bytes, response_size_bytes, + bytes_sent, bytes_received, packets_sent, packets_received, total_bytes, + response_code, error + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) + """, ( + provider, + protocol, + dnssec_mode, + row['domain'], + row['query_type'], + keep_alive, + row['dns_server'], + row['timestamp'], + timestamp_unix, + int(row['duration_ns']), + float(row['duration_ms']), + int(row.get('request_size_bytes') or 0), + int(row.get('response_size_bytes') or 0), + bytes_sent, + bytes_received, + packets_sent, + packets_received, + total_bytes, + row.get('response_code', ''), + row.get('error', '') + )) + + rows_imported += 1 + + except Exception as e: + print(f" Warning: Skipping row - {e}") + continue + + conn.commit() + return rows_imported + + +def main(): + """Main import pipeline.""" + print("\n" + "="*60) + print("CSV to SQLite Database Converter") + print("="*60) + + results_dir = Path('results') + db_path = Path('dns.db') + + if not results_dir.exists(): + print(f"\n❌ Error: '{results_dir}' directory not found") + return + + # Remove existing database + if db_path.exists(): + print(f"\n⚠ Removing existing database: {db_path}") + db_path.unlink() + + # Create database and schema + print(f"\n📊 Creating database: {db_path}") + conn = sqlite3.connect(db_path) + create_database_schema(conn) + print("✓ Schema created") + + # Import CSVs + providers = ['adguard', 'cloudflare', 'google', 'quad9'] + total_rows = 0 + total_files = 0 + + for provider in providers: + provider_path = results_dir / provider + + if not provider_path.exists(): + print(f"\n⚠ Skipping {provider} - directory not found") + continue + + print(f"\n{'='*60}") + print(f"Importing: {provider.upper()}") + print(f"{'='*60}") + + csv_files = sorted(provider_path.glob('*.csv')) + provider_rows = 0 + provider_files = 0 + + for csv_path in csv_files: + # Skip backup files + if '.bak' in csv_path.name: + continue + + protocol, dnssec, keep_alive = parse_protocol_and_dnssec(csv_path.name) + ka_str = "persistent" if keep_alive else "non-persist" + print(f" 📄 {csv_path.name:30} → {protocol:8} (DNSSEC: {dnssec:5}, {ka_str})") + + rows = import_csv_to_db(csv_path, provider, conn) + print(f" ✓ Imported {rows:,} rows") + + provider_rows += rows + provider_files += 1 + + print(f"\n Total: {provider_files} files, {provider_rows:,} rows") + total_rows += provider_rows + total_files += provider_files + + # Create summary + print(f"\n{'='*60}") + print("Database Summary") + print(f"{'='*60}") + + cursor = conn.cursor() + + # Total counts + cursor.execute("SELECT COUNT(*) FROM dns_queries") + total_queries = cursor.fetchone()[0] + + cursor.execute("SELECT COUNT(DISTINCT provider) FROM dns_queries") + unique_providers = cursor.fetchone()[0] + + cursor.execute("SELECT COUNT(DISTINCT protocol) FROM dns_queries") + unique_protocols = cursor.fetchone()[0] + + cursor.execute("SELECT COUNT(DISTINCT domain) FROM dns_queries") + unique_domains = cursor.fetchone()[0] + + print(f"\nTotal queries: {total_queries:,}") + print(f"Providers: {unique_providers}") + print(f"Protocols: {unique_protocols}") + print(f"Unique domains: {unique_domains}") + + # Show breakdown by provider, protocol, DNSSEC, and keep_alive + print(f"\nBreakdown by Provider, Protocol, DNSSEC & Keep-Alive:") + print(f"{'-'*80}") + + cursor.execute(""" + SELECT provider, protocol, dnssec_mode, keep_alive, COUNT(*) as count + FROM dns_queries + GROUP BY provider, protocol, dnssec_mode, keep_alive + ORDER BY provider, protocol, dnssec_mode, keep_alive + """) + + current_provider = None + for provider, protocol, dnssec, keep_alive, count in cursor.fetchall(): + if current_provider != provider: + if current_provider is not None: + print() + current_provider = provider + + ka_str = "✓" if keep_alive else "✗" + print(f" {provider:12} | {protocol:8} | {dnssec:5} | KA:{ka_str} | {count:6,} queries") + + # Protocol distribution + print(f"\n{'-'*80}") + print("Protocol Distribution:") + print(f"{'-'*80}") + + cursor.execute(""" + SELECT protocol, COUNT(*) as count + FROM dns_queries + GROUP BY protocol + ORDER BY protocol + """) + + for protocol, count in cursor.fetchall(): + pct = (count / total_queries) * 100 + print(f" {protocol:8} | {count:8,} queries ({pct:5.1f}%)") + + # DNSSEC mode distribution + print(f"\n{'-'*80}") + print("DNSSEC Mode Distribution:") + print(f"{'-'*80}") + + cursor.execute(""" + SELECT dnssec_mode, COUNT(*) as count + FROM dns_queries + GROUP BY dnssec_mode + ORDER BY dnssec_mode + """) + + for dnssec_mode, count in cursor.fetchall(): + pct = (count / total_queries) * 100 + print(f" {dnssec_mode:5} | {count:8,} queries ({pct:5.1f}%)") + + # Keep-Alive distribution + print(f"\n{'-'*80}") + print("Keep-Alive Distribution:") + print(f"{'-'*80}") + + cursor.execute(""" + SELECT keep_alive, COUNT(*) as count + FROM dns_queries + GROUP BY keep_alive + """) + + for keep_alive, count in cursor.fetchall(): + ka_label = "Persistent" if keep_alive else "Non-persistent" + pct = (count / total_queries) * 100 + print(f" {ka_label:15} | {count:8,} queries ({pct:5.1f}%)") + + conn.close() + + print(f"\n{'='*60}") + print(f"✓ Database created successfully: {db_path}") + print(f" Total: {total_files} files, {total_rows:,} rows") + print(f"{'='*60}\n") + + # Print usage examples + print("\n📖 Usage Examples for Metabase:") + print(f"{'-'*60}") + + print("\n1. Compare protocols (DNSSEC off, persistent only):") + print(""" SELECT provider, protocol, + AVG(duration_ms) as avg_latency, + AVG(total_bytes) as avg_bytes + FROM dns_queries + WHERE dnssec_mode = 'off' AND keep_alive = 1 + GROUP BY provider, protocol;""") + + print("\n2. DNSSEC impact on UDP:") + print(""" SELECT provider, dnssec_mode, + AVG(duration_ms) as avg_latency + FROM dns_queries + WHERE protocol = 'udp' + GROUP BY provider, dnssec_mode;""") + + print("\n3. Keep-alive impact on TLS:") + print(""" SELECT provider, keep_alive, + AVG(duration_ms) as avg_latency, + AVG(total_bytes) as avg_bytes + FROM dns_queries + WHERE protocol = 'tls' AND dnssec_mode = 'off' + GROUP BY provider, keep_alive;""") + + print("\n4. Time series for line graphs:") + print(""" SELECT timestamp_unix, duration_ms, total_bytes + FROM dns_queries + WHERE provider = 'cloudflare' + AND protocol = 'https' + AND dnssec_mode = 'off' + AND keep_alive = 1 + ORDER BY timestamp_unix;""") + + print("\n5. Overall comparison table:") + print(""" SELECT protocol, dnssec_mode, keep_alive, + COUNT(*) as queries, + AVG(duration_ms) as avg_latency, + AVG(total_bytes) as avg_bytes + FROM dns_queries + GROUP BY protocol, dnssec_mode, keep_alive + ORDER BY protocol, dnssec_mode, keep_alive;""") + + print(f"\n{'-'*60}\n") + + +if __name__ == '__main__': + main()