feat(dns): add dnscrypt and dns over tcp

2026-02-04 22:08:05 +00:00
parent 5d9b630d13
commit 92351a80a9
12 changed files with 2576 additions and 568 deletions


@@ -8,10 +8,12 @@ import (
"github.com/afonsofrancof/sdns-proxy/common/dnssec"
"github.com/afonsofrancof/sdns-proxy/common/logger"
"github.com/afonsofrancof/sdns-proxy/common/protocols/do53"
"github.com/afonsofrancof/sdns-proxy/common/protocols/dnscrypt"
"github.com/afonsofrancof/sdns-proxy/common/protocols/doh"
"github.com/afonsofrancof/sdns-proxy/common/protocols/doq"
"github.com/afonsofrancof/sdns-proxy/common/protocols/dot"
"github.com/afonsofrancof/sdns-proxy/common/protocols/dotcp"
"github.com/afonsofrancof/sdns-proxy/common/protocols/doudp"
"github.com/miekg/dns"
)
@@ -37,7 +39,7 @@ type Options struct {
func New(upstream string, opts Options) (DNSClient, error) {
logger.Debug("Creating DNS client for upstream: %s with options: %+v", upstream, opts)
// Try to parse as URL first
// Try to parse as URL
parsedURL, err := url.Parse(upstream)
if err != nil {
logger.Error("Invalid upstream format: %v", err)
@@ -51,7 +53,7 @@ func New(upstream string, opts Options) (DNSClient, error) {
logger.Debug("Parsing %s as URL with scheme %s", upstream, parsedURL.Scheme)
baseClient, err = createClientFromURL(parsedURL, opts)
} else {
// No scheme - treat as plain DNS address
// No scheme - treat as plain DNS address (defaults to UDP)
logger.Debug("Parsing %s as plain DNS address", upstream)
baseClient, err = createClientFromPlainAddress(upstream, opts)
}
@@ -200,7 +202,8 @@ func createClientFromPlainAddress(address string, opts Options) (DNSClient, erro
}
logger.Debug("Creating client from plain address: host=%s, port=%s", host, port)
return createClient("", host, port, "", opts)
// Default to UDP for plain addresses
return createClient("udp", host, port, "", opts)
}
func getDefaultPort(scheme string) string {
@@ -212,6 +215,8 @@ func getDefaultPort(scheme string) string {
port = "853"
case "quic", "doq":
port = "853"
case "dnscrypt":
port = "443"
}
logger.Debug("Default port for scheme %s: %s", scheme, port)
return port
@@ -232,13 +237,22 @@ func createClient(scheme, host, port, path string, opts Options) (DNSClient, err
scheme, host, port, path, opts.DNSSEC, opts.KeepAlive)
switch scheme {
case "udp", "tcp", "do53", "":
config := do53.Config{
case "udp", "doudp", "":
config := doudp.Config{
HostAndPort: net.JoinHostPort(host, port),
DNSSEC: opts.DNSSEC,
}
logger.Debug("Creating DO53 client with config: %+v", config)
return do53.New(config)
logger.Debug("Creating DoUDP client with config: %+v", config)
return doudp.New(config)
case "tcp", "dotcp":
config := dotcp.Config{
HostAndPort: net.JoinHostPort(host, port),
DNSSEC: opts.DNSSEC,
KeepAlive: opts.KeepAlive,
}
logger.Debug("Creating DoTCP client with config: %+v", config)
return dotcp.New(config)
case "https", "doh":
config := doh.Config{
@@ -274,11 +288,22 @@ func createClient(scheme, host, port, path string, opts Options) (DNSClient, err
logger.Debug("Creating DoT client with config: %+v", config)
return dot.New(config)
case "sdns":
config := dnscrypt.Config{
// url.Parse split the stamp into a scheme and a "host", but the
// dnscrypt client expects the whole sdns:// stamp, so rejoin them here.
// The "host" is really the base64url stamp payload, not a hostname.
ServerStamp: fmt.Sprintf("%v://%v", scheme, host),
DNSSEC: opts.DNSSEC,
}
logger.Debug("Creating DNSCrypt client with stamp")
return dnscrypt.New(config)
case "doq":
config := doq.Config{
Host: host,
Port: port,
DNSSEC: opts.DNSSEC,
Host: host,
Port: port,
DNSSEC: opts.DNSSEC,
}
logger.Debug("Creating DoQ client with config: %+v", config)
return doq.New(config)
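For context, a minimal caller-side sketch of the new scheme dispatch (not part of this commit). The factory's import path and the exact DNSClient interface are not visible in this hunk, so the client package name below is an assumption; Query and Close mirror the concrete clients.

package main

import (
    "fmt"
    "log"

    // Assumed import path for the factory shown in this hunk; the
    // actual package path does not appear in the diff.
    "github.com/afonsofrancof/sdns-proxy/common/client"

    "github.com/miekg/dns"
)

func main() {
    // Each upstream string exercises a different branch of createClient.
    for _, upstream := range []string{
        "1.1.1.1:53",        // no scheme: plain address, now defaults to UDP
        "tcp://9.9.9.9:53",  // new DNS-over-TCP client
        "tls://9.9.9.9:853", // DoT
    } {
        c, err := client.New(upstream, client.Options{DNSSEC: true, KeepAlive: true})
        if err != nil {
            log.Fatalf("client for %s: %v", upstream, err)
        }
        msg := new(dns.Msg)
        msg.SetQuestion("example.com.", dns.TypeA)
        resp, err := c.Query(msg)
        if err != nil {
            log.Fatalf("query via %s: %v", upstream, err)
        }
        fmt.Printf("%s -> %d answers\n", upstream, len(resp.Answer))
        c.Close()
    }
}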


@@ -1,3 +1,85 @@
package dnscrypt
// DNSCrypt resolver implementation
import (
"fmt"
"time"
"github.com/afonsofrancof/sdns-proxy/common/logger"
"github.com/ameshkov/dnscrypt/v2"
"github.com/miekg/dns"
)
type Config struct {
ServerStamp string
DNSSEC bool
WriteTimeout time.Duration
ReadTimeout time.Duration
}
type Client struct {
resolver *dnscrypt.Client
config Config
ri *dnscrypt.ResolverInfo
}
func New(config Config) (*Client, error) {
logger.Debug("Creating DNSCrypt client with stamp: %s", config.ServerStamp)
if config.ServerStamp == "" {
logger.Error("DNSCrypt client creation failed: empty ServerStamp")
return nil, fmt.Errorf("dnscrypt: ServerStamp cannot be empty")
}
if config.WriteTimeout <= 0 {
config.WriteTimeout = 5 * time.Second
}
if config.ReadTimeout <= 0 {
config.ReadTimeout = 10 * time.Second
}
resolver := &dnscrypt.Client{
Net: "udp",
Timeout: config.ReadTimeout,
}
// Resolve the server info from the stamp
ri, err := resolver.Dial(config.ServerStamp)
if err != nil {
logger.Error("DNSCrypt failed to dial server: %v", err)
return nil, fmt.Errorf("dnscrypt: failed to dial server: %w", err)
}
logger.Debug("DNSCrypt client created (DNSSEC: %v)", config.DNSSEC)
return &Client{
resolver: resolver,
config: config,
ri: ri,
}, nil
}
func (c *Client) Close() {
// The dnscrypt library doesn't require explicit cleanup
}
func (c *Client) Query(msg *dns.Msg) (*dns.Msg, error) {
if len(msg.Question) > 0 {
question := msg.Question[0]
logger.Debug("DNSCrypt query: %s %s", question.Name, dns.TypeToString[question.Qtype])
}
if c.config.DNSSEC {
msg.SetEdns0(4096, true)
}
response, err := c.resolver.Exchange(msg, c.ri)
if err != nil {
logger.Error("DNSCrypt query failed: %v", err)
return nil, fmt.Errorf("dnscrypt: query failed: %w", err)
}
if len(response.Answer) > 0 {
logger.Debug("DNSCrypt response: %d answers", len(response.Answer))
}
return response, nil
}
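A minimal usage sketch for the new package (not part of this commit), using the import path from the factory's import block above; the server stamp is deliberately elided.

package main

import (
    "fmt"
    "log"

    "github.com/afonsofrancof/sdns-proxy/common/protocols/dnscrypt"
    "github.com/miekg/dns"
)

func main() {
    // An sdns:// stamp encodes the server address, provider name, and
    // public key; substitute a real stamp for the elided one here.
    c, err := dnscrypt.New(dnscrypt.Config{
        ServerStamp: "sdns://...",
        DNSSEC:      true,
    })
    if err != nil {
        log.Fatal(err)
    }
    defer c.Close()

    msg := new(dns.Msg)
    msg.SetQuestion("example.com.", dns.TypeA)
    resp, err := c.Query(msg)
    if err != nil {
        log.Fatal(err)
    }
    fmt.Println(dns.RcodeToString[resp.Rcode], len(resp.Answer), "answers")
}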


@@ -0,0 +1,222 @@
package dotcp
import (
"encoding/binary"
"fmt"
"io"
"net"
"sync"
"time"
"github.com/afonsofrancof/sdns-proxy/common/logger"
"github.com/miekg/dns"
)
type Config struct {
HostAndPort string
DNSSEC bool
KeepAlive bool
WriteTimeout time.Duration
ReadTimeout time.Duration
}
type Client struct {
hostAndPort string
config Config
conn net.Conn
connMutex sync.Mutex
}
func New(config Config) (*Client, error) {
logger.Debug("Creating DoTCP client: %s (KeepAlive: %v)", config.HostAndPort, config.KeepAlive)
if config.HostAndPort == "" {
logger.Error("DoTCP client creation failed: empty HostAndPort")
return nil, fmt.Errorf("dotcp: HostAndPort cannot be empty")
}
if config.WriteTimeout <= 0 {
config.WriteTimeout = 2 * time.Second
}
if config.ReadTimeout <= 0 {
config.ReadTimeout = 5 * time.Second
}
client := &Client{
hostAndPort: config.HostAndPort,
config: config,
}
if config.KeepAlive {
if err := client.ensureConnection(); err != nil {
logger.Error("DoTCP failed to establish initial connection: %v", err)
return nil, fmt.Errorf("failed to establish initial connection: %w", err)
}
}
logger.Debug("DoTCP client created: %s (DNSSEC: %v, KeepAlive: %v)", config.HostAndPort, config.DNSSEC, config.KeepAlive)
return client, nil
}
func (c *Client) Close() {
logger.Debug("Closing DoTCP client")
c.connMutex.Lock()
defer c.connMutex.Unlock()
if c.conn != nil {
c.conn.Close()
c.conn = nil
}
}
func (c *Client) ensureConnection() error {
c.connMutex.Lock()
defer c.connMutex.Unlock()
if c.conn != nil {
// Probe the idle connection: a 1ms read should time out if the peer
// is alive but silent; any data, EOF, or reset means the connection
// is unusable for a fresh exchange.
if err := c.conn.SetReadDeadline(time.Now().Add(time.Millisecond)); err == nil {
var testBuf [1]byte
_, err := c.conn.Read(testBuf[:])
c.conn.SetReadDeadline(time.Time{})
if netErr, ok := err.(net.Error); ok && netErr.Timeout() {
return nil
}
logger.Debug("DoTCP connection test failed, reconnecting: %v", err)
}
// Probe failed (or the deadline could not be set): close the stale
// connection before dialing a new one so it is not leaked.
c.conn.Close()
c.conn = nil
}
logger.Debug("Establishing DoTCP connection to %s", c.hostAndPort)
dialer := &net.Dialer{
Timeout: c.config.WriteTimeout,
}
conn, err := dialer.Dial("tcp", c.hostAndPort)
if err != nil {
logger.Error("DoTCP connection failed to %s: %v", c.hostAndPort, err)
return err
}
c.conn = conn
logger.Debug("DoTCP connection established to %s", c.hostAndPort)
return nil
}
func (c *Client) Query(msg *dns.Msg) (*dns.Msg, error) {
if len(msg.Question) > 0 {
question := msg.Question[0]
logger.Debug("DoTCP query: %s %s to %s", question.Name, dns.TypeToString[question.Qtype], c.hostAndPort)
}
if c.config.KeepAlive {
if err := c.ensureConnection(); err != nil {
return nil, fmt.Errorf("dotcp: failed to ensure connection: %w", err)
}
} else {
c.connMutex.Lock()
if c.conn != nil {
c.conn.Close()
c.conn = nil
}
c.connMutex.Unlock()
if err := c.ensureConnection(); err != nil {
return nil, fmt.Errorf("dotcp: failed to create connection: %w", err)
}
}
if c.config.DNSSEC {
msg.SetEdns0(4096, true)
}
packed, err := msg.Pack()
if err != nil {
logger.Error("DoTCP failed to pack message: %v", err)
return nil, fmt.Errorf("dotcp: failed to pack message: %w", err)
}
// DNS over TCP (RFC 1035 §4.2.2) prefixes each message with its
// length as a two-byte big-endian integer
length := make([]byte, 2)
binary.BigEndian.PutUint16(length, uint16(len(packed)))
data := append(length, packed...)
c.connMutex.Lock()
conn := c.conn
c.connMutex.Unlock()
if err := conn.SetWriteDeadline(time.Now().Add(c.config.WriteTimeout)); err != nil {
logger.Error("DoTCP failed to set write deadline: %v", err)
return nil, fmt.Errorf("dotcp: failed to set write deadline: %w", err)
}
if _, err := conn.Write(data); err != nil {
logger.Error("DoTCP failed to write message to %s: %v", c.hostAndPort, err)
if c.config.KeepAlive {
logger.Debug("DoTCP write failed with keep-alive, attempting reconnect")
if reconnectErr := c.ensureConnection(); reconnectErr != nil {
return nil, fmt.Errorf("dotcp: failed to reconnect: %w", reconnectErr)
}
c.connMutex.Lock()
conn = c.conn
c.connMutex.Unlock()
if err := conn.SetWriteDeadline(time.Now().Add(c.config.WriteTimeout)); err != nil {
return nil, fmt.Errorf("dotcp: failed to set write deadline after reconnect: %w", err)
}
if _, err := conn.Write(data); err != nil {
return nil, fmt.Errorf("dotcp: failed to write message after reconnect: %w", err)
}
} else {
return nil, fmt.Errorf("dotcp: failed to write message: %w", err)
}
}
if err := conn.SetReadDeadline(time.Now().Add(c.config.ReadTimeout)); err != nil {
logger.Error("DoTCP failed to set read deadline: %v", err)
return nil, fmt.Errorf("dotcp: failed to set read deadline: %w", err)
}
lengthBuf := make([]byte, 2)
if _, err := io.ReadFull(conn, lengthBuf); err != nil {
logger.Error("DoTCP failed to read response length from %s: %v", c.hostAndPort, err)
return nil, fmt.Errorf("dotcp: failed to read response length: %w", err)
}
msgLen := binary.BigEndian.Uint16(lengthBuf)
if msgLen > dns.MaxMsgSize {
logger.Error("DoTCP response too large from %s: %d bytes", c.hostAndPort, msgLen)
return nil, fmt.Errorf("dotcp: response message too large: %d", msgLen)
}
buffer := make([]byte, msgLen)
if _, err := io.ReadFull(conn, buffer); err != nil {
logger.Error("DoTCP failed to read response from %s: %v", c.hostAndPort, err)
return nil, fmt.Errorf("dotcp: failed to read response: %w", err)
}
response := new(dns.Msg)
if err := response.Unpack(buffer); err != nil {
logger.Error("DoTCP failed to unpack response from %s: %v", c.hostAndPort, err)
return nil, fmt.Errorf("dotcp: failed to unpack response: %w", err)
}
if len(response.Answer) > 0 {
logger.Debug("DoTCP response from %s: %d answers", c.hostAndPort, len(response.Answer))
}
if !c.config.KeepAlive {
c.connMutex.Lock()
if c.conn != nil {
c.conn.Close()
c.conn = nil
}
c.connMutex.Unlock()
}
return response, nil
}
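A minimal usage sketch for the new package (not part of this commit), again using the import path from the factory's import block. With KeepAlive set, New dials eagerly and the queries below should reuse one TCP connection, reconnecting only when the liveness probe in ensureConnection fails. 9.9.9.9:53 is just a placeholder resolver that accepts DNS over TCP.

package main

import (
    "fmt"
    "log"

    "github.com/afonsofrancof/sdns-proxy/common/protocols/dotcp"
    "github.com/miekg/dns"
)

func main() {
    c, err := dotcp.New(dotcp.Config{
        HostAndPort: "9.9.9.9:53",
        KeepAlive:   true,
    })
    if err != nil {
        log.Fatal(err)
    }
    defer c.Close()

    for _, name := range []string{"example.com.", "example.org."} {
        msg := new(dns.Msg)
        msg.SetQuestion(name, dns.TypeA)
        resp, err := c.Query(msg)
        if err != nil {
            log.Fatal(err)
        }
        fmt.Printf("%s -> %d answers\n", name, len(resp.Answer))
    }
}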


@@ -1,4 +1,4 @@
package do53
package doudp
import (
"fmt"
@@ -22,11 +22,11 @@ type Client struct {
}
func New(config Config) (*Client, error) {
logger.Debug("Creating DO53 client: %s", config.HostAndPort)
logger.Debug("Creating DoUDP client: %s", config.HostAndPort)
if config.HostAndPort == "" {
logger.Error("DO53 client creation failed: empty HostAndPort")
return nil, fmt.Errorf("do53: HostAndPort cannot be empty")
logger.Error("DoUDP client creation failed: empty HostAndPort")
return nil, fmt.Errorf("doudp: HostAndPort cannot be empty")
}
if config.WriteTimeout <= 0 {
config.WriteTimeout = 2 * time.Second
@@ -35,7 +35,7 @@ func New(config Config) (*Client, error) {
config.ReadTimeout = 5 * time.Second
}
logger.Debug("DO53 client created: %s (DNSSEC: %v)", config.HostAndPort, config.DNSSEC)
logger.Debug("DoUDP client created: %s (DNSSEC: %v)", config.HostAndPort, config.DNSSEC)
return &Client{
hostAndPort: config.HostAndPort,
@@ -44,36 +44,35 @@ func New(config Config) (*Client, error) {
}
func (c *Client) Close() {
logger.Debug("Closing DO53 client")
logger.Debug("Closing DoUDP client")
}
func (c *Client) createConnection() (*net.UDPConn, error) {
udpAddr, err := net.ResolveUDPAddr("udp", c.hostAndPort)
if err != nil {
logger.Error("DO53 failed to resolve address %s: %v", c.hostAndPort, err)
logger.Error("DoUDP failed to resolve address %s: %v", c.hostAndPort, err)
return nil, fmt.Errorf("failed to resolve UDP address: %w", err)
}
conn, err := net.DialUDP("udp", nil, udpAddr)
if err != nil {
logger.Error("DO53 failed to connect to %s: %v", c.hostAndPort, err)
logger.Error("DoUDP failed to connect to %s: %v", c.hostAndPort, err)
return nil, err
}
logger.Debug("DO53 connection established to %s", c.hostAndPort)
logger.Debug("DoUDP connection established to %s", c.hostAndPort)
return conn, nil
}
func (c *Client) Query(msg *dns.Msg) (*dns.Msg, error) {
if len(msg.Question) > 0 {
question := msg.Question[0]
logger.Debug("DO53 query: %s %s to %s", question.Name, dns.TypeToString[question.Qtype], c.hostAndPort)
logger.Debug("DoUDP query: %s %s to %s", question.Name, dns.TypeToString[question.Qtype], c.hostAndPort)
}
// Create connection for this query
conn, err := c.createConnection()
if err != nil {
return nil, fmt.Errorf("do53: failed to create connection: %w", err)
return nil, fmt.Errorf("doudp: failed to create connection: %w", err)
}
defer conn.Close()
@@ -83,43 +82,40 @@ func (c *Client) Query(msg *dns.Msg) (*dns.Msg, error) {
packedMsg, err := msg.Pack()
if err != nil {
logger.Error("DO53 failed to pack message: %v", err)
return nil, fmt.Errorf("do53: failed to pack DNS message: %w", err)
logger.Error("DoUDP failed to pack message: %v", err)
return nil, fmt.Errorf("doudp: failed to pack DNS message: %w", err)
}
// Send query
if err := conn.SetWriteDeadline(time.Now().Add(c.config.WriteTimeout)); err != nil {
logger.Error("DO53 failed to set write deadline: %v", err)
return nil, fmt.Errorf("do53: failed to set write deadline: %w", err)
logger.Error("DoUDP failed to set write deadline: %v", err)
return nil, fmt.Errorf("doudp: failed to set write deadline: %w", err)
}
if _, err := conn.Write(packedMsg); err != nil {
logger.Error("DO53 failed to send query to %s: %v", c.hostAndPort, err)
return nil, fmt.Errorf("do53: failed to send DNS query: %w", err)
logger.Error("DoUDP failed to send query to %s: %v", c.hostAndPort, err)
return nil, fmt.Errorf("doudp: failed to send DNS query: %w", err)
}
// Read response
if err := conn.SetReadDeadline(time.Now().Add(c.config.ReadTimeout)); err != nil {
logger.Error("DO53 failed to set read deadline: %v", err)
return nil, fmt.Errorf("do53: failed to set read deadline: %w", err)
logger.Error("DoUDP failed to set read deadline: %v", err)
return nil, fmt.Errorf("doudp: failed to set read deadline: %w", err)
}
buffer := make([]byte, dns.MaxMsgSize)
n, err := conn.Read(buffer)
if err != nil {
logger.Error("DO53 failed to read response from %s: %v", c.hostAndPort, err)
return nil, fmt.Errorf("do53: failed to read DNS response: %w", err)
logger.Error("DoUDP failed to read response from %s: %v", c.hostAndPort, err)
return nil, fmt.Errorf("doudp: failed to read DNS response: %w", err)
}
// Parse response
response := new(dns.Msg)
if err := response.Unpack(buffer[:n]); err != nil {
logger.Error("DO53 failed to unpack response from %s: %v", c.hostAndPort, err)
return nil, fmt.Errorf("do53: failed to unpack DNS response: %w", err)
logger.Error("DoUDP failed to unpack response from %s: %v", c.hostAndPort, err)
return nil, fmt.Errorf("doudp: failed to unpack DNS response: %w", err)
}
if len(response.Answer) > 0 {
logger.Debug("DO53 response from %s: %d answers", c.hostAndPort, len(response.Answer))
logger.Debug("DoUDP response from %s: %d answers", c.hostAndPort, len(response.Answer))
}
return response, nil

go.mod

@@ -1,26 +1,29 @@
module github.com/afonsofrancof/sdns-proxy
go 1.24.0
go 1.24.1
require (
github.com/alecthomas/kong v1.8.1
github.com/ameshkov/dnscrypt/v2 v2.4.0
github.com/google/gopacket v1.1.19
github.com/miekg/dns v1.1.63
github.com/miekg/dns v1.1.65
github.com/quic-go/quic-go v0.50.0
golang.org/x/net v0.35.0
golang.org/x/net v0.38.0
)
require (
github.com/AdguardTeam/golibs v0.32.7 // indirect
github.com/ameshkov/dnsstamps v1.0.3 // indirect
github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 // indirect
github.com/google/pprof v0.0.0-20210407192527-94a9f03dee38 // indirect
github.com/onsi/ginkgo/v2 v2.9.5 // indirect
github.com/quic-go/qpack v0.5.1 // indirect
go.uber.org/mock v0.5.0 // indirect
golang.org/x/crypto v0.33.0 // indirect
golang.org/x/exp v0.0.0-20240506185415-9bf2ced13842 // indirect
golang.org/x/mod v0.18.0 // indirect
golang.org/x/sync v0.11.0 // indirect
golang.org/x/sys v0.30.0 // indirect
golang.org/x/text v0.22.0 // indirect
golang.org/x/tools v0.22.0 // indirect
golang.org/x/crypto v0.37.0 // indirect
golang.org/x/exp v0.0.0-20250305212735-054e65f0b394 // indirect
golang.org/x/mod v0.24.0 // indirect
golang.org/x/sync v0.13.0 // indirect
golang.org/x/sys v0.32.0 // indirect
golang.org/x/text v0.24.0 // indirect
golang.org/x/tools v0.31.0 // indirect
)

go.sum

@@ -1,23 +1,29 @@
github.com/AdguardTeam/golibs v0.32.7 h1:3dmGlAVgmvquCCwHsvEl58KKcRAK3z1UnjMnwSIeDH4=
github.com/AdguardTeam/golibs v0.32.7/go.mod h1:bE8KV1zqTzgZjmjFyBJ9f9O5DEKO717r7e57j1HclJA=
github.com/alecthomas/assert/v2 v2.11.0 h1:2Q9r3ki8+JYXvGsDyBXwH3LcJ+WK5D0gc5E8vS6K3D0=
github.com/alecthomas/assert/v2 v2.11.0/go.mod h1:Bze95FyfUr7x34QZrjL+XP+0qgp/zg8yS+TtBj1WA3k=
github.com/alecthomas/kong v1.8.1 h1:6aamvWBE/REnR/BCq10EcozmcpUPc5aGI1lPAWdB0EE=
github.com/alecthomas/kong v1.8.1/go.mod h1:p2vqieVMeTAnaC83txKtXe8FLke2X07aruPWXyMPQrU=
github.com/alecthomas/repr v0.4.0 h1:GhI2A8MACjfegCPVq9f1FLvIBS+DrQ2KQBFZP1iFzXc=
github.com/alecthomas/repr v0.4.0/go.mod h1:Fr0507jx4eOXV7AlPV6AVZLYrLIuIeSOWtW57eE/O/4=
github.com/ameshkov/dnscrypt/v2 v2.4.0 h1:if6ZG2cuQmcP2TwSY+D0+8+xbPfoatufGlOQTMNkI9o=
github.com/ameshkov/dnscrypt/v2 v2.4.0/go.mod h1:WpEFV2uhebXb8Jhes/5/fSdpmhGV8TL22RDaeWwV6hI=
github.com/ameshkov/dnsstamps v1.0.3 h1:Srzik+J9mivH1alRACTbys2xOxs0lRH9qnTA7Y1OYVo=
github.com/ameshkov/dnsstamps v1.0.3/go.mod h1:Ii3eUu73dx4Vw5O4wjzmT5+lkCwovjzaEZZ4gKyIH5A=
github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI=
github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI=
github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/go-logr/logr v1.2.4 h1:g01GSCwiDw2xSZfjJ2/T9M+S6pFdcNtFYsp+Y43HYDQ=
github.com/go-logr/logr v1.2.4/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A=
github.com/go-logr/logr v1.4.2 h1:6pFjapn8bFcIbiKo3XT4j/BhANplGihG6tvd+8rYgrY=
github.com/go-logr/logr v1.4.2/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY=
github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 h1:tfuBGBXKqDEevZMzYi5KSi8KkcZtzBcTgAUUtapy0OI=
github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572/go.mod h1:9Pwr4B2jHnOSGXyyzV8ROjYa2ojvAY6HCGYYfMoC3Ls=
github.com/golang/protobuf v1.5.3 h1:KhyjKVUg7Usr/dYsdSqoFveMYd5ko72D+zANwlG1mmg=
github.com/golang/protobuf v1.5.3/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY=
github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI=
github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8=
github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU=
github.com/google/gopacket v1.1.19 h1:ves8RnFZPGiFnTS0uPQStjwru6uO6h+nlr9j6fL7kF8=
github.com/google/gopacket v1.1.19/go.mod h1:iJ8V8n6KS+z2U1A8pUwu8bW5SyEMkXJB8Yo/Vo+TKTo=
github.com/google/pprof v0.0.0-20210407192527-94a9f03dee38 h1:yAJXTCF9TqKcTiHJAE8dj7HMvPfh66eeA2JYW7eFpSE=
@@ -25,8 +31,8 @@ github.com/google/pprof v0.0.0-20210407192527-94a9f03dee38/go.mod h1:kpwsk12EmLe
github.com/hexops/gotextdiff v1.0.3 h1:gitA9+qJrrTCsiCl7+kh75nPqQt1cx4ZkudSTLoUqJM=
github.com/hexops/gotextdiff v1.0.3/go.mod h1:pSWU5MAI3yDq+fZBTazCSJysOMbxWL1BSow5/V2vxeg=
github.com/ianlancetaylor/demangle v0.0.0-20200824232613-28f6c0f3b639/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc=
github.com/miekg/dns v1.1.63 h1:8M5aAw6OMZfFXTT7K5V0Eu5YiiL8l7nUAkyN6C9YwaY=
github.com/miekg/dns v1.1.63/go.mod h1:6NGHfjhpmr5lt3XPLuyfDJi5AXbNIPM9PY6H6sF1Nfs=
github.com/miekg/dns v1.1.65 h1:0+tIPHzUW0GCge7IiK3guGP57VAw7hoPDfApjkMD1Fc=
github.com/miekg/dns v1.1.65/go.mod h1:Dzw9769uoKVaLuODMDZz9M6ynFU6Em65csPuoi8G0ck=
github.com/onsi/ginkgo/v2 v2.9.5 h1:+6Hr4uxzP4XIUyAkg61dWBw8lb/gc4/X5luuxN/EC+Q=
github.com/onsi/ginkgo/v2 v2.9.5/go.mod h1:tvAoo1QUJwNEU2ITftXTpR7R1RbCzoZUOs3RonqW57k=
github.com/onsi/gomega v1.27.6 h1:ENqfyGeS5AX/rlXDd/ETokDz93u0YufY1Pgxuy/PvWE=
@@ -39,43 +45,43 @@ github.com/quic-go/quic-go v0.50.0 h1:3H/ld1pa3CYhkcc20TPIyG1bNsdhn9qZBGN3b9/UyU
github.com/quic-go/quic-go v0.50.0/go.mod h1:Vim6OmUvlYdwBhXP9ZVrtGmCMWa3wEqhq3NgYrI8b4E=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg=
github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA=
github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
go.uber.org/mock v0.5.0 h1:KAMbZvZPyBPWgD14IrIQ38QCyjwpvVVV6K/bHl1IwQU=
go.uber.org/mock v0.5.0/go.mod h1:ge71pBPLYDk7QIi1LupWxdAykm7KIEFchiOqd6z7qMM=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
golang.org/x/crypto v0.33.0 h1:IOBPskki6Lysi0lo9qQvbxiQ+FvsCC/YWOecCHAixus=
golang.org/x/crypto v0.33.0/go.mod h1:bVdXmD7IV/4GdElGPozy6U7lWdRXA4qyRVGJV57uQ5M=
golang.org/x/exp v0.0.0-20240506185415-9bf2ced13842 h1:vr/HnozRka3pE4EsMEg1lgkXJkTFJCVUX+S/ZT6wYzM=
golang.org/x/exp v0.0.0-20240506185415-9bf2ced13842/go.mod h1:XtvwrStGgqGPLc4cjQfWqZHG1YFdYs6swckp8vpsjnc=
golang.org/x/crypto v0.37.0 h1:kJNSjF/Xp7kU0iB2Z+9viTPMW4EqqsrywMXLJOOsXSE=
golang.org/x/crypto v0.37.0/go.mod h1:vg+k43peMZ0pUMhYmVAWysMK35e6ioLh3wB8ZCAfbVc=
golang.org/x/exp v0.0.0-20250305212735-054e65f0b394 h1:nDVHiLt8aIbd/VzvPWN6kSOPE7+F/fNFDSXLVYkE/Iw=
golang.org/x/exp v0.0.0-20250305212735-054e65f0b394/go.mod h1:sIifuuw/Yco/y6yb6+bDNfyeQ/MdPUy/hKEMYQV17cM=
golang.org/x/lint v0.0.0-20200302205851-738671d3881b/go.mod h1:3xt1FjdF8hUf6vQPIChWIBhFzV8gjjsPE/fR3IyQdNY=
golang.org/x/mod v0.1.1-0.20191105210325-c90efee705ee/go.mod h1:QqPTAvyqsEbceGzBzNggFXnrqF1CaUcvgkdR5Ot7KZg=
golang.org/x/mod v0.18.0 h1:5+9lSbEzPSdWkH32vYPBwEpX8KwDbM52Ud9xBUvNlb0=
golang.org/x/mod v0.18.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c=
golang.org/x/mod v0.24.0 h1:ZfthKaKaT4NrhGVZHO1/WDTwGES4De8KtWO0SIbNJMU=
golang.org/x/mod v0.24.0/go.mod h1:IXM97Txy2VM4PJ3gI61r1YEk/gAj6zAHN3AdZt6S9Ww=
golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.35.0 h1:T5GQRQb2y08kTAByq9L4/bz8cipCdA8FbRTXewonqY8=
golang.org/x/net v0.35.0/go.mod h1:EglIi67kWsHKlRzzVMUD93VMSWGFOMSZgxFjparz1Qk=
golang.org/x/net v0.38.0 h1:vRMAPTMaeGqVhG5QyLJHqNDwecKTomGeqbnfZyKlBI8=
golang.org/x/net v0.38.0/go.mod h1:ivrbrMbzFq5J41QOQh0siUuly180yBYtLp+CKbEaFx8=
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.11.0 h1:GGz8+XQP4FvTTrjZPzNKTMFtSXH80RAzG+5ghFPgK9w=
golang.org/x/sync v0.11.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
golang.org/x/sync v0.13.0 h1:AauUjRAJ9OSnvULf/ARrrVywoJDy0YS2AwQ98I37610=
golang.org/x/sync v0.13.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20191204072324-ce4227a45e2e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.30.0 h1:QjkSwP/36a20jFYWkSue1YwXzLmsV5Gfq7Eiy72C1uc=
golang.org/x/sys v0.30.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/sys v0.32.0 h1:s77OFDvIQeibCmezSnk/q6iAfkdiQaJi4VzroCFrN20=
golang.org/x/sys v0.32.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.22.0 h1:bofq7m3/HAFvbF51jz3Q9wLg3jkvSPuiZu/pD1XwgtM=
golang.org/x/text v0.22.0/go.mod h1:YRoo4H8PVmsu+E3Ou7cqLVH8oXWIHVoX0jqUWALQhfY=
golang.org/x/time v0.5.0 h1:o7cqy6amK/52YcAKIPlM3a+Fpj35zvRj2TP+e1xFSfk=
golang.org/x/time v0.5.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM=
golang.org/x/text v0.24.0 h1:dd5Bzh4yt5KYA8f9CJHCP4FB4D51c2c6JvN37xJJkJ0=
golang.org/x/text v0.24.0/go.mod h1:L8rBsPeo2pSS+xqN0d5u2ikmjtmoJbDBT1b7nHvFCdU=
golang.org/x/time v0.11.0 h1:/bpjEDfN9tkoN/ryeYHnv5hcMlc8ncjMcM4XBk5NWV0=
golang.org/x/time v0.11.0/go.mod h1:CDIdPxbZBQxdj6cxyCIdrNogrJKMJ7pr37NYpMcMDSg=
golang.org/x/tools v0.0.0-20200130002326-2f3ba24bd6e7/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28=
golang.org/x/tools v0.22.0 h1:gqSGLZqv+AI9lIQzniJ0nZDRG5GBPsSi+DRNHWNz6yA=
golang.org/x/tools v0.22.0/go.mod h1:aCwcsjqvq7Yqt6TNyX7QMU2enbQ/Gt0bo6krSeEri+c=
golang.org/x/tools v0.31.0 h1:0EedkvKDbh+qistFTd0Bcwe/YLh4vHwWEkiI0toFIBU=
golang.org/x/tools v0.31.0/go.mod h1:naFTU+Cev749tSJRXJlna0T3WxKvb1kWEx15xA4SdmQ=
golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
google.golang.org/protobuf v1.33.0 h1:uNO2rsAINq/JlFpSdYEKIZ0uKD/R9cpdv0T+yoGwGmI=
google.golang.org/protobuf v1.33.0/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos=
google.golang.org/protobuf v1.36.5 h1:tPhr+woSbjfYvY6/GPufUoYizxw1cF/yFoxJ2fmpwlM=
google.golang.org/protobuf v1.36.5/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=


@@ -1,3 +1,4 @@
// ./internal/qol/utils.go
package qol
import (
@@ -15,7 +16,7 @@ func GenerateOutputPaths(outputDir, upstream string, dnssec, authDNSSEC, keepAli
base := proto
var flags []string
if dnssec {
if authDNSSEC {
flags = append(flags, "auth")
@@ -57,6 +58,7 @@ func cleanServerName(server string) string {
"94.140.15.15": "adguard",
"dns.adguard.com": "adguard",
"dns.adguard-dns.com": "adguard",
"AQMAAAAAAAAAETk0LjE0MC4xNS4xNTo1NDQzINErR_JS3PLCu_iZEIbq95zkSV2LFsigxDIuUso_OQhzIjIuZG5zY3J5cHQuZGVmYXVsdC5uczEuYWRndWFyZC5jb20": "adguard",
}
serverName := ""
@@ -76,11 +78,31 @@ func cleanServerName(server string) string {
}
func DetectProtocol(upstream string) string {
if strings.Contains(upstream, "://") {
u, err := url.Parse(upstream)
if err == nil && u.Scheme != "" {
return strings.ToLower(u.Scheme)
scheme := strings.ToLower(u.Scheme)
// Normalize scheme names
switch scheme {
case "udp", "doudp":
return "doudp"
case "tcp", "dotcp":
return "dotcp"
case "tls", "dot":
return "dot"
case "https", "doh":
return "doh"
case "doh3":
return "doh3"
case "doq":
return "doq"
case "sdns":
return "dnscrypt"
default:
return scheme
}
}
}
return "do53"
return "doudp"
}
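Illustrative inputs and the labels the normalized DetectProtocol now returns, as a small sketch against the switch above (the import path follows from the module name and the ./internal/qol/utils.go comment):

package main

import (
    "fmt"

    "github.com/afonsofrancof/sdns-proxy/internal/qol"
)

func main() {
    for _, upstream := range []string{
        "udp://1.1.1.1:53",             // -> "doudp"
        "tcp://1.1.1.1:53",             // -> "dotcp"
        "tls://1.1.1.1:853",            // -> "dot"
        "https://dns.google/dns-query", // -> "doh"
        "sdns://...",                   // stamp elided -> "dnscrypt"
        "1.1.1.1:53",                   // no scheme -> "doudp"
    } {
        fmt.Println(upstream, "->", qol.DetectProtocol(upstream))
    }
}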


@@ -1,289 +1,498 @@
import csv
import os
import statistics
from collections import defaultdict
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from pathlib import Path
from scipy import stats
import warnings
def map_server_to_resolver(server):
"""Map server address/domain to resolver name"""
server_lower = server.lower()
if '1.1.1.1' in server_lower or 'cloudflare' in server_lower:
return 'Cloudflare'
elif '8.8.8.8' in server_lower or 'google' in server_lower:
return 'Google'
elif '9.9.9.9' in server_lower or 'quad9' in server_lower:
return 'Quad9'
elif 'adguard' in server_lower:
return 'AdGuard'
else:
return server # Fallback to original server name
warnings.filterwarnings('ignore')
def extract_from_new_format(filename):
"""Parse new filename format: protocol[-flags]-timestamp.csv"""
base = filename.replace('.csv', '')
parts = base.split('-')
if len(parts) < 2:
return None, None, None, None
protocol = parts[0]
timestamp = parts[-1]
# Flags are everything between protocol and timestamp
flags_str = '-'.join(parts[1:-1])
# Determine DNSSEC status
if 'auth' in flags_str:
dnssec_status = 'auth' # Authoritative DNSSEC
elif 'trust' in flags_str:
dnssec_status = 'trust' # Trust-based DNSSEC
else:
dnssec_status = 'off'
keepalive_status = 'on' if 'persist' in flags_str else 'off'
return protocol, dnssec_status, keepalive_status, flags_str
# Set style for publication-quality plots
sns.set_style("whitegrid")
plt.rcParams['figure.dpi'] = 300
plt.rcParams['savefig.dpi'] = 300
plt.rcParams['font.size'] = 10
plt.rcParams['figure.figsize'] = (12, 6)
def extract_server_info_from_csv(row):
"""Extract DNSSEC info from CSV row data"""
dnssec = row.get('dnssec', 'false').lower() == 'true'
auth_dnssec = row.get('auth_dnssec', 'false').lower() == 'true'
keepalive = row.get('keep_alive', 'false').lower() == 'true'
if dnssec:
if auth_dnssec:
dnssec_status = 'auth'
else:
dnssec_status = 'trust'
else:
dnssec_status = 'off'
keepalive_status = 'on' if keepalive else 'off'
return dnssec_status, keepalive_status
def extract_server_info(file_path, row):
"""Extract info using directory structure, filename, and CSV data"""
path = Path(file_path)
# First try to get DNSSEC info from CSV row (most accurate)
try:
csv_dnssec_status, csv_keepalive_status = extract_server_info_from_csv(row)
protocol = row.get('protocol', '').lower()
class DNSAnalyzer:
def __init__(self, results_dir='results'):
self.results_dir = Path(results_dir)
self.df = None
# Get server from directory structure
parts = path.parts
if len(parts) >= 4:
potential_date = parts[-2]
# Check if it's a date like YYYY-MM-DD
if len(potential_date) == 10 and potential_date[4] == '-' and potential_date[7] == '-' and potential_date.replace('-', '').isdigit():
server = parts[-3] # resolver folder (e.g., cloudflare)
return protocol, server, csv_dnssec_status, csv_keepalive_status
def load_all_data(self):
"""Load all CSV files from the results directory"""
data_frames = []
# Fallback to DNS server field
server = row.get('dns_server', '')
return protocol, server, csv_dnssec_status, csv_keepalive_status
providers = ['adguard', 'cloudflare', 'google', 'quad9']
except (KeyError, ValueError):
pass
# Fallback to filename parsing
filename = path.name
protocol, dnssec_status, keepalive_status, flags = extract_from_new_format(filename)
if protocol:
# Get server from directory structure
parts = path.parts
if len(parts) >= 4:
potential_date = parts[-2]
if len(potential_date) == 10 and potential_date[4] == '-' and potential_date[7] == '-' and potential_date.replace('-', '').isdigit():
server = parts[-3]
return protocol, server, dnssec_status, keepalive_status
# Fallback to DNS server field
server = row.get('dns_server', '')
return protocol, server, dnssec_status, keepalive_status
return None, None, None, None
def get_dnssec_display_name(dnssec_status):
"""Convert DNSSEC status to display name"""
if dnssec_status == 'auth':
return 'DNSSEC (Authoritative)'
elif dnssec_status == 'trust':
return 'DNSSEC (Trust-based)'
else:
return 'No DNSSEC'
def analyze_dns_data(root_directory, output_file):
"""Analyze DNS data and generate metrics"""
# Dictionary to store measurements: {(resolver, protocol, dnssec, keepalive): [durations]}
measurements = defaultdict(list)
# Walk through all directories
for root, dirs, files in os.walk(root_directory):
for file in files:
if file.endswith('.csv'):
file_path = os.path.join(root, file)
print(f"Processing: {file_path}")
for provider in providers:
provider_path = self.results_dir / provider
if not provider_path.exists():
continue
for csv_file in provider_path.glob('*.csv'):
try:
with open(file_path, 'r', newline='') as csvfile:
reader = csv.DictReader(csvfile)
for row_num, row in enumerate(reader, 2): # Start at 2 since header is row 1
try:
protocol, server, dnssec_status, keepalive_status = extract_server_info(file_path, row)
if protocol and server:
resolver = map_server_to_resolver(server)
duration_ms = float(row.get('duration_ms', 0))
# Only include successful queries
if row.get('response_code', '') in ['NOERROR', '']:
key = (resolver, protocol, dnssec_status, keepalive_status)
measurements[key].append(duration_ms)
except (ValueError, TypeError) as e:
print(f"Data parse error in {file_path} row {row_num}: {e}")
continue
df = pd.read_csv(csv_file)
df['provider'] = provider
df['test_config'] = csv_file.stem
data_frames.append(df)
except Exception as e:
print(f"Error processing file {file_path}: {e}")
continue
# Calculate statistics grouped by resolver first, then by configuration
resolver_results = defaultdict(lambda: defaultdict(lambda: defaultdict(list)))
for (resolver, protocol, dnssec, keepalive), durations in measurements.items():
if durations:
stats = {
'protocol': protocol.upper(),
'dnssec': dnssec,
'keepalive': keepalive,
'total_queries': len(durations),
'avg_latency_ms': round(statistics.mean(durations), 3),
'median_latency_ms': round(statistics.median(durations), 3),
'min_latency_ms': round(min(durations), 3),
'max_latency_ms': round(max(durations), 3),
'std_dev_ms': round(statistics.stdev(durations) if len(durations) > 1 else 0, 3),
'p95_latency_ms': round(statistics.quantiles(durations, n=20)[18], 3) if len(durations) >= 20 else round(max(durations), 3),
'p99_latency_ms': round(statistics.quantiles(durations, n=100)[98], 3) if len(durations) >= 100 else round(max(durations), 3)
}
# Group by resolver -> dnssec -> keepalive -> protocol
resolver_results[resolver][dnssec][keepalive].append(stats)
# Sort each configuration's results by average latency
for resolver in resolver_results:
for dnssec in resolver_results[resolver]:
for keepalive in resolver_results[resolver][dnssec]:
resolver_results[resolver][dnssec][keepalive].sort(key=lambda x: x['avg_latency_ms'])
# Write to CSV with all data
all_results = []
for resolver in resolver_results:
for dnssec in resolver_results[resolver]:
for keepalive in resolver_results[resolver][dnssec]:
for result in resolver_results[resolver][dnssec][keepalive]:
result['resolver'] = resolver
all_results.append(result)
with open(output_file, 'w', newline='') as csvfile:
fieldnames = [
'resolver', 'protocol', 'dnssec', 'keepalive', 'total_queries',
'avg_latency_ms', 'median_latency_ms', 'min_latency_ms',
'max_latency_ms', 'std_dev_ms', 'p95_latency_ms', 'p99_latency_ms'
]
print(f"Error loading {csv_file}: {e}")
writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
writer.writeheader()
writer.writerows(all_results)
print(f"\nAnalysis complete! Full results written to {output_file}")
print(f"Total measurements: {sum(len(durations) for durations in measurements.values())}")
def print_configuration_table(resolver, dnssec_status, keepalive_status, results):
"""Print a formatted table for a specific configuration"""
ka_indicator = "PERSISTENT" if keepalive_status == 'on' else "NEW CONN"
dnssec_display = get_dnssec_display_name(dnssec_status)
self.df = pd.concat(data_frames, ignore_index=True)
self._clean_and_enrich_data()
print(f"Loaded {len(self.df)} DNS queries across {len(data_frames)} test configurations")
print(f"\n {dnssec_display} - {ka_indicator}")
print(" " + "-" * 90)
print(f" {'Protocol':<12} {'Queries':<8} {'Avg(ms)':<10} {'Median(ms)':<12} {'Min(ms)':<10} {'Max(ms)':<10} {'P95(ms)':<10}")
print(" " + "-" * 90)
def _clean_and_enrich_data(self):
"""Clean data and add useful columns"""
# Remove failed queries
self.df = self.df[self.df['error'].isna()]
for result in results:
print(f" {result['protocol']:<12} {result['total_queries']:<8} "
f"{result['avg_latency_ms']:<10} {result['median_latency_ms']:<12} "
f"{result['min_latency_ms']:<10} {result['max_latency_ms']:<10} "
f"{result['p95_latency_ms']:<10}")
# Print results grouped by resolver first
print(f"\n{'=' * 100}")
print("DNS RESOLVER PERFORMANCE COMPARISON")
print(f"{'=' * 100}")
for resolver in sorted(resolver_results.keys()):
print(f"\n{resolver} DNS Resolver")
print("=" * 100)
# Extract protocol base (remove -auth, -trust suffixes)
self.df['protocol_base'] = self.df['protocol'].str.replace('-auth|-trust', '', regex=True)
# Order configurations logically
config_order = [
('off', 'off'), # No DNSSEC, New connections
('off', 'on'), # No DNSSEC, Persistent
('trust', 'off'), # Trust DNSSEC, New connections
('trust', 'on'), # Trust DNSSEC, Persistent
('auth', 'off'), # Auth DNSSEC, New connections
('auth', 'on'), # Auth DNSSEC, Persistent
]
# DNSSEC configuration
self.df['dnssec_mode'] = 'none'
self.df.loc[self.df['auth_dnssec'] == True, 'dnssec_mode'] = 'auth'
self.df.loc[(self.df['dnssec'] == True) & (self.df['auth_dnssec'] == False), 'dnssec_mode'] = 'trust'
for dnssec_status, keepalive_status in config_order:
if dnssec_status in resolver_results[resolver] and keepalive_status in resolver_results[resolver][dnssec_status]:
results = resolver_results[resolver][dnssec_status][keepalive_status]
if results: # Only print if there are results
print_configuration_table(resolver, dnssec_status, keepalive_status, results)
# Summary comparison across resolvers
print(f"\n{'=' * 100}")
print("CROSS-RESOLVER PROTOCOL COMPARISON")
print(f"{'=' * 100}")
# Group by protocol and configuration for cross-resolver comparison
protocol_comparison = defaultdict(lambda: defaultdict(list))
for resolver in resolver_results:
for dnssec in resolver_results[resolver]:
for keepalive in resolver_results[resolver][dnssec]:
for result in resolver_results[resolver][dnssec][keepalive]:
config_key = f"{get_dnssec_display_name(dnssec)} - {'PERSISTENT' if keepalive == 'on' else 'NEW CONN'}"
protocol_comparison[result['protocol']][config_key].append({
'resolver': resolver,
'avg_latency_ms': result['avg_latency_ms'],
'total_queries': result['total_queries']
})
for protocol in sorted(protocol_comparison.keys()):
print(f"\n{protocol} Protocol Comparison")
print("-" * 100)
# Protocol categories
self.df['protocol_category'] = self.df['protocol_base'].map({
'udp': 'Plain DNS',
'tls': 'DoT',
'https': 'DoH',
'doh3': 'DoH/3',
'doq': 'DoQ'
})
for config in sorted(protocol_comparison[protocol].keys()):
resolvers_data = protocol_comparison[protocol][config]
if resolvers_data:
print(f"\n {config}")
print(" " + "-" * 60)
print(f" {'Resolver':<15} {'Avg Latency (ms)':<20} {'Queries':<10}")
print(" " + "-" * 60)
# Sort by average latency
resolvers_data.sort(key=lambda x: x['avg_latency_ms'])
for data in resolvers_data:
print(f" {data['resolver']:<15} {data['avg_latency_ms']:<20} {data['total_queries']:<10}")
# Connection persistence
self.df['persistence'] = self.df['keep_alive'].fillna(False)
def generate_summary_statistics(self):
"""Generate comprehensive summary statistics"""
print("\n" + "="*80)
print("SUMMARY STATISTICS")
print("="*80)
# Overall statistics
print("\n--- Overall Performance ---")
print(f"Total queries: {len(self.df)}")
print(f"Mean latency: {self.df['duration_ms'].mean():.2f} ms")
print(f"Median latency: {self.df['duration_ms'].median():.2f} ms")
print(f"95th percentile: {self.df['duration_ms'].quantile(0.95):.2f} ms")
print(f"99th percentile: {self.df['duration_ms'].quantile(0.99):.2f} ms")
# By protocol
print("\n--- Performance by Protocol ---")
protocol_stats = self.df.groupby('protocol_category')['duration_ms'].agg([
('count', 'count'),
('mean', 'mean'),
('median', 'median'),
('std', 'std'),
('p95', lambda x: x.quantile(0.95)),
('p99', lambda x: x.quantile(0.99))
]).round(2)
print(protocol_stats)
# By provider
print("\n--- Performance by Provider ---")
provider_stats = self.df.groupby('provider')['duration_ms'].agg([
('count', 'count'),
('mean', 'mean'),
('median', 'median'),
('std', 'std'),
('p95', lambda x: x.quantile(0.95))
]).round(2)
print(provider_stats)
# DNSSEC impact
print("\n--- DNSSEC Validation Impact ---")
dnssec_stats = self.df.groupby('dnssec_mode')['duration_ms'].agg([
('count', 'count'),
('mean', 'mean'),
('median', 'median'),
('overhead_vs_none', lambda x: x.mean())
]).round(2)
# Calculate overhead percentage
baseline = dnssec_stats.loc['none', 'mean'] if 'none' in dnssec_stats.index else 0
if baseline > 0:
dnssec_stats['overhead_pct'] = ((dnssec_stats['overhead_vs_none'] - baseline) / baseline * 100).round(1)
print(dnssec_stats)
# Bandwidth analysis
print("\n--- Bandwidth Usage ---")
bandwidth_stats = self.df.groupby('protocol_category').agg({
'request_size_bytes': ['mean', 'median'],
'response_size_bytes': ['mean', 'median']
}).round(2)
print(bandwidth_stats)
# Persistence impact (where applicable)
print("\n--- Connection Persistence Impact ---")
persist_protocols = self.df[self.df['protocol_base'].isin(['tls', 'https'])]
if len(persist_protocols) > 0:
persist_stats = persist_protocols.groupby(['protocol_base', 'persistence'])['duration_ms'].agg([
('mean', 'mean'),
('median', 'median')
]).round(2)
print(persist_stats)
return {
'protocol': protocol_stats,
'provider': provider_stats,
'dnssec': dnssec_stats,
'bandwidth': bandwidth_stats
}
def plot_latency_by_protocol(self, output_dir='plots'):
"""Violin plot of latency distribution by protocol"""
Path(output_dir).mkdir(exist_ok=True)
plt.figure(figsize=(14, 7))
# Order protocols logically
protocol_order = ['Plain DNS', 'DoT', 'DoH', 'DoH/3', 'DoQ']
available_protocols = [p for p in protocol_order if p in self.df['protocol_category'].values]
sns.violinplot(data=self.df, x='protocol_category', y='duration_ms',
order=available_protocols, inner='box', cut=0)
plt.title('DNS Query Latency Distribution by Protocol', fontsize=14, fontweight='bold')
plt.xlabel('Protocol', fontsize=12)
plt.ylabel('Response Time (ms)', fontsize=12)
plt.xticks(rotation=0)
# Add mean values as annotations
for i, protocol in enumerate(available_protocols):
mean_val = self.df[self.df['protocol_category'] == protocol]['duration_ms'].mean()
plt.text(i, mean_val, f'{mean_val:.1f}', ha='center', va='bottom', fontweight='bold')
plt.tight_layout()
plt.savefig(f'{output_dir}/latency_by_protocol.png', bbox_inches='tight')
plt.close()
print(f"✓ Saved: latency_by_protocol.png")
def plot_provider_comparison(self, output_dir='plots'):
"""Box plot comparing providers across protocols"""
Path(output_dir).mkdir(exist_ok=True)
fig, axes = plt.subplots(2, 2, figsize=(16, 12))
fig.suptitle('Provider Performance Comparison by Protocol', fontsize=16, fontweight='bold')
protocols = self.df['protocol_category'].unique()
protocols = [p for p in ['Plain DNS', 'DoT', 'DoH', 'DoH/3'] if p in protocols]
for idx, protocol in enumerate(protocols[:4]):
ax = axes[idx // 2, idx % 2]
data = self.df[self.df['protocol_category'] == protocol]
if len(data) > 0:
sns.boxplot(data=data, x='provider', y='duration_ms', ax=ax)
ax.set_title(f'{protocol}', fontsize=12, fontweight='bold')
ax.set_xlabel('Provider', fontsize=10)
ax.set_ylabel('Response Time (ms)', fontsize=10)
ax.tick_params(axis='x', rotation=45)
plt.tight_layout()
plt.savefig(f'{output_dir}/provider_comparison.png', bbox_inches='tight')
plt.close()
print(f"✓ Saved: provider_comparison.png")
def plot_dnssec_impact(self, output_dir='plots'):
"""Compare DNSSEC validation methods (trust vs auth)"""
Path(output_dir).mkdir(exist_ok=True)
# Filter for protocols that have DNSSEC variations
dnssec_data = self.df[self.df['dnssec_mode'] != 'none'].copy()
if len(dnssec_data) == 0:
print("⚠ No DNSSEC data available")
return
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 6))
# Plot 1: Overall DNSSEC impact
protocol_order = ['Plain DNS', 'DoT', 'DoH', 'DoH/3', 'DoQ']
available = [p for p in protocol_order if p in self.df['protocol_category'].values]
sns.barplot(data=self.df, x='protocol_category', y='duration_ms',
hue='dnssec_mode', order=available, ax=ax1, ci=95)
ax1.set_title('DNSSEC Validation Overhead by Protocol', fontsize=12, fontweight='bold')
ax1.set_xlabel('Protocol', fontsize=10)
ax1.set_ylabel('Mean Response Time (ms)', fontsize=10)
ax1.legend(title='DNSSEC Mode', labels=['No DNSSEC', 'Auth (Full)', 'Trust (Resolver)'])
ax1.tick_params(axis='x', rotation=0)
# Plot 2: Trust vs Auth comparison
comparison_data = dnssec_data.groupby(['protocol_category', 'dnssec_mode'])['duration_ms'].mean().reset_index()
pivot_data = comparison_data.pivot(index='protocol_category', columns='dnssec_mode', values='duration_ms')
if 'auth' in pivot_data.columns and 'trust' in pivot_data.columns:
pivot_data['overhead_pct'] = ((pivot_data['auth'] - pivot_data['trust']) / pivot_data['trust'] * 100)
pivot_data['overhead_pct'].plot(kind='bar', ax=ax2, color='coral')
ax2.set_title('Auth vs Trust: Additional Overhead (%)', fontsize=12, fontweight='bold')
ax2.set_xlabel('Protocol', fontsize=10)
ax2.set_ylabel('Additional Overhead (%)', fontsize=10)
ax2.axhline(y=0, color='black', linestyle='--', linewidth=0.8)
ax2.tick_params(axis='x', rotation=45)
ax2.grid(axis='y', alpha=0.3)
plt.tight_layout()
plt.savefig(f'{output_dir}/dnssec_impact.png', bbox_inches='tight')
plt.close()
print(f"✓ Saved: dnssec_impact.png")
def plot_persistence_impact(self, output_dir='plots'):
"""Analyze impact of connection persistence"""
Path(output_dir).mkdir(exist_ok=True)
persist_data = self.df[self.df['protocol_base'].isin(['tls', 'https'])].copy()
if len(persist_data) == 0:
print("⚠ No persistence data available")
return
plt.figure(figsize=(12, 6))
sns.barplot(data=persist_data, x='protocol_base', y='duration_ms',
hue='persistence', ci=95)
plt.title('Impact of Connection Persistence on Latency', fontsize=14, fontweight='bold')
plt.xlabel('Protocol', fontsize=12)
plt.ylabel('Mean Response Time (ms)', fontsize=12)
plt.legend(title='Keep-Alive', labels=['Disabled', 'Enabled'])
# Calculate and annotate overhead reduction
for protocol in persist_data['protocol_base'].unique():
protocol_data = persist_data[persist_data['protocol_base'] == protocol]
no_persist = protocol_data[protocol_data['persistence'] == False]['duration_ms'].mean()
with_persist = protocol_data[protocol_data['persistence'] == True]['duration_ms'].mean()
if not np.isnan(no_persist) and not np.isnan(with_persist):
reduction = ((no_persist - with_persist) / no_persist * 100)
print(f"{protocol}: {reduction:.1f}% reduction with persistence")
plt.tight_layout()
plt.savefig(f'{output_dir}/persistence_impact.png', bbox_inches='tight')
plt.close()
print(f"✓ Saved: persistence_impact.png")
def plot_bandwidth_overhead(self, output_dir='plots'):
"""Visualize bandwidth usage by protocol"""
Path(output_dir).mkdir(exist_ok=True)
bandwidth_data = self.df.groupby('protocol_category').agg({
'request_size_bytes': 'mean',
'response_size_bytes': 'mean'
}).reset_index()
bandwidth_data['total_bytes'] = (bandwidth_data['request_size_bytes'] +
bandwidth_data['response_size_bytes'])
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 6))
# Plot 1: Request vs Response sizes
x = np.arange(len(bandwidth_data))
width = 0.35
ax1.bar(x - width/2, bandwidth_data['request_size_bytes'], width,
label='Request', alpha=0.8)
ax1.bar(x + width/2, bandwidth_data['response_size_bytes'], width,
label='Response', alpha=0.8)
ax1.set_xlabel('Protocol', fontsize=12)
ax1.set_ylabel('Bytes', fontsize=12)
ax1.set_title('Average Request/Response Sizes', fontsize=12, fontweight='bold')
ax1.set_xticks(x)
ax1.set_xticklabels(bandwidth_data['protocol_category'])
ax1.legend()
ax1.grid(axis='y', alpha=0.3)
# Plot 2: Total bandwidth overhead vs UDP baseline
udp_total = bandwidth_data[bandwidth_data['protocol_category'] == 'Plain DNS']['total_bytes'].values
if len(udp_total) > 0:
bandwidth_data['overhead_vs_udp'] = ((bandwidth_data['total_bytes'] - udp_total[0]) / udp_total[0] * 100)
colors = ['green' if x < 0 else 'red' for x in bandwidth_data['overhead_vs_udp']]
ax2.bar(bandwidth_data['protocol_category'], bandwidth_data['overhead_vs_udp'],
color=colors, alpha=0.7)
ax2.axhline(y=0, color='black', linestyle='--', linewidth=0.8)
ax2.set_xlabel('Protocol', fontsize=12)
ax2.set_ylabel('Overhead vs Plain DNS (%)', fontsize=12)
ax2.set_title('Bandwidth Overhead', fontsize=12, fontweight='bold')
ax2.grid(axis='y', alpha=0.3)
plt.tight_layout()
plt.savefig(f'{output_dir}/bandwidth_overhead.png', bbox_inches='tight')
plt.close()
print(f"✓ Saved: bandwidth_overhead.png")
def plot_heatmap(self, output_dir='plots'):
"""Heatmap of provider-protocol performance"""
Path(output_dir).mkdir(exist_ok=True)
# Create pivot table
heatmap_data = self.df.groupby(['provider', 'protocol_category'])['duration_ms'].median().unstack()
plt.figure(figsize=(12, 8))
sns.heatmap(heatmap_data, annot=True, fmt='.1f', cmap='RdYlGn_r',
cbar_kws={'label': 'Median Latency (ms)'})
plt.title('DNS Provider-Protocol Performance Matrix', fontsize=14, fontweight='bold')
plt.xlabel('Protocol', fontsize=12)
plt.ylabel('Provider', fontsize=12)
plt.tight_layout()
plt.savefig(f'{output_dir}/provider_protocol_heatmap.png', bbox_inches='tight')
plt.close()
print(f"✓ Saved: provider_protocol_heatmap.png")
def plot_percentile_comparison(self, output_dir='plots'):
"""Plot percentile comparison across protocols"""
Path(output_dir).mkdir(exist_ok=True)
percentiles = [50, 75, 90, 95, 99]
protocol_order = ['Plain DNS', 'DoT', 'DoH', 'DoH/3', 'DoQ']
available = [p for p in protocol_order if p in self.df['protocol_category'].values]
percentile_data = []
for protocol in available:
data = self.df[self.df['protocol_category'] == protocol]['duration_ms']
for p in percentiles:
percentile_data.append({
'protocol': protocol,
'percentile': f'P{p}',
'latency': np.percentile(data, p)
})
percentile_df = pd.DataFrame(percentile_data)
plt.figure(figsize=(14, 7))
sns.barplot(data=percentile_df, x='protocol', y='latency', hue='percentile', order=available)
plt.title('Latency Percentiles by Protocol', fontsize=14, fontweight='bold')
plt.xlabel('Protocol', fontsize=12)
plt.ylabel('Response Time (ms)', fontsize=12)
plt.legend(title='Percentile', bbox_to_anchor=(1.05, 1), loc='upper left')
plt.tight_layout()
plt.savefig(f'{output_dir}/percentile_comparison.png', bbox_inches='tight')
plt.close()
print(f"✓ Saved: percentile_comparison.png")
def statistical_tests(self):
"""Perform statistical significance tests"""
print("\n" + "="*80)
print("STATISTICAL TESTS")
print("="*80)
# Test 1: Protocol differences (Kruskal-Wallis)
protocols = self.df['protocol_category'].unique()
if len(protocols) > 2:
groups = [self.df[self.df['protocol_category'] == p]['duration_ms'].values
for p in protocols]
h_stat, p_value = stats.kruskal(*groups)
print(f"\n--- Kruskal-Wallis Test (Protocol Differences) ---")
print(f"H-statistic: {h_stat:.4f}")
print(f"p-value: {p_value:.4e}")
print(f"Result: {'Significant' if p_value < 0.05 else 'Not significant'} differences between protocols")
# Test 2: DNSSEC impact (Mann-Whitney U)
if 'none' in self.df['dnssec_mode'].values and 'auth' in self.df['dnssec_mode'].values:
none_data = self.df[self.df['dnssec_mode'] == 'none']['duration_ms']
auth_data = self.df[self.df['dnssec_mode'] == 'auth']['duration_ms']
u_stat, p_value = stats.mannwhitneyu(none_data, auth_data, alternative='two-sided')
print(f"\n--- Mann-Whitney U Test (No DNSSEC vs Auth) ---")
print(f"U-statistic: {u_stat:.4f}")
print(f"p-value: {p_value:.4e}")
print(f"Result: {'Significant' if p_value < 0.05 else 'Not significant'} difference")
# Test 3: Trust vs Auth comparison
if 'trust' in self.df['dnssec_mode'].values and 'auth' in self.df['dnssec_mode'].values:
trust_data = self.df[self.df['dnssec_mode'] == 'trust']['duration_ms']
auth_data = self.df[self.df['dnssec_mode'] == 'auth']['duration_ms']
u_stat, p_value = stats.mannwhitneyu(trust_data, auth_data, alternative='two-sided')
print(f"\n--- Mann-Whitney U Test (Trust vs Auth) ---")
print(f"U-statistic: {u_stat:.4f}")
print(f"p-value: {p_value:.4e}")
print(f"Result: Auth is {'significantly' if p_value < 0.05 else 'not significantly'} slower than Trust")
def generate_latex_table(self, output_dir='plots'):
"""Generate LaTeX table for thesis"""
Path(output_dir).mkdir(exist_ok=True)
# Summary table by protocol
summary = self.df.groupby('protocol_category')['duration_ms'].agg([
('Mean', 'mean'),
('Median', 'median'),
('Std Dev', 'std'),
('P95', lambda x: x.quantile(0.95)),
('P99', lambda x: x.quantile(0.99))
]).round(2)
latex_code = summary.to_latex(float_format="%.2f")
with open(f'{output_dir}/summary_table.tex', 'w') as f:
f.write(latex_code)
print(f"✓ Saved: summary_table.tex")
print("\nLaTeX Table Preview:")
print(latex_code)
def run_full_analysis(self):
"""Run complete analysis pipeline"""
print("="*80)
print("DNS QoS Analysis - Starting Full Analysis")
print("="*80)
# Load data
print("\n[1/10] Loading data...")
self.load_all_data()
# Generate statistics
print("\n[2/10] Generating summary statistics...")
self.generate_summary_statistics()
# Statistical tests
print("\n[3/10] Running statistical tests...")
self.statistical_tests()
# Generate plots
print("\n[4/10] Creating latency by protocol plot...")
self.plot_latency_by_protocol()
print("\n[5/10] Creating provider comparison plot...")
self.plot_provider_comparison()
print("\n[6/10] Creating DNSSEC impact plot...")
self.plot_dnssec_impact()
print("\n[7/10] Creating persistence impact plot...")
self.plot_persistence_impact()
print("\n[8/10] Creating bandwidth overhead plot...")
self.plot_bandwidth_overhead()
print("\n[9/10] Creating heatmap...")
self.plot_heatmap()
print("\n[10/10] Creating percentile comparison...")
self.plot_percentile_comparison()
# Generate LaTeX table
print("\n[Bonus] Generating LaTeX table...")
self.generate_latex_table()
print("\n" + "="*80)
print("✓ Analysis Complete! Check the 'plots' directory for all visualizations.")
print("="*80)
if __name__ == "__main__":
root_dir = "."
output_file = "dns_metrics.csv"
analyze_dns_data(root_dir, output_file)
analyzer = DNSAnalyzer(results_dir='results')
analyzer.run_full_analysis()


@@ -0,0 +1,536 @@
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from pathlib import Path
import datetime
from dateutil import parser as date_parser
import dpkt
# Set style
sns.set_style("whitegrid")
plt.rcParams['figure.dpi'] = 300
plt.rcParams['savefig.dpi'] = 300
plt.rcParams['font.size'] = 10
class FastDNSAnalyzer:
def __init__(self, results_dir='results'):
self.results_dir = Path(results_dir)
self.all_data = []
def should_include_file(self, filename):
"""Filter out DNSSEC and non-persist files"""
name = filename.stem
if 'auth' in name or 'trust' in name:
return False
if name in ['tls', 'https']:
return False
return True
def parse_rfc3339_nano(self, timestamp_str):
"""Parse RFC3339Nano timestamp with timezone"""
try:
dt = date_parser.parse(timestamp_str)
return dt.astimezone(datetime.timezone.utc).timestamp()
except Exception as e:
print(f" Error parsing timestamp {timestamp_str}: {e}")
return None
def extract_bandwidth_from_pcap_fast(self, pcap_file, csv_data):
"""Fast bandwidth extraction using dpkt"""
print(f" Analyzing pcap: {pcap_file.name}")
try:
with open(pcap_file, 'rb') as f:
pcap = dpkt.pcap.Reader(f)
# Build query time windows
query_windows = []
for idx, row in csv_data.iterrows():
start_time = self.parse_rfc3339_nano(row['timestamp'])
if start_time is None:
continue
duration_seconds = row['duration_ns'] / 1_000_000_000
end_time = start_time + duration_seconds
query_windows.append({
'index': idx,
'start': start_time,
'end': end_time,
'bytes_sent': 0,
'bytes_received': 0,
'packets_sent': 0,
'packets_received': 0
})
if not query_windows:
print(" ✗ No valid query windows")
return None
# Sort windows for faster matching
query_windows.sort(key=lambda x: x['start'])
# Process packets
packet_count = 0
matched_count = 0
for timestamp, buf in pcap:
packet_count += 1
packet_size = len(buf)
# Quick parse to determine direction
try:
eth = dpkt.ethernet.Ethernet(buf)
# Get IP layer
if isinstance(eth.data, dpkt.ip.IP):
ip = eth.data
elif isinstance(eth.data, dpkt.ip6.IP6):
ip = eth.data
else:
continue
# Get transport layer
if isinstance(ip.data, dpkt.udp.UDP):
transport = ip.data
src_port = transport.sport
dst_port = transport.dport
elif isinstance(ip.data, dpkt.tcp.TCP):
transport = ip.data
src_port = transport.sport
dst_port = transport.dport
else:
continue
# Determine direction (client port usually higher)
is_outbound = src_port > dst_port
# Linear scan over start-sorted windows; early exit once past the packet time
for window in query_windows:
if window['start'] <= timestamp <= window['end']:
if is_outbound:
window['bytes_sent'] += packet_size
window['packets_sent'] += 1
else:
window['bytes_received'] += packet_size
window['packets_received'] += 1
matched_count += 1
break
elif timestamp < window['start']:
break # No more windows to check
except Exception:
continue
print(f" ✓ Processed {packet_count} packets, matched {matched_count}")
# Convert to DataFrame
bandwidth_df = pd.DataFrame(query_windows)
return bandwidth_df[['index', 'bytes_sent', 'bytes_received',
'packets_sent', 'packets_received']]
except Exception as e:
print(f" ✗ Error reading pcap: {e}")
return None
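# Caveat on the direction heuristic above: src_port > dst_port assumes an
# ephemeral client port talking to a well-known resolver port. A stricter
# (hypothetical) variant would test the server ports directly:
#   is_outbound = dst_port in (53, 443, 853)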
def load_data(self):
"""Load all relevant CSV files and extract bandwidth from pcaps"""
print("Loading data and analyzing bandwidth...")
for provider_dir in self.results_dir.iterdir():
if not provider_dir.is_dir():
continue
provider = provider_dir.name
for csv_file in provider_dir.glob('*.csv'):
if not self.should_include_file(csv_file):
continue
try:
df = pd.read_csv(csv_file)
df['provider'] = provider
df['test_file'] = csv_file.stem
df['csv_path'] = str(csv_file)
# Find corresponding pcap file
pcap_file = csv_file.with_suffix('.pcap')
if pcap_file.exists():
print(f" Processing: {provider}/{csv_file.name}")
bandwidth_data = self.extract_bandwidth_from_pcap_fast(pcap_file, df)
if bandwidth_data is not None and len(bandwidth_data) > 0:
# Merge bandwidth data
df = df.reset_index(drop=True)
for col in ['bytes_sent', 'bytes_received', 'packets_sent', 'packets_received']:
df[col] = 0
for _, row in bandwidth_data.iterrows():
idx = int(row['index'])
if idx < len(df):
df.at[idx, 'bytes_sent'] = row['bytes_sent']
df.at[idx, 'bytes_received'] = row['bytes_received']
df.at[idx, 'packets_sent'] = row['packets_sent']
df.at[idx, 'packets_received'] = row['packets_received']
df['total_bytes'] = df['bytes_sent'] + df['bytes_received']
print(f" ✓ Extracted bandwidth for {len(df)} queries")
else:
print(f" ⚠ Could not extract bandwidth data")
else:
print(f" ⚠ No pcap found for {csv_file.name}")
self.all_data.append(df)
except Exception as e:
print(f" ✗ Error loading {csv_file}: {e}")
import traceback
traceback.print_exc()
print(f"\nTotal files loaded: {len(self.all_data)}")
def create_line_graphs(self, output_dir='output/line_graphs'):
"""Create line graphs for latency and bandwidth"""
Path(output_dir).mkdir(parents=True, exist_ok=True)
print("\nGenerating line graphs...")
for df in self.all_data:
provider = df['provider'].iloc[0]
test_name = df['test_file'].iloc[0]
df['query_index'] = range(1, len(df) + 1)
# Create figure with 2 subplots
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(14, 10))
# Plot 1: Latency
ax1.plot(df['query_index'], df['duration_ms'], marker='o',
markersize=4, linewidth=1, alpha=0.7, color='steelblue')
mean_latency = df['duration_ms'].mean()
ax1.axhline(y=mean_latency, color='r', linestyle='--',
label=f'Mean: {mean_latency:.2f} ms', linewidth=2)
ax1.set_xlabel('Query Number', fontsize=12)
ax1.set_ylabel('Latency (ms)', fontsize=12)
ax1.set_title('Latency Over Time', fontsize=12, fontweight='bold')
ax1.legend()
ax1.grid(True, alpha=0.3)
# Plot 2: Bandwidth
if 'total_bytes' in df.columns and df['total_bytes'].sum() > 0:
ax2.plot(df['query_index'], df['bytes_sent'], marker='s',
markersize=4, linewidth=1, alpha=0.7,
color='orange', label='Sent')
ax2.plot(df['query_index'], df['bytes_received'], marker='^',
markersize=4, linewidth=1, alpha=0.7,
color='green', label='Received')
mean_sent = df['bytes_sent'].mean()
mean_received = df['bytes_received'].mean()
ax2.axhline(y=mean_sent, color='orange', linestyle='--',
linewidth=1.5, alpha=0.5)
ax2.axhline(y=mean_received, color='green', linestyle='--',
linewidth=1.5, alpha=0.5)
ax2.set_xlabel('Query Number', fontsize=12)
ax2.set_ylabel('Bytes', fontsize=12)
ax2.set_title(f'Bandwidth Over Time (Mean: ↑{mean_sent:.0f}B ↓{mean_received:.0f}B)',
fontsize=12, fontweight='bold')
ax2.legend()
ax2.grid(True, alpha=0.3)
fig.suptitle(f'{provider.upper()} - {test_name}',
fontsize=14, fontweight='bold')
plt.tight_layout()
filename = f"{provider}_{test_name}.png"
plt.savefig(f'{output_dir}/{filename}', bbox_inches='tight')
plt.close()
print(f" ✓ Created: {filename}")
def get_protocol_name(self, test_file):
"""Extract clean protocol name"""
name = test_file.replace('-persist', '')
protocol_map = {
'udp': 'Plain DNS (UDP)',
'tls': 'DoT (DNS over TLS)',
'https': 'DoH (DNS over HTTPS)',
'doh3': 'DoH/3 (DNS over HTTP/3)',
'doq': 'DoQ (DNS over QUIC)'
}
return protocol_map.get(name, name.upper())
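# Illustration:
#   get_protocol_name('tls-persist') -> 'DoT (DNS over TLS)'
#   get_protocol_name('doh3')        -> 'DoH/3 (DNS over HTTP/3)'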
def create_resolver_comparison_bars(self, output_dir='output/comparisons'):
"""Create bar graphs comparing resolvers for latency and bandwidth"""
Path(output_dir).mkdir(parents=True, exist_ok=True)
print("\nGenerating resolver comparison graphs...")
combined_df = pd.concat(self.all_data, ignore_index=True)
protocols = combined_df['test_file'].unique()
for protocol in protocols:
protocol_data = combined_df[combined_df['test_file'] == protocol]
protocol_name = self.get_protocol_name(protocol)
# Latency stats
latency_stats = protocol_data.groupby('provider')['duration_ms'].agg([
('mean', 'mean'),
('median', 'median'),
('std', 'std')
]).reset_index()
# Create latency comparison
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 6))
fig.suptitle(f'{protocol_name} - Latency Comparison',
fontsize=16, fontweight='bold')
# Mean latency
bars1 = ax1.bar(latency_stats['provider'], latency_stats['mean'],
color='steelblue', alpha=0.8, edgecolor='black')
ax1.errorbar(latency_stats['provider'], latency_stats['mean'],
yerr=latency_stats['std'], fmt='none', color='black',
capsize=5, alpha=0.6)
for bar in bars1:
height = bar.get_height()
ax1.text(bar.get_x() + bar.get_width()/2., height,
f'{height:.2f}',
ha='center', va='bottom', fontweight='bold')
ax1.set_xlabel('Resolver', fontsize=12)
ax1.set_ylabel('Mean Latency (ms)', fontsize=12)
ax1.set_title('Mean Latency', fontsize=12)
ax1.grid(axis='y', alpha=0.3)
# Median latency
bars2 = ax2.bar(latency_stats['provider'], latency_stats['median'],
color='coral', alpha=0.8, edgecolor='black')
for bar in bars2:
height = bar.get_height()
ax2.text(bar.get_x() + bar.get_width()/2., height,
f'{height:.2f}',
ha='center', va='bottom', fontweight='bold')
ax2.set_xlabel('Resolver', fontsize=12)
ax2.set_ylabel('Median Latency (ms)', fontsize=12)
ax2.set_title('Median Latency', fontsize=12)
ax2.grid(axis='y', alpha=0.3)
plt.tight_layout()
plt.savefig(f'{output_dir}/latency_{protocol}.png', bbox_inches='tight')
plt.close()
print(f" ✓ Created: latency_{protocol}.png")
# Bandwidth comparison
if 'total_bytes' in protocol_data.columns and protocol_data['total_bytes'].sum() > 0:
bandwidth_stats = protocol_data.groupby('provider').agg({
'bytes_sent': 'mean',
'bytes_received': 'mean',
'total_bytes': 'mean'
}).reset_index()
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 6))
fig.suptitle(f'{protocol_name} - Bandwidth Comparison',
fontsize=16, fontweight='bold')
# Sent vs Received
x = np.arange(len(bandwidth_stats))
width = 0.35
bars1 = ax1.bar(x - width/2, bandwidth_stats['bytes_sent'], width,
label='Sent', color='orange', alpha=0.8, edgecolor='black')
bars2 = ax1.bar(x + width/2, bandwidth_stats['bytes_received'], width,
label='Received', color='green', alpha=0.8, edgecolor='black')
ax1.set_xlabel('Resolver', fontsize=12)
ax1.set_ylabel('Bytes per Query', fontsize=12)
ax1.set_title('Average Bandwidth per Query', fontsize=12)
ax1.set_xticks(x)
ax1.set_xticklabels(bandwidth_stats['provider'])
ax1.legend()
ax1.grid(axis='y', alpha=0.3)
# Total bandwidth
bars3 = ax2.bar(bandwidth_stats['provider'], bandwidth_stats['total_bytes'],
color='purple', alpha=0.8, edgecolor='black')
for bar in bars3:
height = bar.get_height()
ax2.text(bar.get_x() + bar.get_width()/2., height,
f'{height:.0f}',
ha='center', va='bottom', fontweight='bold')
ax2.set_xlabel('Resolver', fontsize=12)
ax2.set_ylabel('Total Bytes per Query', fontsize=12)
ax2.set_title('Total Bandwidth per Query', fontsize=12)
ax2.grid(axis='y', alpha=0.3)
plt.tight_layout()
plt.savefig(f'{output_dir}/bandwidth_{protocol}.png', bbox_inches='tight')
plt.close()
print(f" ✓ Created: bandwidth_{protocol}.png")
def generate_latex_tables(self, output_dir='output/tables'):
"""Generate LaTeX tables with latency and bandwidth statistics"""
Path(output_dir).mkdir(parents=True, exist_ok=True)
print("\nGenerating LaTeX tables...")
combined_df = pd.concat(self.all_data, ignore_index=True)
# Generate latency table for each resolver
for provider in combined_df['provider'].unique():
provider_data = combined_df[combined_df['provider'] == provider]
stats = provider_data.groupby('test_file')['duration_ms'].agg([
('Mean', 'mean'),
('Median', 'median'),
('Std Dev', 'std'),
('P95', lambda x: x.quantile(0.95)),
('P99', lambda x: x.quantile(0.99))
]).round(2)
stats.index = stats.index.map(self.get_protocol_name)
stats.index.name = 'Protocol'
latex_code = stats.to_latex(
caption=f'{provider.upper()} - Latency Statistics (ms)',
label=f'tab:{provider}_latency',
float_format="%.2f"
)
with open(f'{output_dir}/{provider}_latency.tex', 'w') as f:
f.write(latex_code)
print(f" ✓ Created: {provider}_latency.tex")
# Generate bandwidth table for each resolver
for provider in combined_df['provider'].unique():
provider_data = combined_df[combined_df['provider'] == provider]
if 'total_bytes' not in provider_data.columns or provider_data['total_bytes'].sum() == 0:
continue
bandwidth_stats = provider_data.groupby('test_file').agg({
'bytes_sent': 'mean',
'bytes_received': 'mean',
'total_bytes': 'mean'
}).round(2)
bandwidth_stats.columns = ['Avg Sent (B)', 'Avg Received (B)', 'Avg Total (B)']
bandwidth_stats.index = bandwidth_stats.index.map(self.get_protocol_name)
bandwidth_stats.index.name = 'Protocol'
latex_code = bandwidth_stats.to_latex(
caption=f'{provider.upper()} - Bandwidth Statistics',
label=f'tab:{provider}_bandwidth',
float_format="%.2f"
)
with open(f'{output_dir}/{provider}_bandwidth.tex', 'w') as f:
f.write(latex_code)
print(f" ✓ Created: {provider}_bandwidth.tex")
# Generate protocol efficiency table
print("\nGenerating protocol efficiency table...")
if 'total_bytes' in combined_df.columns and combined_df['total_bytes'].sum() > 0:
protocol_bandwidth = combined_df.groupby('test_file').agg({
'bytes_sent': 'mean',
'bytes_received': 'mean',
'total_bytes': 'mean'
}).round(2)
# Find UDP baseline
udp_baseline = None
for protocol in protocol_bandwidth.index:
if 'udp' in protocol:
udp_baseline = protocol_bandwidth.loc[protocol, 'total_bytes']
break
if udp_baseline and udp_baseline > 0:
protocol_bandwidth['Overhead vs UDP (%)'] = (
(protocol_bandwidth['total_bytes'] - udp_baseline) / udp_baseline * 100
).round(1)
protocol_bandwidth['Efficiency (%)'] = (
100 / (1 + protocol_bandwidth['Overhead vs UDP (%)'] / 100)
).round(1)
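# Algebraically, 100 / (1 + overhead/100) reduces to
# udp_baseline / total_bytes * 100, i.e. efficiency is UDP's average
# bytes per query expressed as a share of this protocol's.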
protocol_bandwidth.columns = ['Avg Sent (B)', 'Avg Received (B)',
'Avg Total (B)', 'Overhead (%)', 'Efficiency (%)']
protocol_bandwidth.index = protocol_bandwidth.index.map(self.get_protocol_name)
protocol_bandwidth.index.name = 'Protocol'
latex_code = protocol_bandwidth.to_latex(
caption='Protocol Bandwidth Efficiency Comparison',
label='tab:protocol_efficiency',
float_format="%.2f"
)
with open(f'{output_dir}/protocol_efficiency.tex', 'w') as f:
f.write(latex_code)
print(f" ✓ Created: protocol_efficiency.tex")
print("\n--- Protocol Efficiency ---")
print(protocol_bandwidth.to_string())
# Generate combined comparison tables
comparison_stats = combined_df.groupby(['provider', 'test_file'])['duration_ms'].agg([
('Mean', 'mean'),
('Median', 'median'),
('P95', lambda x: x.quantile(0.95))
]).round(2)
for metric in ['Mean', 'Median', 'P95']:
pivot_table = comparison_stats[metric].unstack(level=0)
pivot_table.index = pivot_table.index.map(self.get_protocol_name)
pivot_table.index.name = 'Protocol'
latex_code = pivot_table.to_latex(
caption=f'Resolver Latency Comparison - {metric} (ms)',
label=f'tab:comparison_{metric.lower()}',
float_format="%.2f"
)
with open(f'{output_dir}/comparison_{metric.lower()}.tex', 'w') as f:
f.write(latex_code)
print(f" ✓ Created: comparison_{metric.lower()}.tex")
def run_analysis(self):
"""Run the complete analysis"""
print("="*80)
print("Fast DNS QoS Analysis with Bandwidth")
print("="*80)
self.load_data()
if not self.all_data:
print("\n⚠ No data loaded.")
return
print("\n" + "="*80)
self.create_line_graphs()
print("\n" + "="*80)
self.create_resolver_comparison_bars()
print("\n" + "="*80)
self.generate_latex_tables()
print("\n" + "="*80)
print("✓ Analysis Complete!")
print("="*80)
if __name__ == "__main__":
analyzer = FastDNSAnalyzer(results_dir='results')
analyzer.run_analysis()

View File

@@ -0,0 +1,369 @@
package main
import (
"encoding/csv"
"fmt"
"log"
"os"
"path/filepath"
"strconv"
"strings"
"time"
"github.com/google/gopacket"
"github.com/google/gopacket/layers"
"github.com/google/gopacket/pcapgo"
)
type QueryRecord struct {
Domain string
QueryType string
Protocol string
DNSSec string
AuthDNSSec string
KeepAlive string
DNSServer string
Timestamp string
DurationNs int64
DurationMs float64
RequestSizeBytes int
ResponseSizeBytes int
ResponseCode string
Error string
BytesSent int64
BytesReceived int64
PacketsSent int64
PacketsReceived int64
TotalBytes int64
}
func parseRFC3339Nano(ts string) (time.Time, error) {
return time.Parse(time.RFC3339Nano, ts)
}
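// e.g. parseRFC3339Nano("2025-01-02T03:04:05.123456789Z") keeps the full
// nanosecond fraction, which the per-query windows in enrichRecords rely on.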
func processProviderFolder(providerPath string) error {
providerName := filepath.Base(providerPath)
fmt.Printf("\n=== Processing provider: %s ===\n", providerName)
files, err := os.ReadDir(providerPath)
if err != nil {
return err
}
processed := 0
skipped := 0
errors := 0
for _, file := range files {
if !strings.HasSuffix(file.Name(), ".csv") {
continue
}
csvPath := filepath.Join(providerPath, file.Name())
pcapPath := strings.Replace(csvPath, ".csv", ".pcap", 1)
// Check if PCAP exists
if _, err := os.Stat(pcapPath); os.IsNotExist(err) {
fmt.Printf(" ⊗ Skipping: %s (no matching PCAP)\n", file.Name())
skipped++
continue
}
// Check if already processed (has backup)
backupPath := csvPath + ".bak"
if _, err := os.Stat(backupPath); err == nil {
fmt.Printf(" ⊙ Skipping: %s (already processed, backup exists)\n", file.Name())
skipped++
continue
}
fmt.Printf(" ↻ Processing: %s ... ", file.Name())
if err := processPair(csvPath, pcapPath); err != nil {
fmt.Printf("ERROR\n")
log.Printf(" Error: %v\n", err)
errors++
} else {
fmt.Printf("✓\n")
processed++
}
}
fmt.Printf(" Summary: %d processed, %d skipped, %d errors\n", processed, skipped, errors)
return nil
}
func processPair(csvPath, pcapPath string) error {
// Create backup
backupPath := csvPath + ".bak"
input, err := os.ReadFile(csvPath)
if err != nil {
return fmt.Errorf("backup read failed: %w", err)
}
if err := os.WriteFile(backupPath, input, 0644); err != nil {
return fmt.Errorf("backup write failed: %w", err)
}
// Read CSV records
records, err := readCSV(csvPath)
if err != nil {
return fmt.Errorf("CSV read failed: %w", err)
}
if len(records) == 0 {
return fmt.Errorf("no records in CSV")
}
// Read and parse PCAP
packets, err := readPCAPGo(pcapPath)
if err != nil {
return fmt.Errorf("PCAP read failed: %w", err)
}
// Enrich records with bandwidth data
enrichRecords(records, packets)
// Write enriched CSV
if err := writeCSV(csvPath, records); err != nil {
return fmt.Errorf("CSV write failed: %w", err)
}
return nil
}
func readCSV(path string) ([]*QueryRecord, error) {
f, err := os.Open(path)
if err != nil {
return nil, err
}
defer f.Close()
r := csv.NewReader(f)
rows, err := r.ReadAll()
if err != nil {
return nil, err
}
if len(rows) < 2 {
return nil, fmt.Errorf("CSV has no data rows")
}
records := make([]*QueryRecord, 0, len(rows)-1)
for i := 1; i < len(rows); i++ {
row := rows[i]
if len(row) < 14 {
log.Printf(" Warning: Skipping malformed row %d", i+1)
continue
}
durationNs, _ := strconv.ParseInt(row[8], 10, 64)
durationMs, _ := strconv.ParseFloat(row[9], 64)
reqSize, _ := strconv.Atoi(row[10])
respSize, _ := strconv.Atoi(row[11])
records = append(records, &QueryRecord{
Domain: row[0],
QueryType: row[1],
Protocol: row[2],
DNSSec: row[3],
AuthDNSSec: row[4],
KeepAlive: row[5],
DNSServer: row[6],
Timestamp: row[7],
DurationNs: durationNs,
DurationMs: durationMs,
RequestSizeBytes: reqSize,
ResponseSizeBytes: respSize,
ResponseCode: row[12],
Error: row[13],
})
}
return records, nil
}
type PacketInfo struct {
Timestamp time.Time
Size int
IsSent bool
}
func readPCAPGo(path string) ([]PacketInfo, error) {
f, err := os.Open(path)
if err != nil {
return nil, err
}
defer f.Close()
reader, err := pcapgo.NewReader(f)
if err != nil {
return nil, err
}
var packets []PacketInfo
packetSource := gopacket.NewPacketSource(reader, reader.LinkType())
for packet := range packetSource.Packets() {
if packet.NetworkLayer() == nil {
continue
}
isDNS := false
isSent := false
// Check UDP layer (DNS, DoQ, DoH3)
if udpLayer := packet.Layer(layers.LayerTypeUDP); udpLayer != nil {
udp := udpLayer.(*layers.UDP)
isDNS = udp.SrcPort == 53 || udp.DstPort == 53 ||
udp.SrcPort == 853 || udp.DstPort == 853 ||
udp.SrcPort == 443 || udp.DstPort == 443
isSent = udp.DstPort == 53 || udp.DstPort == 853 || udp.DstPort == 443
}
// Check TCP layer (DoT, DoH)
if tcpLayer := packet.Layer(layers.LayerTypeTCP); tcpLayer != nil {
tcp := tcpLayer.(*layers.TCP)
isDNS = tcp.SrcPort == 53 || tcp.DstPort == 53 ||
tcp.SrcPort == 853 || tcp.DstPort == 853 ||
tcp.SrcPort == 443 || tcp.DstPort == 443
isSent = tcp.DstPort == 53 || tcp.DstPort == 853 || tcp.DstPort == 443
}
if isDNS {
packets = append(packets, PacketInfo{
Timestamp: packet.Metadata().Timestamp,
Size: len(packet.Data()),
IsSent: isSent,
})
}
}
return packets, nil
}
func enrichRecords(records []*QueryRecord, packets []PacketInfo) {
for _, rec := range records {
ts, err := parseRFC3339Nano(rec.Timestamp)
if err != nil {
log.Printf(" Warning: Failed to parse timestamp: %s", rec.Timestamp)
continue
}
// Define time window for this query
windowStart := ts
windowEnd := ts.Add(time.Duration(rec.DurationNs))
var sent, recv, pktSent, pktRecv int64
// Match packets within the time window
for _, pkt := range packets {
if (pkt.Timestamp.Equal(windowStart) || pkt.Timestamp.After(windowStart)) &&
pkt.Timestamp.Before(windowEnd) {
if pkt.IsSent {
sent += int64(pkt.Size)
pktSent++
} else {
recv += int64(pkt.Size)
pktRecv++
}
}
}
rec.BytesSent = sent
rec.BytesReceived = recv
rec.PacketsSent = pktSent
rec.PacketsReceived = pktRecv
rec.TotalBytes = sent + recv
}
}
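// Note: this is an O(queries x packets) scan per file. That is acceptable
// for these capture sizes; sorting packets and binary-searching each window
// (as the Python preprocessor does) would be a hypothetical speedup.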
func writeCSV(path string, records []*QueryRecord) error {
f, err := os.Create(path)
if err != nil {
return err
}
defer f.Close()
w := csv.NewWriter(f)
defer w.Flush()
// Write header
header := []string{
"domain", "query_type", "protocol", "dnssec", "auth_dnssec",
"keep_alive", "dns_server", "timestamp", "duration_ns", "duration_ms",
"request_size_bytes", "response_size_bytes", "response_code", "error",
"bytes_sent", "bytes_received", "packets_sent", "packets_received", "total_bytes",
}
if err := w.Write(header); err != nil {
return err
}
// Write data rows
for _, rec := range records {
row := []string{
rec.Domain,
rec.QueryType,
rec.Protocol,
rec.DNSSec,
rec.AuthDNSSec,
rec.KeepAlive,
rec.DNSServer,
rec.Timestamp,
strconv.FormatInt(rec.DurationNs, 10),
strconv.FormatFloat(rec.DurationMs, 'f', -1, 64),
strconv.Itoa(rec.RequestSizeBytes),
strconv.Itoa(rec.ResponseSizeBytes),
rec.ResponseCode,
rec.Error,
strconv.FormatInt(rec.BytesSent, 10),
strconv.FormatInt(rec.BytesReceived, 10),
strconv.FormatInt(rec.PacketsSent, 10),
strconv.FormatInt(rec.PacketsReceived, 10),
strconv.FormatInt(rec.TotalBytes, 10),
}
if err := w.Write(row); err != nil {
return err
}
}
return nil
}
func main() {
resultsDir := "results"
providers := []string{"adguard", "cloudflare", "google", "quad9"}
fmt.Println("╔═══════════════════════════════════════════════╗")
fmt.Println("║ DNS PCAP Preprocessor v1.0 ║")
fmt.Println("║ Enriching ALL CSVs with bandwidth metrics ║")
fmt.Println("╚═══════════════════════════════════════════════╝")
totalProcessed := 0
totalSkipped := 0
totalErrors := 0
for _, provider := range providers {
providerPath := filepath.Join(resultsDir, provider)
if _, err := os.Stat(providerPath); os.IsNotExist(err) {
fmt.Printf("\n⚠ Provider folder not found: %s\n", provider)
continue
}
if err := processProviderFolder(providerPath); err != nil {
log.Printf("Error processing %s: %v\n", provider, err)
totalErrors++
}
}
fmt.Println("\n╔═══════════════════════════════════════════════╗")
fmt.Println("║ Preprocessing Complete! ║")
fmt.Println("╚═══════════════════════════════════════════════╝")
fmt.Printf("\nAll CSV files now have 5 additional columns:\n")
fmt.Printf(" • bytes_sent - Total bytes sent to DNS server\n")
fmt.Printf(" • bytes_received - Total bytes received from DNS server\n")
fmt.Printf(" • packets_sent - Number of packets sent\n")
fmt.Printf(" • packets_received - Number of packets received\n")
fmt.Printf(" • total_bytes - Sum of sent + received bytes\n")
fmt.Printf("\n📁 Backups saved as: *.csv.bak\n")
fmt.Printf("\n💡 Tip: The analysis script will filter which files to visualize,\n")
fmt.Printf(" but all files now have complete bandwidth metrics!\n")
}

View File

@@ -1,250 +1,362 @@
#!/usr/bin/env python3
"""
Add network metrics from PCAP files to DNS CSV files.
Adds: raw_bytes_total, raw_packet_count, overhead_bytes, efficiency_percent
Fast PCAP Preprocessor for DNS QoS Analysis
Loads PCAP into memory first, then uses binary search for matching.
Uses LAN IP to determine direction (LAN = sent, non-LAN = received).
"""
import csv
import os
import argparse
import re
import shutil
from pathlib import Path
from datetime import datetime, timezone
from scapy.all import rdpcap
from typing import Dict, List, NamedTuple
import time
def parse_timestamp(ts_str):
"""Parse timestamp with timezone and nanoseconds (RFC3339Nano)."""
match = re.match(
r'(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2})\.(\d+)([\+\-]\d{2}:\d{2})',
ts_str
)
if not match:
raise ValueError(f"Invalid timestamp format: {ts_str}")
base, nanos, tz = match.groups()
micros = nanos[:6].ljust(6, '0')
iso_str = f"{base}.{micros}{tz}"
dt = datetime.fromisoformat(iso_str)
full_nanos = int(nanos.ljust(9, '0'))
return dt, full_nanos
import dpkt
from dateutil import parser as date_parser
def read_pcap(pcap_path):
"""Read PCAP and return list of (timestamp_epoch, size)."""
class Packet(NamedTuple):
"""Lightweight packet representation."""
timestamp: float
size: int
is_outbound: bool # True if from LAN, False if from internet
class QueryWindow:
"""Efficient query window representation."""
__slots__ = ['index', 'start', 'end', 'sent', 'received', 'pkts_sent', 'pkts_received']
def __init__(self, index: int, start: float, end: float):
self.index = index
self.start = start
self.end = end
self.sent = 0
self.received = 0
self.pkts_sent = 0
self.pkts_received = 0
def parse_csv_timestamp(ts_str: str) -> float:
"""Convert RFC3339Nano timestamp to Unix epoch (seconds)."""
dt = date_parser.isoparse(ts_str)
return dt.timestamp()
def is_lan_ip(ip_bytes: bytes) -> bool:
"""Check if IP is a private/LAN address."""
if len(ip_bytes) != 4:
return False
first = ip_bytes[0]
second = ip_bytes[1]
# 10.0.0.0/8
if first == 10:
return True
# 172.16.0.0/12
if first == 172 and 16 <= second <= 31:
return True
# 192.168.0.0/16
if first == 192 and second == 168:
return True
# 127.0.0.0/8 (localhost)
if first == 127:
return True
return False
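# Illustration:
#   is_lan_ip(bytes([192, 168, 1, 10])) -> True
#   is_lan_ip(bytes([8, 8, 8, 8]))      -> False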
def load_pcap_into_memory(pcap_path: Path) -> List[Packet]:
"""Load all packets from PCAP into memory with minimal data."""
packets = []
print(f" Loading PCAP into memory...")
start_time = time.time()
try:
pkts = rdpcap(str(pcap_path))
for pkt in pkts:
timestamp = float(pkt.time)
length = len(pkt)
packets.append((timestamp, length))
with open(pcap_path, 'rb') as f:
try:
pcap = dpkt.pcap.Reader(f)
except:
# Try pcapng format
f.seek(0)
pcap = dpkt.pcapng.Reader(f)
for ts, buf in pcap:
try:
packet_time = float(ts)
packet_size = len(buf)
# Parse to get source IP
eth = dpkt.ethernet.Ethernet(buf)
# Default to outbound if we can't determine
is_outbound = True
if isinstance(eth.data, dpkt.ip.IP):
ip = eth.data
src_ip = ip.src
is_outbound = is_lan_ip(src_ip)
packets.append(Packet(
timestamp=packet_time,
size=packet_size,
is_outbound=is_outbound
))
except (dpkt.dpkt.NeedData, dpkt.dpkt.UnpackError, AttributeError):
continue
except Exception as e:
print(f" Error reading PCAP: {e}")
print(f" Error reading PCAP: {e}")
return []
elapsed = time.time() - start_time
print(f" Loaded {len(packets):,} packets in {elapsed:.2f}s")
# Sort by timestamp for binary search
packets.sort(key=lambda p: p.timestamp)
return packets
def find_packets_in_window(packets, start_ts, start_nanos, duration_ns):
"""Find packets within exact time window."""
start_epoch = start_ts.timestamp()
start_epoch += (start_nanos % 1_000_000) / 1_000_000_000
end_epoch = start_epoch + (duration_ns / 1_000_000_000)
total_bytes = 0
packet_count = 0
for pkt_ts, pkt_len in packets:
if start_epoch <= pkt_ts <= end_epoch:
total_bytes += pkt_len
packet_count += 1
return total_bytes, packet_count
def enhance_csv(csv_path, pcap_path, output_path, debug=False):
"""Add PCAP metrics to CSV."""
if not os.path.exists(pcap_path):
print(f"⚠️ PCAP not found: {pcap_path}")
return False
print(f"Processing: {os.path.basename(csv_path)}")
# Read PCAP
packets = read_pcap(pcap_path)
print(f" Loaded {len(packets)} packets")
def find_packets_in_window(
packets: List[Packet],
start_time: float,
end_time: float,
left_hint: int = 0
) -> tuple[List[Packet], int]:
"""
Binary search to find all packets within time window.
Returns (matching_packets, left_index_hint_for_next_search).
"""
if not packets:
print(" ❌ No packets found")
return False
return [], 0
if packets and debug:
first_pcap = packets[0][0]
last_pcap = packets[-1][0]
print(f" First PCAP packet: {first_pcap:.6f}")
print(f" Last PCAP packet: {last_pcap:.6f}")
print(f" PCAP duration: {(last_pcap - first_pcap):.3f}s")
# Binary search for first packet >= start_time
left, right = left_hint, len(packets) - 1
first_idx = len(packets)
# Read CSV
with open(csv_path, 'r', newline='') as f:
reader = csv.DictReader(f)
fieldnames = list(reader.fieldnames) + [
'raw_bytes_total',
'raw_packet_count',
'overhead_bytes',
'efficiency_percent'
]
rows = list(reader)
while left <= right:
mid = (left + right) // 2
if packets[mid].timestamp >= start_time:
first_idx = mid
right = mid - 1
else:
left = mid + 1
if rows and debug:
try:
first_ts, _ = parse_timestamp(rows[0]['timestamp'])
last_ts, _ = parse_timestamp(rows[-1]['timestamp'])
print(f" First CSV query: {first_ts.timestamp():.6f}")
print(f" Last CSV query: {last_ts.timestamp():.6f}")
offset = packets[0][0] - first_ts.timestamp()
print(f" Time offset (PCAP - CSV): {offset:.3f}s")
except:
pass
# No packets in range
if first_idx >= len(packets) or packets[first_idx].timestamp > end_time:
return [], first_idx
# Enhance rows
enhanced = []
matched = 0
# Collect all packets in window
matching = []
idx = first_idx
while idx < len(packets) and packets[idx].timestamp <= end_time:
matching.append(packets[idx])
idx += 1
for i, row in enumerate(rows):
try:
timestamp, nanos = parse_timestamp(row['timestamp'])
duration_ns = int(row['duration_ns'])
raw_bytes, packet_count = find_packets_in_window(
packets, timestamp, nanos, duration_ns
)
useful_bytes = (
int(row['request_size_bytes']) +
int(row['response_size_bytes'])
)
overhead = raw_bytes - useful_bytes
efficiency = (
(useful_bytes / raw_bytes * 100)
if raw_bytes > 0 else 0
)
row['raw_bytes_total'] = raw_bytes
row['raw_packet_count'] = packet_count
row['overhead_bytes'] = overhead
row['efficiency_percent'] = f"{efficiency:.2f}"
if raw_bytes > 0:
matched += 1
# Debug first few queries
if debug and i < 3:
print(f" Query {i}: {row['domain']}")
print(f" Duration: {duration_ns / 1e6:.3f}ms")
print(f" Matched packets: {packet_count}")
print(f" Raw bytes: {raw_bytes}")
print(f" Useful bytes: {useful_bytes}")
print(f" Efficiency: {efficiency:.2f}%")
except (ValueError, KeyError) as e:
if debug:
print(f" Error processing row {i}: {e}")
row['raw_bytes_total'] = 0
row['raw_packet_count'] = 0
row['overhead_bytes'] = 0
row['efficiency_percent'] = "0.00"
enhanced.append(row)
print(f" Matched: {matched}/{len(rows)} queries")
if matched == 0:
print(" ⚠️ WARNING: No queries matched any packets!")
print(" This might indicate timestamp misalignment.")
# Write output
os.makedirs(os.path.dirname(output_path), exist_ok=True)
with open(output_path, 'w', newline='') as f:
writer = csv.DictWriter(f, fieldnames=fieldnames, extrasaction='ignore')
writer.writeheader()
writer.writerows(enhanced)
print(f" ✓ Saved: {output_path}")
return True
return matching, first_idx
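# Because queries are visited in start-time order and first_idx is threaded
# back in as left_hint, the repeated binary searches amortize to a single
# forward sweep over the time-sorted packet list.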
def main():
parser = argparse.ArgumentParser(
description='Add PCAP network metrics to DNS CSV files'
)
parser.add_argument('input_dir', help='Input directory (e.g., results)')
parser.add_argument(
'--output',
default='./results_enriched',
help='Output directory (default: ./results_enriched)'
)
parser.add_argument(
'--dry-run',
action='store_true',
help='Preview files without processing'
)
parser.add_argument(
'--debug',
action='store_true',
help='Show detailed timing information'
)
def load_csv_queries(csv_path: Path) -> List[Dict]:
"""Load CSV and create query data structures."""
queries = []
with open(csv_path, 'r', encoding='utf-8') as f:
reader = csv.DictReader(f)
for row in reader:
try:
ts_epoch = parse_csv_timestamp(row['timestamp'])
duration_s = float(row['duration_ns']) / 1e9
queries.append({
'data': row,
'start_time': ts_epoch,
'end_time': ts_epoch + duration_s,
})
except Exception as e:
print(f" Warning: Skipping row - {e}")
continue
return queries
def match_packets_to_queries(
packets: List[Packet],
queries: List[Dict]
) -> List[Dict]:
"""Match packets to query windows using binary search."""
if not queries or not packets:
return queries
args = parser.parse_args()
print(f" Matching packets to queries...")
start_time = time.time()
print("=" * 60)
print("ENHANCE DNS CSVs WITH PCAP METRICS")
print("=" * 60)
print(f"Input: {args.input_dir}")
print(f"Output: {args.output}")
if args.debug:
print("Debug: ENABLED")
print()
# Initialize metrics
for q in queries:
q['bytes_sent'] = 0
q['bytes_received'] = 0
q['packets_sent'] = 0
q['packets_received'] = 0
q['total_bytes'] = 0
# Find CSV files
csv_files = list(Path(args.input_dir).rglob('*.csv'))
# Sort queries by start time for sequential processing
queries_sorted = sorted(enumerate(queries), key=lambda x: x[1]['start_time'])
if not csv_files:
print("❌ No CSV files found")
return 1
matched_packets = 0
left_hint = 0 # Optimization: start next search from here
print(f"Found {len(csv_files)} CSV files\n")
for original_idx, q in queries_sorted:
matching, left_hint = find_packets_in_window(
packets,
q['start_time'],
q['end_time'],
left_hint
)
for pkt in matching:
matched_packets += 1
if pkt.is_outbound:
q['bytes_sent'] += pkt.size
q['packets_sent'] += 1
else:
q['bytes_received'] += pkt.size
q['packets_received'] += 1
q['total_bytes'] = q['bytes_sent'] + q['bytes_received']
if args.dry_run:
print("DRY RUN - would process:")
for csv_path in csv_files:
pcap_path = csv_path.with_suffix('.pcap')
print(f" {csv_path.relative_to(args.input_dir)}")
print(f" PCAP: {'' if pcap_path.exists() else ''}")
return 0
elapsed = time.time() - start_time
print(f" Matched {matched_packets:,} packets in {elapsed:.2f}s")
# Process files
success = 0
failed = 0
# Statistics
total_sent = sum(q['bytes_sent'] for q in queries)
total_recv = sum(q['bytes_received'] for q in queries)
queries_with_data = sum(1 for q in queries if q['total_bytes'] > 0)
print(f" Total: {total_sent:,} bytes sent, {total_recv:,} bytes received")
print(f" Queries with data: {queries_with_data}/{len(queries)}")
return queries
def write_enriched_csv(
csv_path: Path, queries: List[Dict], backup: bool = True
):
"""Write enriched CSV with bandwidth columns."""
if backup and csv_path.exists():
backup_path = csv_path.with_suffix('.csv.bak')
if not backup_path.exists(): # Don't overwrite existing backup
shutil.copy2(csv_path, backup_path)
print(f" Backup: {backup_path.name}")
# Get fieldnames
original_fields = list(queries[0]['data'].keys())
new_fields = [
'bytes_sent',
'bytes_received',
'packets_sent',
'packets_received',
'total_bytes',
]
fieldnames = original_fields + new_fields
with open(csv_path, 'w', encoding='utf-8', newline='') as f:
writer = csv.DictWriter(f, fieldnames=fieldnames)
writer.writeheader()
for q in queries:
row = q['data'].copy()
for field in new_fields:
row[field] = q[field]
writer.writerow(row)
print(f" Written: {csv_path.name}")
def process_provider_directory(provider_path: Path):
"""Process all CSV/PCAP pairs in a provider directory."""
print(f"\n{'='*60}")
print(f"Processing: {provider_path.name.upper()}")
print(f"{'='*60}")
csv_files = sorted(provider_path.glob('*.csv'))
processed = 0
total_time = 0
for csv_path in csv_files:
pcap_path = csv_path.with_suffix('.pcap')
rel_path = csv_path.relative_to(args.input_dir)
output_path = Path(args.output) / rel_path
# Skip backup files
if '.bak' in csv_path.name:
continue
if enhance_csv(str(csv_path), str(pcap_path), str(output_path),
args.debug):
success += 1
else:
failed += 1
print()
pcap_path = csv_path.with_suffix('.pcap')
if not pcap_path.exists():
print(f"\n ⚠ Skipping {csv_path.name} - no matching PCAP")
continue
print(f"\n 📁 {csv_path.name}")
file_start = time.time()
# Load PCAP into memory first
packets = load_pcap_into_memory(pcap_path)
if not packets:
print(f" ⚠ No packets found in PCAP")
continue
# Load CSV queries
queries = load_csv_queries(csv_path)
if not queries:
print(f" ⚠ No valid queries found")
continue
print(f" Loaded {len(queries):,} queries")
# Match packets to queries
enriched_queries = match_packets_to_queries(packets, queries)
# Write enriched CSV
write_enriched_csv(csv_path, enriched_queries)
file_time = time.time() - file_start
total_time += file_time
processed += 1
print(f" ✓ Completed in {file_time:.2f}s")
# Summary
print("=" * 60)
print(f"✓ Success: {success}")
print(f"✗ Failed: {failed}")
print(f"Total: {len(csv_files)}")
print(f"\nOutput: {args.output}")
return 0 if failed == 0 else 1
print(f"\n {'='*58}")
print(f" {provider_path.name}: {processed} files in {total_time:.2f}s")
print(f" {'='*58}")
if __name__ == "__main__":
exit(main())
def main():
"""Main preprocessing pipeline."""
overall_start = time.time()
print("\n" + "="*60)
print("DNS PCAP PREPROCESSOR - Memory-Optimized Edition")
print("="*60)
results_dir = Path('results')
if not results_dir.exists():
print(f"\n❌ Error: '{results_dir}' directory not found")
return
providers = ['adguard', 'cloudflare', 'google', 'quad9']
for provider in providers:
provider_path = results_dir / provider
if provider_path.exists():
process_provider_directory(provider_path)
else:
print(f"\n⚠ Warning: Provider directory not found: {provider}")
overall_time = time.time() - overall_start
print("\n" + "="*60)
print(f"✓ PREPROCESSING COMPLETE")
print(f" Total time: {overall_time:.2f}s ({overall_time/60:.1f} minutes)")
print("="*60 + "\n")
if __name__ == '__main__':
main()

View File

@@ -0,0 +1,426 @@
#!/usr/bin/env python3
"""
Convert DNS CSV files to SQLite database.
Creates a single normalized table with unified DNSSEC handling.
"""
import sqlite3
import csv
from pathlib import Path
from dateutil import parser as date_parser
def create_database_schema(conn: sqlite3.Connection):
"""Create the database schema with indexes."""
cursor = conn.cursor()
# Main queries table
cursor.execute("""
CREATE TABLE IF NOT EXISTS dns_queries (
id INTEGER PRIMARY KEY AUTOINCREMENT,
-- Metadata
provider TEXT NOT NULL,
protocol TEXT NOT NULL,
dnssec_mode TEXT NOT NULL CHECK(dnssec_mode IN ('off', 'auth', 'trust')),
-- Query details
domain TEXT NOT NULL,
query_type TEXT NOT NULL,
keep_alive BOOLEAN NOT NULL,
dns_server TEXT NOT NULL,
-- Timing
timestamp TEXT NOT NULL,
timestamp_unix REAL NOT NULL,
duration_ns INTEGER NOT NULL,
duration_ms REAL NOT NULL,
-- Size metrics
request_size_bytes INTEGER,
response_size_bytes INTEGER,
-- Network metrics (from PCAP)
bytes_sent INTEGER DEFAULT 0,
bytes_received INTEGER DEFAULT 0,
packets_sent INTEGER DEFAULT 0,
packets_received INTEGER DEFAULT 0,
total_bytes INTEGER DEFAULT 0,
-- Response
response_code TEXT,
error TEXT
)
""")
# Create indexes for common queries
cursor.execute("""
CREATE INDEX IF NOT EXISTS idx_provider
ON dns_queries(provider)
""")
cursor.execute("""
CREATE INDEX IF NOT EXISTS idx_protocol
ON dns_queries(protocol)
""")
cursor.execute("""
CREATE INDEX IF NOT EXISTS idx_dnssec_mode
ON dns_queries(dnssec_mode)
""")
cursor.execute("""
CREATE INDEX IF NOT EXISTS idx_keep_alive
ON dns_queries(keep_alive)
""")
cursor.execute("""
CREATE INDEX IF NOT EXISTS idx_provider_protocol_dnssec
ON dns_queries(provider, protocol, dnssec_mode)
""")
cursor.execute("""
CREATE INDEX IF NOT EXISTS idx_timestamp
ON dns_queries(timestamp_unix)
""")
cursor.execute("""
CREATE INDEX IF NOT EXISTS idx_domain
ON dns_queries(domain)
""")
conn.commit()
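# Note: the composite (provider, protocol, dnssec_mode) index is meant to
# serve the grouped comparison queries printed as usage examples in main();
# the single-column indexes cover simple filters.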
def parse_protocol_and_dnssec(filename: str) -> tuple[str, str, bool]:
"""
Extract base protocol, DNSSEC mode, and keep_alive from filename.
Returns (base_protocol, dnssec_mode, keep_alive)
Examples:
'udp.csv' -> ('udp', 'off', False)
'udp-auth.csv' -> ('udp', 'auth', False)
'tls.csv' -> ('tls', 'off', False)
'tls-persist.csv' -> ('tls', 'off', True)
'https-persist.csv' -> ('https', 'off', True)
'https-auth-persist.csv' -> ('https', 'auth', True)
'https-trust-persist.csv' -> ('https', 'trust', True)
'doh3-auth.csv' -> ('doh3', 'auth', False)
'doq.csv' -> ('doq', 'off', False)
"""
name = filename.replace('.csv', '')
# Check for persist suffix (keep_alive)
keep_alive = False
if name.endswith('-persist'):
keep_alive = True
name = name.replace('-persist', '')
# Check for DNSSEC suffix
dnssec_mode = 'off'
if name.endswith('-auth'):
dnssec_mode = 'auth'
name = name.replace('-auth', '')
elif name.endswith('-trust'):
dnssec_mode = 'trust'
name = name.replace('-trust', '')
# Force keep_alive off for UDP (connectionless) and for the QUIC-based
# doh3/doq runs, which have no persistent variant here
if name in ['udp', 'doh3', 'doq']:
keep_alive = False
return (name, dnssec_mode, keep_alive)
def str_to_bool(value: str) -> bool:
"""Convert string boolean to Python bool."""
return value.lower() in ('true', '1', 'yes')
def import_csv_to_db(
csv_path: Path,
provider: str,
conn: sqlite3.Connection
) -> int:
"""Import a CSV file into the database."""
protocol, dnssec_mode, keep_alive_from_filename = parse_protocol_and_dnssec(csv_path.name)
cursor = conn.cursor()
rows_imported = 0
with open(csv_path, 'r', encoding='utf-8') as f:
reader = csv.DictReader(f)
for row in reader:
try:
# Parse timestamp to Unix epoch
dt = date_parser.isoparse(row['timestamp'])
timestamp_unix = dt.timestamp()
# Use keep_alive from filename (more reliable than CSV)
keep_alive = keep_alive_from_filename
# Handle optional fields (may not exist in older CSVs)
bytes_sent = int(row.get('bytes_sent', 0) or 0)
bytes_received = int(row.get('bytes_received', 0) or 0)
packets_sent = int(row.get('packets_sent', 0) or 0)
packets_received = int(row.get('packets_received', 0) or 0)
total_bytes = int(row.get('total_bytes', 0) or 0)
cursor.execute("""
INSERT INTO dns_queries (
provider, protocol, dnssec_mode,
domain, query_type, keep_alive,
dns_server, timestamp, timestamp_unix,
duration_ns, duration_ms,
request_size_bytes, response_size_bytes,
bytes_sent, bytes_received, packets_sent, packets_received, total_bytes,
response_code, error
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
""", (
provider,
protocol,
dnssec_mode,
row['domain'],
row['query_type'],
keep_alive,
row['dns_server'],
row['timestamp'],
timestamp_unix,
int(row['duration_ns']),
float(row['duration_ms']),
int(row.get('request_size_bytes') or 0),
int(row.get('response_size_bytes') or 0),
bytes_sent,
bytes_received,
packets_sent,
packets_received,
total_bytes,
row.get('response_code', ''),
row.get('error', '')
))
rows_imported += 1
except Exception as e:
print(f" Warning: Skipping row - {e}")
continue
conn.commit()
return rows_imported
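# Performance note: rows are inserted one at a time but committed once per
# file (the commit above), which is what keeps the SQLite import fast;
# cursor.executemany() would be an optional further batching step.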
def main():
"""Main import pipeline."""
print("\n" + "="*60)
print("CSV to SQLite Database Converter")
print("="*60)
results_dir = Path('results')
db_path = Path('dns.db')
if not results_dir.exists():
print(f"\n❌ Error: '{results_dir}' directory not found")
return
# Remove existing database
if db_path.exists():
print(f"\n⚠ Removing existing database: {db_path}")
db_path.unlink()
# Create database and schema
print(f"\n📊 Creating database: {db_path}")
conn = sqlite3.connect(db_path)
create_database_schema(conn)
print("✓ Schema created")
# Import CSVs
providers = ['adguard', 'cloudflare', 'google', 'quad9']
total_rows = 0
total_files = 0
for provider in providers:
provider_path = results_dir / provider
if not provider_path.exists():
print(f"\n⚠ Skipping {provider} - directory not found")
continue
print(f"\n{'='*60}")
print(f"Importing: {provider.upper()}")
print(f"{'='*60}")
csv_files = sorted(provider_path.glob('*.csv'))
provider_rows = 0
provider_files = 0
for csv_path in csv_files:
# Skip backup files
if '.bak' in csv_path.name:
continue
protocol, dnssec, keep_alive = parse_protocol_and_dnssec(csv_path.name)
ka_str = "persistent" if keep_alive else "non-persist"
print(f" 📄 {csv_path.name:30}{protocol:8} (DNSSEC: {dnssec:5}, {ka_str})")
rows = import_csv_to_db(csv_path, provider, conn)
print(f" ✓ Imported {rows:,} rows")
provider_rows += rows
provider_files += 1
print(f"\n Total: {provider_files} files, {provider_rows:,} rows")
total_rows += provider_rows
total_files += provider_files
# Create summary
print(f"\n{'='*60}")
print("Database Summary")
print(f"{'='*60}")
cursor = conn.cursor()
# Total counts
cursor.execute("SELECT COUNT(*) FROM dns_queries")
total_queries = cursor.fetchone()[0]
cursor.execute("SELECT COUNT(DISTINCT provider) FROM dns_queries")
unique_providers = cursor.fetchone()[0]
cursor.execute("SELECT COUNT(DISTINCT protocol) FROM dns_queries")
unique_protocols = cursor.fetchone()[0]
cursor.execute("SELECT COUNT(DISTINCT domain) FROM dns_queries")
unique_domains = cursor.fetchone()[0]
print(f"\nTotal queries: {total_queries:,}")
print(f"Providers: {unique_providers}")
print(f"Protocols: {unique_protocols}")
print(f"Unique domains: {unique_domains}")
# Show breakdown by provider, protocol, DNSSEC, and keep_alive
print(f"\nBreakdown by Provider, Protocol, DNSSEC & Keep-Alive:")
print(f"{'-'*80}")
cursor.execute("""
SELECT provider, protocol, dnssec_mode, keep_alive, COUNT(*) as count
FROM dns_queries
GROUP BY provider, protocol, dnssec_mode, keep_alive
ORDER BY provider, protocol, dnssec_mode, keep_alive
""")
current_provider = None
for provider, protocol, dnssec, keep_alive, count in cursor.fetchall():
if current_provider != provider:
if current_provider is not None:
print()
current_provider = provider
ka_str = "" if keep_alive else ""
print(f" {provider:12} | {protocol:8} | {dnssec:5} | KA:{ka_str} | {count:6,} queries")
# Protocol distribution
print(f"\n{'-'*80}")
print("Protocol Distribution:")
print(f"{'-'*80}")
cursor.execute("""
SELECT protocol, COUNT(*) as count
FROM dns_queries
GROUP BY protocol
ORDER BY protocol
""")
for protocol, count in cursor.fetchall():
pct = (count / total_queries) * 100
print(f" {protocol:8} | {count:8,} queries ({pct:5.1f}%)")
# DNSSEC mode distribution
print(f"\n{'-'*80}")
print("DNSSEC Mode Distribution:")
print(f"{'-'*80}")
cursor.execute("""
SELECT dnssec_mode, COUNT(*) as count
FROM dns_queries
GROUP BY dnssec_mode
ORDER BY dnssec_mode
""")
for dnssec_mode, count in cursor.fetchall():
pct = (count / total_queries) * 100
print(f" {dnssec_mode:5} | {count:8,} queries ({pct:5.1f}%)")
# Keep-Alive distribution
print(f"\n{'-'*80}")
print("Keep-Alive Distribution:")
print(f"{'-'*80}")
cursor.execute("""
SELECT keep_alive, COUNT(*) as count
FROM dns_queries
GROUP BY keep_alive
""")
for keep_alive, count in cursor.fetchall():
ka_label = "Persistent" if keep_alive else "Non-persistent"
pct = (count / total_queries) * 100
print(f" {ka_label:15} | {count:8,} queries ({pct:5.1f}%)")
conn.close()
print(f"\n{'='*60}")
print(f"✓ Database created successfully: {db_path}")
print(f" Total: {total_files} files, {total_rows:,} rows")
print(f"{'='*60}\n")
# Print usage examples
print("\n📖 Usage Examples for Metabase:")
print(f"{'-'*60}")
print("\n1. Compare protocols (DNSSEC off, persistent only):")
print(""" SELECT provider, protocol,
AVG(duration_ms) as avg_latency,
AVG(total_bytes) as avg_bytes
FROM dns_queries
WHERE dnssec_mode = 'off' AND keep_alive = 1
GROUP BY provider, protocol;""")
print("\n2. DNSSEC impact on UDP:")
print(""" SELECT provider, dnssec_mode,
AVG(duration_ms) as avg_latency
FROM dns_queries
WHERE protocol = 'udp'
GROUP BY provider, dnssec_mode;""")
print("\n3. Keep-alive impact on TLS:")
print(""" SELECT provider, keep_alive,
AVG(duration_ms) as avg_latency,
AVG(total_bytes) as avg_bytes
FROM dns_queries
WHERE protocol = 'tls' AND dnssec_mode = 'off'
GROUP BY provider, keep_alive;""")
print("\n4. Time series for line graphs:")
print(""" SELECT timestamp_unix, duration_ms, total_bytes
FROM dns_queries
WHERE provider = 'cloudflare'
AND protocol = 'https'
AND dnssec_mode = 'off'
AND keep_alive = 1
ORDER BY timestamp_unix;""")
print("\n5. Overall comparison table:")
print(""" SELECT protocol, dnssec_mode, keep_alive,
COUNT(*) as queries,
AVG(duration_ms) as avg_latency,
AVG(total_bytes) as avg_bytes
FROM dns_queries
GROUP BY protocol, dnssec_mode, keep_alive
ORDER BY protocol, dnssec_mode, keep_alive;""")
print(f"\n{'-'*60}\n")
if __name__ == '__main__':
main()