Skip to main content

anomalyx_normalize/parsers/
dns.rs

1//! DNS query log parser — dnsmasq / Pi-hole style query lines.
2//!
3//! DNS is a favorite covert channel, so beyond extracting the query we compute
4//! the features that expose tunnelling: `qname_length` and `qname_entropy`
5//! (Shannon entropy of the query name) feed `point` detection of DGA / exfil
6//! names (long, high-entropy), and `timestamp_epoch` feeds `cadence` on query
7//! timing (beaconing). `qtype` (e.g. `TXT`) and `client` round out the row.
8//!
9//! Parses the dnsmasq query line shape `<time> dnsmasq[pid]: query[TYPE] NAME
10//! from CLIENT`; non-query lines (forwarded/reply/cached) produce no rows. The
11//! BSD timestamp has no year, so it is parsed with a fixed sentinel year (UTC) —
12//! deterministic, never the wall clock. Detected by a parseable query line;
13//! claims no extension (DNS logs are generically `*.log`).
14
15use crate::parser::{Confidence, FormatParser, STRONG};
16use crate::table::TableBuilder;
17use ax_core::{AxError, Column, Value};
18use std::collections::BTreeMap;
19
/// Parser for dnsmasq / Pi-hole style DNS query logs.
///
/// A unit struct: it carries no configuration or state, so it is freely
/// cloneable and default-constructible.
#[derive(Clone, Debug, Default)]
pub struct DnsParser;
22
/// Year substituted into the year-less RFC 3164 (BSD syslog) timestamp.
///
/// Pinning a constant keeps the derived epoch deterministic; only the
/// month/day/time of the log line carry real information.
///
/// NOTE(review): 1970 is not a leap year, so a `Feb 29` stamp presumably fails
/// date validation and produces no epoch — confirm whether that is acceptable.
const SENTINEL_YEAR: i32 = 1970;
26
/// The borrowed pieces of a single dnsmasq `query[TYPE] NAME from CLIENT` line.
///
/// Every field is a slice of the original line; nothing is copied.
struct DnsQuery<'a> {
    /// Raw leading timestamp text, if the line carried one.
    timestamp: Option<&'a str>,
    /// Text between the brackets of `query[...]`, e.g. `A` or `TXT`.
    qtype: &'a str,
    /// The queried name, trimmed of surrounding whitespace.
    qname: &'a str,
    /// Whatever follows ` from `, trimmed of surrounding whitespace.
    client: &'a str,
}
34
/// Shannon entropy, in bits, of the distribution of `char`s in `s`.
///
/// Returns `0.0` for the empty string. Random-looking labels (DGA domains,
/// base32/64 exfil payloads) score high; repetitive names score low.
///
/// Characters are tallied in an ordered map so the floating-point terms are
/// always accumulated in the same (ascending `char`) order, which keeps the
/// result reproducible for a given input.
fn shannon_entropy(s: &str) -> f64 {
    let mut tally: BTreeMap<char, usize> = BTreeMap::new();
    for ch in s.chars() {
        *tally.entry(ch).or_default() += 1;
    }

    let total: usize = tally.values().sum();
    if total == 0 {
        return 0.0;
    }

    let len = total as f64;
    tally
        .values()
        .map(|&count| count as f64 / len)
        .fold(0.0, |bits, p| bits - p * p.log2())
}
55
56/// Parses the dnsmasq BSD timestamp (`Mmm dd HH:MM:SS`, no year) to Unix seconds
57/// using the sentinel year and UTC. `None` if it doesn't parse.
58fn parse_epoch(timestamp: &str) -> Option<i64> {
59    let stamped = format!("{SENTINEL_YEAR} {timestamp}");
60    chrono::NaiveDateTime::parse_from_str(&stamped, "%Y %b %e %H:%M:%S")
61        .ok()
62        .map(|dt| dt.and_utc().timestamp())
63}
64
65/// Parses a `... query[TYPE] NAME from CLIENT` line. `None` for non-query lines
66/// (the type must begin with an uppercase letter, ruling out prose like
67/// `query[0] x from y`).
68fn parse_query(line: &str) -> Option<DnsQuery<'_>> {
69    let after = line.split_once("query[")?.1;
70    let (qtype, rest) = after.split_once(']')?;
71    if !qtype.chars().next().is_some_and(|c| c.is_ascii_uppercase()) {
72        return None;
73    }
74    let (qname, client) = rest.trim_start().split_once(" from ")?;
75    let qname = qname.trim();
76    let client = client.trim();
77    if qname.is_empty() {
78        return None;
79    }
80    // The BSD timestamp is the leading 15 ASCII chars, when present.
81    let timestamp = line.get(..15);
82    Some(DnsQuery {
83        timestamp,
84        qtype,
85        qname,
86        client,
87    })
88}
89
90impl DnsParser {
91    fn err(&self, msg: impl std::fmt::Display) -> AxError {
92        AxError::Parse {
93            format: self.id().to_string(),
94            message: msg.to_string(),
95        }
96    }
97}
98
99impl FormatParser for DnsParser {
100    fn id(&self) -> &'static str {
101        "dns"
102    }
103    fn extensions(&self) -> &'static [&'static str] {
104        &[]
105    }
106    fn sniff(&self, bytes: &[u8]) -> Option<Confidence> {
107        let text = std::str::from_utf8(bytes).ok()?;
108        text.lines()
109            .take(64)
110            .any(|l| parse_query(l).is_some())
111            .then_some(STRONG)
112    }
113    fn parse(&self, _source: &str, bytes: &[u8]) -> Result<Vec<Column>, AxError> {
114        let text = std::str::from_utf8(bytes).map_err(|e| self.err(e))?;
115        let mut builder = TableBuilder::new();
116        let mut queries = 0usize;
117        for line in text.lines() {
118            let Some(q) = parse_query(line) else {
119                continue; // forwarded/reply/cached/config lines are not queries
120            };
121            let mut row: BTreeMap<String, Value> = BTreeMap::new();
122            if let Some(ts) = q.timestamp {
123                row.insert("timestamp".into(), Value::Str(ts.to_string()));
124                if let Some(epoch) = parse_epoch(ts) {
125                    row.insert("timestamp_epoch".into(), Value::Int(epoch));
126                }
127            }
128            row.insert("qtype".into(), Value::Str(q.qtype.to_string()));
129            row.insert("qname".into(), Value::Str(q.qname.to_string()));
130            row.insert(
131                "qname_length".into(),
132                Value::Int(q.qname.chars().count() as i64),
133            );
134            row.insert(
135                "qname_entropy".into(),
136                Value::Float(shannon_entropy(q.qname)),
137            );
138            row.insert("client".into(), Value::Str(q.client.to_string()));
139            builder.push_row(row);
140            queries += 1;
141        }
142        if queries == 0 {
143            return Err(self.err("no DNS query lines found"));
144        }
145        Ok(builder.finish())
146    }
147}
148
#[cfg(test)]
mod tests {
    use super::*;
    use ax_core::ColType;

    /// Fixture: two query lines (`A`, `TXT`) with a forwarded and a reply line
    /// between them.
    const DNS: &str = "\
Jan  1 00:00:00 dnsmasq[1234]: query[A] example.com from 10.0.0.1
Jan  1 00:00:00 dnsmasq[1234]: forwarded example.com to 8.8.8.8
Jan  1 00:00:01 dnsmasq[1234]: reply example.com is 1.2.3.4
Jan  1 00:00:05 dnsmasq[1234]: query[TXT] aGVsbG8gZXhmaWwK.evil.example from 10.0.0.2
";

    /// Parses `s` with the DNS parser, panicking on error.
    fn parse(s: &str) -> Vec<Column> {
        DnsParser.parse("-", s.as_bytes()).unwrap()
    }

    /// Looks a column up by name, panicking if it is absent.
    fn col<'a>(cols: &'a [Column], name: &str) -> &'a Column {
        match cols.iter().find(|c| c.name == name) {
            Some(found) => found,
            None => panic!("missing column {name}"),
        }
    }

    #[test]
    fn only_query_lines_become_rows() {
        let table = parse(DNS);
        let qname = col(&table, "qname");
        // The forwarded and reply lines are dropped, leaving the 2 queries.
        assert_eq!(qname.cells.len(), 2);
        assert_eq!(
            qname.cells,
            vec![
                Value::Str("example.com".into()),
                Value::Str("aGVsbG8gZXhmaWwK.evil.example".into())
            ]
        );
        let qtype = col(&table, "qtype");
        assert_eq!(
            qtype.cells,
            vec![Value::Str("A".into()), Value::Str("TXT".into())]
        );
        let client = col(&table, "client");
        assert_eq!(client.cells[1], Value::Str("10.0.0.2".into()));
    }

    #[test]
    fn computed_features_for_dga_exfil_detection() {
        let table = parse(DNS);

        let length = col(&table, "qname_length");
        assert_eq!(length.ty, ColType::Int);
        assert_eq!(length.cells[0], Value::Int(11)); // "example.com"

        let entropy = col(&table, "qname_entropy");
        assert_eq!(entropy.ty, ColType::Float);
        // The exfil-looking name must score above the ordinary domain.
        let (plain, exfil) = match (&entropy.cells[0], &entropy.cells[1]) {
            (Value::Float(a), Value::Float(b)) => (a, b),
            _ => panic!("expected float entropies"),
        };
        assert!(exfil > plain, "{exfil} should exceed {plain}");
    }

    #[test]
    fn timestamp_parsed_to_epoch_with_sentinel_year() {
        let table = parse(DNS);
        let epoch = col(&table, "timestamp_epoch");
        assert_eq!(epoch.ty, ColType::Int);
        // 1970-01-01 00:00:00 UTC is 0, and the second query is 5s after it.
        assert_eq!(epoch.cells, vec![Value::Int(0), Value::Int(5)]);
        let raw = col(&table, "timestamp");
        assert_eq!(raw.cells[0], Value::Str("Jan  1 00:00:00".into()));
    }

    #[test]
    fn shannon_entropy_units() {
        // (input, expected bits): empty, one symbol, then 2 and 4 equiprobable symbols.
        let cases = [
            ("", 0.0),
            ("aaaa", 0.0),
            ("ab", 1.0),
            ("aabb", 1.0),
            ("abcd", 2.0),
        ];
        for (input, bits) in cases {
            assert_eq!(shannon_entropy(input), bits);
        }
    }

    #[test]
    fn parse_epoch_units() {
        let cases = [
            ("Jan  1 00:00:00", Some(0)),
            ("Jan  1 00:00:05", Some(5)),
            ("not a timestamp", None),
        ];
        for (input, expected) in cases {
            assert_eq!(parse_epoch(input), expected);
        }
    }

    #[test]
    fn parse_query_units() {
        let parsed =
            parse_query("Jan  1 00:00:00 dnsmasq[1]: query[A] a.com from 1.2.3.4").unwrap();
        assert_eq!(parsed.qtype, "A");
        assert_eq!(parsed.qname, "a.com");
        assert_eq!(parsed.client, "1.2.3.4");

        // Rejected: a non-query line, a non-uppercase type, an empty name.
        let rejected = [
            "Jan  1 00:00:00 dnsmasq[1]: forwarded a.com to 8.8.8.8",
            "the query[0] index from array",
            "query[A]  from 1.2.3.4",
        ];
        for line in rejected {
            assert!(parse_query(line).is_none());
        }
    }

    #[test]
    fn no_query_lines_is_an_error() {
        let inputs: [&[u8]; 2] = [b"just some text\nno queries here\n", b""];
        for input in inputs {
            assert!(matches!(
                DnsParser.parse("-", input),
                Err(AxError::Parse { .. })
            ));
        }
    }

    #[test]
    fn sniff_keys_on_a_query_line() {
        assert_eq!(DnsParser.sniff(DNS.as_bytes()), Some(STRONG));
        // Leading non-query lines do not prevent detection; the scan continues.
        let late_query: &[u8] = b"Jan  1 00:00:00 dnsmasq[1]: started\nJan  1 00:00:01 dnsmasq[1]: query[A] x.com from 1.1.1.1\n";
        assert_eq!(DnsParser.sniff(late_query), Some(STRONG));
        let not_dns: [&[u8]; 2] = [b"a,b,c\n1,2,3", b"hello world\n"];
        for input in not_dns {
            assert_eq!(DnsParser.sniff(input), None);
        }
    }

    #[test]
    fn claims_no_extension() {
        assert!(DnsParser.extensions().is_empty());
    }

    #[test]
    fn resolves_by_content() {
        let registry = crate::parser::ParserRegistry::default();
        let resolved = registry.resolve("-", DNS.as_bytes()).unwrap();
        assert_eq!(resolved.id(), "dns");
    }
}