//! DNS query-log parser (`anomalyx_normalize/parsers/dns.rs`).
//!
//! Turns `query[TYPE] NAME from CLIENT` log lines into table rows and adds
//! per-query name-length and name-entropy features.

use crate::parser::{Confidence, FormatParser, STRONG};
use crate::table::TableBuilder;
use ax_core::{AxError, Column, Value};
use std::collections::BTreeMap;
19
/// Stateless parser for DNS query logs, registered under the id `dns`.
///
/// The type carries no configuration; all behaviour lives in its
/// `FormatParser` implementation.
#[derive(Debug, Default, Clone)]
pub struct DnsParser;
22
/// Placeholder year prepended to log timestamps before they are parsed.
///
/// The timestamp text handled by `parse_epoch` has no year field, so epochs
/// derived with this value are only meaningful relative to one another.
const SENTINEL_YEAR: i32 = 1970;
26
/// Borrowed view of the fields extracted from one `query[...]` log line.
///
/// Every field is a slice of the original line; nothing is copied.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct DnsQuery<'a> {
    /// Text from the start of the line expected to hold the timestamp, or
    /// `None` when the line is too short to contain one.
    timestamp: Option<&'a str>,
    /// Record type found between the brackets of `query[...]` (e.g. `A`).
    qtype: &'a str,
    /// Queried name, trimmed of surrounding whitespace; never empty.
    qname: &'a str,
    /// Text following ` from `, trimmed of surrounding whitespace.
    client: &'a str,
}
34
/// Shannon entropy of `s`, in bits per character.
///
/// Characters are counted as Unicode scalar values (`char`s), not bytes.
/// Returns `0.0` for the empty string and for strings made of a single
/// repeated character.
fn shannon_entropy(s: &str) -> f64 {
    // A BTreeMap keeps the summation order deterministic, so the result is
    // bit-for-bit reproducible for a given input.
    let mut counts: BTreeMap<char, usize> = BTreeMap::new();
    for c in s.chars() {
        *counts.entry(c).or_default() += 1;
    }

    let total: usize = counts.values().sum();
    if total == 0 {
        return 0.0;
    }

    let len = total as f64;
    counts.values().fold(0.0_f64, |entropy, &count| {
        let p = count as f64 / len;
        entropy - p * p.log2()
    })
}
55
56fn parse_epoch(timestamp: &str) -> Option<i64> {
59 let stamped = format!("{SENTINEL_YEAR} {timestamp}");
60 chrono::NaiveDateTime::parse_from_str(&stamped, "%Y %b %e %H:%M:%S")
61 .ok()
62 .map(|dt| dt.and_utc().timestamp())
63}
64
65fn parse_query(line: &str) -> Option<DnsQuery<'_>> {
69 let after = line.split_once("query[")?.1;
70 let (qtype, rest) = after.split_once(']')?;
71 if !qtype.chars().next().is_some_and(|c| c.is_ascii_uppercase()) {
72 return None;
73 }
74 let (qname, client) = rest.trim_start().split_once(" from ")?;
75 let qname = qname.trim();
76 let client = client.trim();
77 if qname.is_empty() {
78 return None;
79 }
80 let timestamp = line.get(..15);
82 Some(DnsQuery {
83 timestamp,
84 qtype,
85 qname,
86 client,
87 })
88}
89
90impl DnsParser {
91 fn err(&self, msg: impl std::fmt::Display) -> AxError {
92 AxError::Parse {
93 format: self.id().to_string(),
94 message: msg.to_string(),
95 }
96 }
97}
98
99impl FormatParser for DnsParser {
100 fn id(&self) -> &'static str {
101 "dns"
102 }
103 fn extensions(&self) -> &'static [&'static str] {
104 &[]
105 }
106 fn sniff(&self, bytes: &[u8]) -> Option<Confidence> {
107 let text = std::str::from_utf8(bytes).ok()?;
108 text.lines()
109 .take(64)
110 .any(|l| parse_query(l).is_some())
111 .then_some(STRONG)
112 }
113 fn parse(&self, _source: &str, bytes: &[u8]) -> Result<Vec<Column>, AxError> {
114 let text = std::str::from_utf8(bytes).map_err(|e| self.err(e))?;
115 let mut builder = TableBuilder::new();
116 let mut queries = 0usize;
117 for line in text.lines() {
118 let Some(q) = parse_query(line) else {
119 continue; };
121 let mut row: BTreeMap<String, Value> = BTreeMap::new();
122 if let Some(ts) = q.timestamp {
123 row.insert("timestamp".into(), Value::Str(ts.to_string()));
124 if let Some(epoch) = parse_epoch(ts) {
125 row.insert("timestamp_epoch".into(), Value::Int(epoch));
126 }
127 }
128 row.insert("qtype".into(), Value::Str(q.qtype.to_string()));
129 row.insert("qname".into(), Value::Str(q.qname.to_string()));
130 row.insert(
131 "qname_length".into(),
132 Value::Int(q.qname.chars().count() as i64),
133 );
134 row.insert(
135 "qname_entropy".into(),
136 Value::Float(shannon_entropy(q.qname)),
137 );
138 row.insert("client".into(), Value::Str(q.client.to_string()));
139 builder.push_row(row);
140 queries += 1;
141 }
142 if queries == 0 {
143 return Err(self.err("no DNS query lines found"));
144 }
145 Ok(builder.finish())
146 }
147}
148
#[cfg(test)]
mod tests {
    use super::*;
    use ax_core::ColType;

    // Two query lines interleaved with non-query dnsmasq output. The day of
    // month is space-padded so the timestamp occupies exactly 15 bytes.
    const DNS: &str = "\
Jan  1 00:00:00 dnsmasq[1234]: query[A] example.com from 10.0.0.1
Jan  1 00:00:00 dnsmasq[1234]: forwarded example.com to 8.8.8.8
Jan  1 00:00:01 dnsmasq[1234]: reply example.com is 1.2.3.4
Jan  1 00:00:05 dnsmasq[1234]: query[TXT] aGVsbG8gZXhmaWwK.evil.example from 10.0.0.2
";

    /// Parses `s` with `DnsParser`, panicking on error.
    fn parse(s: &str) -> Vec<Column> {
        DnsParser.parse("-", s.as_bytes()).unwrap()
    }

    /// Looks up a column by name, panicking when it is absent.
    fn col<'a>(cols: &'a [Column], name: &str) -> &'a Column {
        cols.iter()
            .find(|c| c.name == name)
            .unwrap_or_else(|| panic!("missing column {name}"))
    }

    #[test]
    fn only_query_lines_become_rows() {
        let cols = parse(DNS);
        assert_eq!(col(&cols, "qname").cells.len(), 2);
        assert_eq!(
            col(&cols, "qname").cells,
            vec![
                Value::Str("example.com".into()),
                Value::Str("aGVsbG8gZXhmaWwK.evil.example".into())
            ]
        );
        assert_eq!(
            col(&cols, "qtype").cells,
            vec![Value::Str("A".into()), Value::Str("TXT".into())]
        );
        assert_eq!(col(&cols, "client").cells[1], Value::Str("10.0.0.2".into()));
    }

    #[test]
    fn computed_features_for_dga_exfil_detection() {
        let cols = parse(DNS);

        let len = col(&cols, "qname_length");
        assert_eq!(len.ty, ColType::Int);
        assert_eq!(len.cells[0], Value::Int(11));

        let entropy = col(&cols, "qname_entropy");
        assert_eq!(entropy.ty, ColType::Float);
        let (Value::Float(plain), Value::Float(exfil)) = (&entropy.cells[0], &entropy.cells[1])
        else {
            panic!("expected float entropies")
        };
        assert!(exfil > plain, "{exfil} should exceed {plain}");
    }

    #[test]
    fn timestamp_parsed_to_epoch_with_sentinel_year() {
        let cols = parse(DNS);
        let epoch = col(&cols, "timestamp_epoch");
        assert_eq!(epoch.ty, ColType::Int);
        assert_eq!(epoch.cells, vec![Value::Int(0), Value::Int(5)]);
        assert_eq!(
            col(&cols, "timestamp").cells[0],
            Value::Str("Jan  1 00:00:00".into())
        );
    }

    #[test]
    fn shannon_entropy_units() {
        assert_eq!(shannon_entropy(""), 0.0);
        assert_eq!(shannon_entropy("aaaa"), 0.0);
        assert_eq!(shannon_entropy("ab"), 1.0);
        assert_eq!(shannon_entropy("aabb"), 1.0);
        assert_eq!(shannon_entropy("abcd"), 2.0);
    }

    #[test]
    fn parse_epoch_units() {
        assert_eq!(parse_epoch("Jan  1 00:00:00"), Some(0));
        assert_eq!(parse_epoch("Jan  1 00:00:05"), Some(5));
        assert_eq!(parse_epoch("not a timestamp"), None);
    }

    #[test]
    fn parse_query_units() {
        let q = parse_query("Jan  1 00:00:00 dnsmasq[1]: query[A] a.com from 1.2.3.4").unwrap();
        assert_eq!(q.qtype, "A");
        assert_eq!(q.qname, "a.com");
        assert_eq!(q.client, "1.2.3.4");
        assert!(parse_query("Jan  1 00:00:00 dnsmasq[1]: forwarded a.com to 8.8.8.8").is_none());
        // A bracketed index is not a record type.
        assert!(parse_query("the query[0] index from array").is_none());
        // A missing name is rejected.
        assert!(parse_query("query[A] from 1.2.3.4").is_none());
    }

    #[test]
    fn no_query_lines_is_an_error() {
        assert!(matches!(
            DnsParser.parse("-", b"just some text\nno queries here\n"),
            Err(AxError::Parse { .. })
        ));
        assert!(matches!(
            DnsParser.parse("-", b""),
            Err(AxError::Parse { .. })
        ));
    }

    #[test]
    fn sniff_keys_on_a_query_line() {
        assert_eq!(DnsParser.sniff(DNS.as_bytes()), Some(STRONG));
        assert_eq!(
            DnsParser.sniff(b"Jan  1 00:00:00 dnsmasq[1]: started\nJan  1 00:00:01 dnsmasq[1]: query[A] x.com from 1.1.1.1\n"),
            Some(STRONG)
        );
        assert_eq!(DnsParser.sniff(b"a,b,c\n1,2,3"), None);
        assert_eq!(DnsParser.sniff(b"hello world\n"), None);
    }

    #[test]
    fn claims_no_extension() {
        assert!(DnsParser.extensions().is_empty());
    }

    #[test]
    fn resolves_by_content() {
        let reg = crate::parser::ParserRegistry::default();
        assert_eq!(reg.resolve("-", DNS.as_bytes()).unwrap().id(), "dns");
    }
}