Skip to main content

anomalyx_normalize/parsers/
auditd.rs

1//! Linux auditd parser — `/var/log/audit/audit.log` syscall/exec records.
2//!
3//! Each line is `type=<TYPE> msg=audit(<epoch>:<serial>): key=value ...` (an
4//! optional `node=<host>` may precede `type=`). We pull the distinctive audit
5//! message ID out of `msg=audit(...)` — `epoch` as a float (for `coll.cusum` /
6//! cadence on bursty activity) and `serial` as the event id — then parse the
7//! remaining `key=value` fields. `type` and `syscall` are the categorical
8//! columns `dist` reads as exec/syscall mix drift.
9//!
10//! Field values are bare (type-inferred, e.g. `syscall=2` → `Int`),
11//! double-quoted (`comm="cat"`), or single-quoted (auditd `USER_*` records carry
12//! a `msg='...'` payload with spaces) — quoted values are kept verbatim as `Str`.
13//!
14//! Detected by the unmistakable `msg=audit(` signature; claims no extension
15//! (`audit.log` is too generic to hijack).
16
17use crate::infer;
18use crate::parser::{Confidence, FormatParser, STRONG};
19use crate::table::TableBuilder;
20use ax_core::{AxError, Column, Value};
21use std::collections::BTreeMap;
22
/// Stateless parser for Linux auditd logs (`type=... msg=audit(<epoch>:<serial>): ...`).
///
/// The type is a zero-sized unit struct, so besides `Debug`/`Default`/`Clone`
/// it also derives `Copy` and equality: values can be passed around freely and
/// compared in assertions without any cost.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
pub struct AuditdParser;

/// Literal that opens the audit message id, `msg=audit(<epoch>:<serial>)`.
/// Both format sniffing and record splitting search a line for this text.
const AUDIT_MARKER: &str = "msg=audit(";
27
28/// Splits the trailing `key=value` fields. Values are bare (until a space),
29/// double-quoted, or single-quoted (which may contain spaces); a bare value is
30/// type-inferred, a quoted value is kept verbatim as a string.
31fn parse_fields(text: &str) -> Vec<(String, Value)> {
32    let mut out = Vec::new();
33    let mut chars = text.chars().peekable();
34    loop {
35        while chars.peek() == Some(&' ') {
36            chars.next();
37        }
38        if chars.peek().is_none() {
39            break;
40        }
41        let mut key = String::new();
42        while let Some(&c) = chars.peek() {
43            if c == '=' || c == ' ' {
44                break;
45            }
46            key.push(c);
47            chars.next();
48        }
49        if chars.peek() != Some(&'=') {
50            continue; // a token with no '=' is not a field
51        }
52        chars.next(); // consume '='
53        let value = match chars.peek() {
54            Some(&q @ ('"' | '\'')) => {
55                chars.next();
56                let mut s = String::new();
57                for c in chars.by_ref() {
58                    if c == q {
59                        break;
60                    }
61                    s.push(c);
62                }
63                Value::Str(s)
64            }
65            _ => {
66                let mut bare = String::new();
67                while let Some(&c) = chars.peek() {
68                    if c == ' ' {
69                        break;
70                    }
71                    bare.push(c);
72                    chars.next();
73                }
74                infer::infer_scalar(&bare)
75            }
76        };
77        if !key.is_empty() {
78            out.push((key, value));
79        }
80    }
81    out
82}
83
84/// Parses one auditd record into `(epoch, serial, remaining-fields-text)`. The
85/// remaining text is the line with the `msg=audit(...):` chunk removed (so it
86/// holds `type=...` and the trailing fields). `None` if the line is not auditd.
87fn parse_record(line: &str) -> Option<(f64, i64, String)> {
88    // Split around the `msg=audit(EPOCH:SERIAL):` chunk without index math:
89    // `prefix` holds `type=...` (and any `node=...`), `tail` the trailing fields.
90    let (prefix, after_marker) = line.split_once(AUDIT_MARKER)?;
91    let (id, tail) = after_marker.split_once(')')?;
92    let (ts, serial) = id.split_once(':')?;
93    let epoch = ts.parse::<f64>().ok().filter(|f| f.is_finite())?;
94    let serial = serial.parse::<i64>().ok()?;
95    let fields = tail.strip_prefix(':').unwrap_or(tail);
96    Some((epoch, serial, format!("{prefix}{fields}")))
97}
98
99impl AuditdParser {
100    fn err(&self, msg: impl std::fmt::Display) -> AxError {
101        AxError::Parse {
102            format: self.id().to_string(),
103            message: msg.to_string(),
104        }
105    }
106}
107
108impl FormatParser for AuditdParser {
109    fn id(&self) -> &'static str {
110        "auditd"
111    }
112    fn extensions(&self) -> &'static [&'static str] {
113        &[]
114    }
115    fn sniff(&self, bytes: &[u8]) -> Option<Confidence> {
116        let text = std::str::from_utf8(bytes).ok()?;
117        let line = text.lines().find(|l| !l.trim().is_empty())?;
118        line.contains(AUDIT_MARKER).then_some(STRONG)
119    }
120    fn parse(&self, _source: &str, bytes: &[u8]) -> Result<Vec<Column>, AxError> {
121        let text = std::str::from_utf8(bytes).map_err(|e| self.err(e))?;
122        let mut builder = TableBuilder::new();
123        for line in text.lines() {
124            if line.trim().is_empty() {
125                continue;
126            }
127            let (epoch, serial, fields) = parse_record(line)
128                .ok_or_else(|| self.err("not an auditd record: no valid msg=audit(...)"))?;
129            let mut row: BTreeMap<String, Value> = BTreeMap::new();
130            row.insert("epoch".into(), Value::Float(epoch));
131            row.insert("serial".into(), Value::Int(serial));
132            for (key, value) in parse_fields(&fields) {
133                row.insert(key, value);
134            }
135            builder.push_row(row);
136        }
137        Ok(builder.finish())
138    }
139}
140
#[cfg(test)]
mod tests {
    use super::*;
    use ax_core::ColType;

    /// Fixture: a SYSCALL and a CWD record sharing one audit id, then an EXECVE.
    const AUDIT: &str = concat!(
        r#"type=SYSCALL msg=audit(1364481363.243:24287): arch=c000003e syscall=2 success=no exit=-13 pid=3538 uid=500 comm="cat" exe="/bin/cat""#,
        "\n",
        r#"type=CWD msg=audit(1364481363.243:24287): cwd="/home/user""#,
        "\n",
        r#"type=EXECVE msg=audit(1364481363.300:24288): argc=2 a0="ls" a1="-l""#,
        "\n",
    );

    /// Runs the parser over `s`, panicking if it reports an error.
    fn parse(s: &str) -> Vec<Column> {
        let parsed = AuditdParser.parse("-", s.as_bytes());
        parsed.unwrap()
    }

    /// Finds the column called `name`, panicking when it is absent.
    fn col<'a>(cols: &'a [Column], name: &str) -> &'a Column {
        match cols.iter().find(|candidate| candidate.name == name) {
            Some(column) => column,
            None => panic!("missing column {name}"),
        }
    }

    /// Shorthand for a string cell.
    fn text(s: &str) -> Value {
        Value::Str(s.into())
    }

    #[test]
    fn audit_id_becomes_epoch_and_serial() {
        let table = parse(AUDIT);

        let epoch = col(&table, "epoch");
        assert_eq!(epoch.ty, ColType::Float);
        assert_eq!(epoch.cells[0], Value::Float(1_364_481_363.243));
        assert_eq!(epoch.cells[2], Value::Float(1_364_481_363.300));

        let serial = col(&table, "serial");
        assert_eq!(serial.ty, ColType::Int);
        let expected_serials: Vec<Value> = vec![24287, 24287, 24288]
            .into_iter()
            .map(Value::Int)
            .collect();
        assert_eq!(serial.cells, expected_serials);
    }

    #[test]
    fn type_and_syscall_are_columns() {
        let table = parse(AUDIT);

        let expected_types: Vec<Value> = ["SYSCALL", "CWD", "EXECVE"]
            .iter()
            .map(|name| text(name))
            .collect();
        assert_eq!(col(&table, "type").cells, expected_types);

        let syscall = col(&table, "syscall");
        assert_eq!(syscall.cells[0], Value::Int(2)); // bare numeric becomes an int
        assert_eq!(syscall.cells[1], Value::Null); // the CWD record has no syscall
    }

    #[test]
    fn bare_and_quoted_values() {
        let table = parse(AUDIT);

        assert_eq!(col(&table, "success").cells[0], text("no")); // bare word
        assert_eq!(col(&table, "exit").cells[0], Value::Int(-13)); // negative int

        // (column, row, expected) for the double-quoted values.
        let quoted: [(&str, usize, &str); 4] = [
            ("comm", 0, "cat"),
            ("exe", 0, "/bin/cat"),
            ("cwd", 1, "/home/user"),
            ("a0", 2, "ls"),
        ];
        for (name, row, want) in quoted.iter().copied() {
            assert_eq!(col(&table, name).cells[row], text(want));
        }
    }

    #[test]
    fn single_quoted_value_with_spaces() {
        // auditd USER_* records carry a msg='...' payload containing spaces.
        let table = parse(
            "type=USER_LOGIN msg=audit(1.5:9): pid=1 msg='op=login acct=root res=success'\n",
        );
        assert_eq!(
            col(&table, "msg").cells[0],
            text("op=login acct=root res=success")
        );
        assert_eq!(col(&table, "pid").cells[0], Value::Int(1));
    }

    #[test]
    fn node_prefixed_records_parse() {
        // Remote logging prepends node=<host> before type=.
        let table = parse("node=web01 type=SYSCALL msg=audit(1.0:1): syscall=59\n");
        assert_eq!(col(&table, "node").cells[0], text("web01"));
        assert_eq!(col(&table, "type").cells[0], text("SYSCALL"));
        assert_eq!(col(&table, "syscall").cells[0], Value::Int(59));
    }

    #[test]
    fn parse_fields_units() {
        let fields = parse_fields(r#"a=1 b="two words" c='x y' d=-3"#);
        let expected = vec![
            (String::from("a"), Value::Int(1)),
            (String::from("b"), text("two words")),
            (String::from("c"), text("x y")),
            (String::from("d"), Value::Int(-3)),
        ];
        assert_eq!(fields, expected);
    }

    #[test]
    fn malformed_records_error() {
        let bad_inputs: [&[u8]; 3] = [
            // No msg=audit( marker.
            b"this is not auditd\n",
            // Marker present but the id has no ':' separator.
            b"type=X msg=audit(bad): a=1\n",
            // Non-numeric serial.
            b"type=X msg=audit(1.0:zz): a=1\n",
        ];
        for input in bad_inputs.iter() {
            let outcome = AuditdParser.parse("-", *input);
            assert!(matches!(outcome, Err(AxError::Parse { .. })));
        }
    }

    #[test]
    fn sniff_keys_on_audit_marker() {
        assert_eq!(AuditdParser.sniff(AUDIT.as_bytes()), Some(STRONG));

        let rejected: [&[u8]; 3] = [
            // type= without the audit marker is not auditd.
            b"type=SYSCALL foo=bar\n",
            // logfmt
            b"k=1 v=2\n",
            // CSV
            b"a,b,c\n1,2,3",
        ];
        for bytes in rejected.iter() {
            assert_eq!(AuditdParser.sniff(*bytes), None);
        }
    }

    #[test]
    fn claims_no_extension() {
        assert_eq!(AuditdParser.extensions().len(), 0);
    }

    #[test]
    fn resolves_by_content() {
        let registry = crate::parser::ParserRegistry::default();
        let by_content = registry.resolve("-", AUDIT.as_bytes()).unwrap();
        assert_eq!(by_content.id(), "auditd");
        // A non-auditd `.log` is not hijacked.
        let fallback = registry.resolve("app.log", b"a,b\n1,2").unwrap();
        assert_eq!(fallback.id(), "csv");
    }
}