Skip to main content

anomalyx_normalize/parsers/
evtx.rs

1//! Windows Event Log (EVTX) parser — central to endpoint forensics.
2//!
3//! EVTX is a binary chunked format; the heavy lifting (BinXML decoding) is
4//! delegated to the `evtx` crate, which yields one `serde_json::Value` per event
5//! plus the record id and timestamp. We flatten each event into dotted columns
6//! (`Event.System.EventID`, `Event.System.Provider.#attributes.Name`,
7//! `Event.EventData.TargetUserName`, …) and synthesize `eventRecordId` and
8//! `timestampEpoch` (Unix seconds). That gives the detectors what the issue
9//! wants: `Event.System.EventID` for rare event-ID `point` detection and logon
10//! `dist` drift, `timestampEpoch` for off-hours `contextual` (`--period 24`).
11//!
12//! Binary magic `ElfFile\0` (confidence `MAGIC`); extension `.evtx`. The `evtx`
13//! crate decodes chunks in parallel but yields records in file order, so output
14//! is deterministic. Behind the default-on `evtx` feature.
15
16use crate::infer;
17use crate::parser::{Confidence, FormatParser, MAGIC};
18use crate::table::TableBuilder;
19use ax_core::{AxError, Column, Value};
20use serde_json::Value as J;
21use std::collections::BTreeMap;
22
/// The 8-byte EVTX file-header magic: the ASCII text `ElfFile` followed by a
/// single NUL byte.
const EVTX_MAGIC: &[u8] = b"ElfFile\x00";
25
/// Parser for Windows Event Log (`.evtx`) files.
///
/// A unit struct carrying no state; parsing is provided through its
/// [`FormatParser`] implementation.
#[derive(Debug, Default, Clone)]
pub struct EvtxParser;
28
29/// Flattens a decoded event into dotted columns; scalars and arrays are leaves
30/// (arrays kept as canonical JSON via [`infer::json_to_value`]).
31fn flatten(prefix: &str, value: &J, row: &mut BTreeMap<String, Value>) {
32    match value {
33        J::Object(map) => {
34            for (key, val) in map {
35                let dotted = if prefix.is_empty() {
36                    key.clone()
37                } else {
38                    format!("{prefix}.{key}")
39                };
40                flatten(&dotted, val, row);
41            }
42        }
43        leaf => {
44            row.insert(prefix.to_string(), infer::json_to_value(leaf));
45        }
46    }
47}
48
49/// Builds one row from a decoded EVTX record: the flattened event plus the
50/// synthesized `eventRecordId` and `timestampEpoch` columns.
51fn record_to_row(data: &J, event_record_id: i64, epoch_seconds: i64) -> BTreeMap<String, Value> {
52    let mut row = BTreeMap::new();
53    flatten("", data, &mut row);
54    row.insert("eventRecordId".to_string(), Value::Int(event_record_id));
55    row.insert("timestampEpoch".to_string(), Value::Int(epoch_seconds));
56    row
57}
58
59impl EvtxParser {
60    fn err(&self, msg: impl std::fmt::Display) -> AxError {
61        AxError::Parse {
62            format: self.id().to_string(),
63            message: msg.to_string(),
64        }
65    }
66}
67
68impl FormatParser for EvtxParser {
69    fn id(&self) -> &'static str {
70        "evtx"
71    }
72    fn extensions(&self) -> &'static [&'static str] {
73        &["evtx"]
74    }
75    fn sniff(&self, bytes: &[u8]) -> Option<Confidence> {
76        bytes.starts_with(EVTX_MAGIC).then_some(MAGIC)
77    }
78    fn parse(&self, _source: &str, bytes: &[u8]) -> Result<Vec<Column>, AxError> {
79        let mut parser = evtx::EvtxParser::from_buffer(bytes.to_vec()).map_err(|e| self.err(e))?;
80        let mut builder = TableBuilder::new();
81        for record in parser.records_json_value() {
82            let record = record.map_err(|e| self.err(e))?;
83            let row = record_to_row(
84                &record.data,
85                record.event_record_id as i64,
86                record.timestamp.as_second(),
87            );
88            builder.push_row(row);
89        }
90        Ok(builder.finish())
91    }
92}
93
#[cfg(test)]
mod tests {
    use super::*;
    use ax_core::ColType;

    /// Builds a header-only EVTX image: one 4096-byte block holding the magic,
    /// `header_size`, the version pair and `header_block_size = 4096`, with no
    /// chunk data after it — so the `evtx` crate parses it and yields zero
    /// records. (Exercising real records needs a binary fixture; per-record
    /// extraction is covered through [`record_to_row`] instead.)
    fn empty_evtx() -> Vec<u8> {
        const HEADER_BLOCK_SIZE: u16 = 4096;

        let mut header = vec![0u8; usize::from(HEADER_BLOCK_SIZE)];
        header[..8].copy_from_slice(EVTX_MAGIC);
        header[32..36].copy_from_slice(&128u32.to_le_bytes()); // header_size
        header[36..38].copy_from_slice(&1u16.to_le_bytes()); // minor version
        header[38..40].copy_from_slice(&3u16.to_le_bytes()); // major version
        header[40..42].copy_from_slice(&HEADER_BLOCK_SIZE.to_le_bytes()); // header_block_size
        header
    }

    #[test]
    fn valid_empty_file_roundtrips_to_no_rows() {
        let parsed = EvtxParser.parse("x.evtx", &empty_evtx());
        let columns = parsed.unwrap();
        assert!(columns.is_empty(), "no records → no columns");
    }

    #[test]
    fn record_to_row_flattens_event_and_adds_synthetic_columns() {
        // Shaped like a Security 4624 (logon) event.
        let logon = serde_json::json!({
            "Event": {
                "System": {
                    "EventID": 4624,
                    "Provider": {"#attributes": {"Name": "Microsoft-Windows-Security-Auditing"}},
                    "Computer": "WIN-HOST",
                    "Level": 0
                },
                "EventData": {"TargetUserName": "alice", "LogonType": 2}
            }
        });
        let row = record_to_row(&logon, 4242, 1_609_459_200);

        let expected = [
            ("Event.System.EventID", Value::Int(4624)),
            (
                "Event.System.Provider.#attributes.Name",
                Value::Str("Microsoft-Windows-Security-Auditing".into()),
            ),
            ("Event.EventData.TargetUserName", Value::Str("alice".into())),
            ("Event.EventData.LogonType", Value::Int(2)),
            ("eventRecordId", Value::Int(4242)),
            ("timestampEpoch", Value::Int(1_609_459_200)),
        ];
        for (column, want) in &expected {
            assert_eq!(row.get(*column), Some(want), "column `{column}`");
        }
    }

    #[test]
    fn flatten_keeps_arrays_as_json_and_recurses_objects() {
        let nested = serde_json::json!({"a": {"b": 1}, "c": [1, 2]});
        let mut row = BTreeMap::new();
        flatten("", &nested, &mut row);

        assert_eq!(row.get("a.b"), Some(&Value::Int(1)));
        // Arrays are leaves, stored as canonical JSON text.
        assert_eq!(row.get("c"), Some(&Value::Str("[1,2]".into())));
    }

    #[test]
    fn end_to_end_columns_via_builder() {
        // Feed two synthesized rows through the same builder path parse() uses,
        // pinning the typed columns the detectors consume.
        let mut builder = TableBuilder::new();
        for (event_id, record_id, epoch) in [(4624, 1, 100), (4625, 2, 200)] {
            let event = serde_json::json!({"Event": {"System": {"EventID": event_id}}});
            builder.push_row(record_to_row(&event, record_id, epoch));
        }
        let columns = builder.finish();

        let event_ids = columns
            .iter()
            .find(|col| col.name == "Event.System.EventID")
            .unwrap();
        assert_eq!(event_ids.ty, ColType::Int);
        assert_eq!(event_ids.cells, vec![Value::Int(4624), Value::Int(4625)]);
    }

    #[test]
    fn malformed_input_errors() {
        let malformed: [&[u8]; 2] = [
            b"not an evtx file at all", // wrong magic / not an EVTX file
            b"ElfFile\x00short",        // magic present but truncated header
        ];
        for bytes in malformed {
            let outcome = EvtxParser.parse("x.evtx", bytes);
            assert!(matches!(outcome, Err(AxError::Parse { .. })));
        }
    }

    #[test]
    fn sniff_keys_on_magic() {
        let header_only = empty_evtx();
        assert_eq!(EvtxParser.sniff(&header_only), Some(MAGIC));
        assert_eq!(EvtxParser.sniff(b"ElfFile\x00....."), Some(MAGIC));

        let rejected: [&[u8]; 3] = [
            b"ElfFile",  // missing the NUL byte
            b"PAR1....", // parquet, not evtx
            b"{\"a\":1}",
        ];
        for bytes in rejected {
            assert_eq!(EvtxParser.sniff(bytes), None);
        }
    }

    #[test]
    fn claims_evtx_extension() {
        assert_eq!(EvtxParser.extensions(), &["evtx"]);
    }

    #[test]
    fn resolves_by_extension_and_magic() {
        let registry = crate::parser::ParserRegistry::default();
        let header_only = empty_evtx();

        // Extension alone is enough, even when the bytes carry no magic.
        let by_extension = registry.resolve("Security.evtx", b"zz").unwrap();
        assert_eq!(by_extension.id(), "evtx");

        // With an uninformative source name, the magic decides.
        let by_magic = registry.resolve("-", &header_only).unwrap();
        assert_eq!(by_magic.id(), "evtx");
    }
}