Skip to main content

anomalyx_normalize/parsers/
eve.rs

1//! Suricata/Zeek EVE JSON parser — the standard IDS alert stream.
2//!
3//! EVE is NDJSON (one event per line), every event tagged with an `event_type`
4//! (`alert`, `dns`, `flow`, `http`, …) and a `timestamp`. The interesting fields
5//! live one level down in a per-type object (`alert.signature`, `alert.category`,
6//! `alert.severity`, `dns.rrname`, …). Generic NDJSON would stringify those
7//! nested objects; this parser **flattens them into dotted columns** so a value
8//! like `alert.category` is its own column — exactly what `dist.chi2 --baseline`
9//! reads as alert-type drift (quiet vs incident window), with a brand-new alert
10//! class surfacing as a category never seen in the baseline.
11//!
12//! Arrays are kept as their canonical JSON string (not exploded). Detected by the
13//! `event_type` + `timestamp` signature; claims no extension (EVE is generically
14//! `eve.json`, owned by the JSON parser — pipe it on stdin for EVE-aware
15//! flattening).
16
17use crate::infer;
18use crate::parser::{Confidence, FormatParser, STRONG};
19use crate::table::TableBuilder;
20use ax_core::{AxError, Column, Value};
21use serde_json::Value as J;
22use std::collections::BTreeMap;
23
/// Stateless parser for EVE JSON event streams.
///
/// The type carries no fields; all of its behaviour comes from its
/// [`FormatParser`] implementation.
#[derive(Clone, Debug, Default)]
pub struct EveParser;
26
27fn looks_like_eve(obj: &serde_json::Map<String, J>) -> bool {
28    obj.get("event_type").is_some_and(J::is_string) && obj.contains_key("timestamp")
29}
30
31/// Flattens nested objects into dotted keys (`alert.category`); scalars and
32/// arrays are leaves (arrays kept as canonical JSON via [`infer::json_to_value`]).
33fn flatten(prefix: &str, value: &J, row: &mut BTreeMap<String, Value>) {
34    match value {
35        J::Object(map) => {
36            for (key, val) in map {
37                let dotted = if prefix.is_empty() {
38                    key.clone()
39                } else {
40                    format!("{prefix}.{key}")
41                };
42                flatten(&dotted, val, row);
43            }
44        }
45        leaf => {
46            row.insert(prefix.to_string(), infer::json_to_value(leaf));
47        }
48    }
49}
50
51impl EveParser {
52    fn err(&self, msg: impl std::fmt::Display) -> AxError {
53        AxError::Parse {
54            format: self.id().to_string(),
55            message: msg.to_string(),
56        }
57    }
58}
59
60impl FormatParser for EveParser {
61    fn id(&self) -> &'static str {
62        "eve"
63    }
64    fn extensions(&self) -> &'static [&'static str] {
65        &[]
66    }
67    fn sniff(&self, bytes: &[u8]) -> Option<Confidence> {
68        let text = std::str::from_utf8(bytes).ok()?;
69        let line = text.lines().find(|l| !l.trim().is_empty())?;
70        let value: J = serde_json::from_str(line).ok()?;
71        value
72            .as_object()
73            .is_some_and(looks_like_eve)
74            .then_some(STRONG)
75    }
76    fn parse(&self, _source: &str, bytes: &[u8]) -> Result<Vec<Column>, AxError> {
77        let text = std::str::from_utf8(bytes).map_err(|e| self.err(e))?;
78        let mut builder = TableBuilder::new();
79        for line in text.lines() {
80            if line.trim().is_empty() {
81                continue;
82            }
83            let value: J = serde_json::from_str(line).map_err(|e| self.err(e))?;
84            if !value.is_object() {
85                return Err(self.err("EVE event is not a JSON object"));
86            }
87            let mut row: BTreeMap<String, Value> = BTreeMap::new();
88            flatten("", &value, &mut row);
89            builder.push_row(row);
90        }
91        Ok(builder.finish())
92    }
93}
94
#[cfg(test)]
mod tests {
    use super::*;
    use ax_core::ColType;

    /// Two-event fixture: an `alert` event (with a nested `alert` object and a
    /// `metadata` array) followed by a `dns` event.
    const EVE: &str = concat!(
        r#"{"timestamp":"2017-01-01T00:00:01.0+0000","flow_id":12,"event_type":"alert","src_ip":"1.2.3.4","src_port":1234,"dest_ip":"5.6.7.8","dest_port":80,"proto":"TCP","alert":{"signature_id":2010935,"signature":"ET POLICY external IP","category":"Potential Corporate Privacy Violation","severity":1},"metadata":["a","b"]}"#,
        "\n",
        r#"{"timestamp":"2017-01-01T00:00:02.0+0000","flow_id":13,"event_type":"dns","src_ip":"1.2.3.4","proto":"UDP","dns":{"type":"query","rrname":"example.com"}}"#,
        "\n",
    );

    /// Parses `input` with [`EveParser`], panicking on any error.
    fn parsed(input: &str) -> Vec<Column> {
        EveParser.parse("-", input.as_bytes()).unwrap()
    }

    /// Looks a column up by name, panicking with the name if it is absent.
    fn column<'a>(table: &'a [Column], wanted: &str) -> &'a Column {
        match table.iter().find(|c| c.name == wanted) {
            Some(found) => found,
            None => panic!("missing column {wanted}"),
        }
    }

    /// Runs content detection on raw bytes.
    fn sniffed(raw: &[u8]) -> Option<Confidence> {
        EveParser.sniff(raw)
    }

    #[test]
    fn top_level_fields_are_typed() {
        let table = parsed(EVE);

        let event_type = column(&table, "event_type");
        assert_eq!(event_type.cells[0], Value::Str("alert".into()));

        let src_port = column(&table, "src_port");
        assert_eq!(src_port.ty, ColType::Int);
        assert_eq!(src_port.cells[0], Value::Int(1234));

        let dest_ip = column(&table, "dest_ip");
        assert_eq!(dest_ip.cells[0], Value::Str("5.6.7.8".into()));

        let flow_id = column(&table, "flow_id");
        assert_eq!(flow_id.cells[1], Value::Int(13));
    }

    #[test]
    fn nested_alert_object_is_flattened_to_dotted_columns() {
        let table = parsed(EVE);

        let category = column(&table, "alert.category");
        assert_eq!(
            category.cells[0],
            Value::Str("Potential Corporate Privacy Violation".into())
        );

        let severity = column(&table, "alert.severity");
        assert_eq!(severity.ty, ColType::Int);
        assert_eq!(severity.cells[0], Value::Int(1));

        let signature_id = column(&table, "alert.signature_id");
        assert_eq!(signature_id.cells[0], Value::Int(2010935));

        // Row 1 is the dns event: it has no alert.* fields, so the cell is null.
        assert_eq!(category.cells[1], Value::Null);
    }

    #[test]
    fn second_event_type_flattens_its_own_object() {
        let table = parsed(EVE);

        let event_type = column(&table, "event_type");
        assert_eq!(event_type.cells[1], Value::Str("dns".into()));

        let rrname = column(&table, "dns.rrname");
        assert_eq!(rrname.cells[1], Value::Str("example.com".into()));
        // Row 0 is the alert event, which has no dns.* fields.
        assert_eq!(rrname.cells[0], Value::Null);
    }

    #[test]
    fn arrays_are_kept_as_canonical_json() {
        let table = parsed(EVE);
        let metadata = column(&table, "metadata");
        assert_eq!(metadata.cells[0], Value::Str("[\"a\",\"b\"]".into()));
    }

    #[test]
    fn flatten_units() {
        let doc = serde_json::json!({"a": {"b": {"c": 5}}, "d": 2});
        let mut row = BTreeMap::new();
        flatten("", &doc, &mut row);

        // Three levels of nesting collapse into a single dotted key.
        assert_eq!(row.get("a.b.c"), Some(&Value::Int(5)));
        assert_eq!(row.get("d"), Some(&Value::Int(2)));
        assert_eq!(row.len(), 2);
    }

    #[test]
    fn malformed_events_error() {
        // First input is not JSON at all; second is valid JSON but not an object.
        let rejected: [&[u8]; 2] = [b"not json\n", b"[1,2,3]\n"];
        for input in rejected {
            let outcome = EveParser.parse("-", input);
            assert!(matches!(outcome, Err(AxError::Parse { .. })));
        }
    }

    #[test]
    fn sniff_keys_on_event_type_and_timestamp() {
        assert_eq!(sniffed(EVE.as_bytes()), Some(STRONG));

        // Both halves of the signature are required: a string event_type and
        // a timestamp key.
        let minimal = sniffed(br#"{"event_type":"alert","timestamp":"t"}"#);
        assert_eq!(minimal, Some(STRONG));

        let without_timestamp = sniffed(br#"{"event_type":"alert"}"#);
        assert_eq!(without_timestamp, None, "no timestamp");

        let numeric_event_type = sniffed(br#"{"event_type":5,"timestamp":"t"}"#);
        assert_eq!(numeric_event_type, None, "event_type not a string");

        // NDJSON lacking the signature is not EVE, and neither is non-JSON.
        assert_eq!(sniffed(b"{\"a\":1}\n{\"a\":2}\n"), None);
        assert_eq!(sniffed(b"a,b,c\n1,2,3"), None);
    }

    #[test]
    fn claims_no_extension() {
        assert!(EveParser.extensions().is_empty());
    }

    #[test]
    fn resolves_eve_over_ndjson_by_content() {
        let registry = crate::parser::ParserRegistry::default();

        let for_eve = registry.resolve("-", EVE.as_bytes()).unwrap();
        assert_eq!(for_eve.id(), "eve");

        // Without the EVE signature the same registry still picks NDJSON.
        let for_generic = registry.resolve("-", b"{\"a\":1}\n{\"a\":2}\n").unwrap();
        assert_eq!(for_generic.id(), "ndjson");
    }
}