anomalyx_normalize/parsers/
eve.rs1use crate::infer;
18use crate::parser::{Confidence, FormatParser, STRONG};
19use crate::table::TableBuilder;
20use ax_core::{AxError, Column, Value};
21use serde_json::Value as J;
22use std::collections::BTreeMap;
23
/// Stateless parser for EVE event logs: newline-delimited JSON in which every
/// event is an object carrying a string `event_type` and a `timestamp` key.
///
/// Being a unit struct, it can be used directly as a value (`EveParser`).
#[derive(Clone, Debug, Default)]
pub struct EveParser;
26
27fn looks_like_eve(obj: &serde_json::Map<String, J>) -> bool {
28 obj.get("event_type").is_some_and(J::is_string) && obj.contains_key("timestamp")
29}
30
31fn flatten(prefix: &str, value: &J, row: &mut BTreeMap<String, Value>) {
34 match value {
35 J::Object(map) => {
36 for (key, val) in map {
37 let dotted = if prefix.is_empty() {
38 key.clone()
39 } else {
40 format!("{prefix}.{key}")
41 };
42 flatten(&dotted, val, row);
43 }
44 }
45 leaf => {
46 row.insert(prefix.to_string(), infer::json_to_value(leaf));
47 }
48 }
49}
50
51impl EveParser {
52 fn err(&self, msg: impl std::fmt::Display) -> AxError {
53 AxError::Parse {
54 format: self.id().to_string(),
55 message: msg.to_string(),
56 }
57 }
58}
59
60impl FormatParser for EveParser {
61 fn id(&self) -> &'static str {
62 "eve"
63 }
64 fn extensions(&self) -> &'static [&'static str] {
65 &[]
66 }
67 fn sniff(&self, bytes: &[u8]) -> Option<Confidence> {
68 let text = std::str::from_utf8(bytes).ok()?;
69 let line = text.lines().find(|l| !l.trim().is_empty())?;
70 let value: J = serde_json::from_str(line).ok()?;
71 value
72 .as_object()
73 .is_some_and(looks_like_eve)
74 .then_some(STRONG)
75 }
76 fn parse(&self, _source: &str, bytes: &[u8]) -> Result<Vec<Column>, AxError> {
77 let text = std::str::from_utf8(bytes).map_err(|e| self.err(e))?;
78 let mut builder = TableBuilder::new();
79 for line in text.lines() {
80 if line.trim().is_empty() {
81 continue;
82 }
83 let value: J = serde_json::from_str(line).map_err(|e| self.err(e))?;
84 if !value.is_object() {
85 return Err(self.err("EVE event is not a JSON object"));
86 }
87 let mut row: BTreeMap<String, Value> = BTreeMap::new();
88 flatten("", &value, &mut row);
89 builder.push_row(row);
90 }
91 Ok(builder.finish())
92 }
93}
94
#[cfg(test)]
mod tests {
    use super::*;
    use ax_core::ColType;

    /// Two-event fixture: an `alert` event with a nested `alert` object and a
    /// `metadata` array, followed by a `dns` event with a nested `dns` object.
    const EVE: &str = concat!(
        r#"{"timestamp":"2017-01-01T00:00:01.0+0000","flow_id":12,"event_type":"alert","src_ip":"1.2.3.4","src_port":1234,"dest_ip":"5.6.7.8","dest_port":80,"proto":"TCP","alert":{"signature_id":2010935,"signature":"ET POLICY external IP","category":"Potential Corporate Privacy Violation","severity":1},"metadata":["a","b"]}"#,
        "\n",
        r#"{"timestamp":"2017-01-01T00:00:02.0+0000","flow_id":13,"event_type":"dns","src_ip":"1.2.3.4","proto":"UDP","dns":{"type":"query","rrname":"example.com"}}"#,
        "\n",
    );

    /// Runs the parser over `s`, panicking if it reports an error.
    fn parse(s: &str) -> Vec<Column> {
        let outcome = EveParser.parse("-", s.as_bytes());
        outcome.unwrap()
    }

    /// Looks up a column by name, panicking with the name when it is absent.
    fn col<'a>(cols: &'a [Column], name: &str) -> &'a Column {
        match cols.iter().find(|c| c.name == name) {
            Some(found) => found,
            None => panic!("missing column {name}"),
        }
    }

    #[test]
    fn top_level_fields_are_typed() {
        let cols = parse(EVE);

        let event_type = col(&cols, "event_type");
        assert_eq!(event_type.cells[0], Value::Str("alert".into()));

        let src_port = col(&cols, "src_port");
        assert_eq!(src_port.ty, ColType::Int);
        assert_eq!(src_port.cells[0], Value::Int(1234));

        let dest_ip = col(&cols, "dest_ip");
        assert_eq!(dest_ip.cells[0], Value::Str("5.6.7.8".into()));

        let flow_id = col(&cols, "flow_id");
        assert_eq!(flow_id.cells[1], Value::Int(13));
    }

    #[test]
    fn nested_alert_object_is_flattened_to_dotted_columns() {
        let cols = parse(EVE);

        let category = col(&cols, "alert.category");
        assert_eq!(
            category.cells[0],
            Value::Str("Potential Corporate Privacy Violation".into())
        );

        let severity = col(&cols, "alert.severity");
        assert_eq!(severity.ty, ColType::Int);
        assert_eq!(severity.cells[0], Value::Int(1));

        let signature_id = col(&cols, "alert.signature_id");
        assert_eq!(signature_id.cells[0], Value::Int(2010935));

        // The second (dns) event has no `alert` object, so its cell is Null.
        assert_eq!(col(&cols, "alert.category").cells[1], Value::Null);
    }

    #[test]
    fn second_event_type_flattens_its_own_object() {
        let cols = parse(EVE);

        let event_type = col(&cols, "event_type");
        assert_eq!(event_type.cells[1], Value::Str("dns".into()));

        let rrname = col(&cols, "dns.rrname");
        assert_eq!(rrname.cells[1], Value::Str("example.com".into()));
        // The first (alert) event has no `dns` object, so its cell is Null.
        assert_eq!(rrname.cells[0], Value::Null);
    }

    #[test]
    fn arrays_are_kept_as_canonical_json() {
        let cols = parse(EVE);
        let metadata = col(&cols, "metadata");
        assert_eq!(metadata.cells[0], Value::Str("[\"a\",\"b\"]".into()));
    }

    #[test]
    fn flatten_units() {
        let input = serde_json::json!({"a": {"b": {"c": 5}}, "d": 2});
        let mut row = BTreeMap::new();
        flatten("", &input, &mut row);

        assert_eq!(row.get("a.b.c"), Some(&Value::Int(5)));
        assert_eq!(row.get("d"), Some(&Value::Int(2)));
        // Intermediate objects (`a`, `a.b`) must not produce entries of their own.
        assert_eq!(row.len(), 2);
    }

    #[test]
    fn malformed_events_error() {
        // First input is not JSON at all; second is valid JSON but not an object.
        for input in [&b"not json\n"[..], &b"[1,2,3]\n"[..]] {
            assert!(matches!(
                EveParser.parse("-", input),
                Err(AxError::Parse { .. })
            ));
        }
    }

    #[test]
    fn sniff_keys_on_event_type_and_timestamp() {
        assert_eq!(EveParser.sniff(EVE.as_bytes()), Some(STRONG));

        // Minimal event carrying just the two required keys.
        let minimal: &[u8] = br#"{"event_type":"alert","timestamp":"t"}"#;
        assert_eq!(EveParser.sniff(minimal), Some(STRONG));

        let missing_timestamp: &[u8] = br#"{"event_type":"alert"}"#;
        assert_eq!(EveParser.sniff(missing_timestamp), None, "no timestamp");

        let numeric_event_type: &[u8] = br#"{"event_type":5,"timestamp":"t"}"#;
        assert_eq!(
            EveParser.sniff(numeric_event_type),
            None,
            "event_type not a string"
        );

        // Generic NDJSON without the EVE keys.
        assert_eq!(EveParser.sniff(b"{\"a\":1}\n{\"a\":2}\n"), None);
        // Input that is not JSON at all.
        assert_eq!(EveParser.sniff(b"a,b,c\n1,2,3"), None);
    }

    #[test]
    fn claims_no_extension() {
        let claimed = EveParser.extensions();
        assert!(claimed.is_empty());
    }

    #[test]
    fn resolves_eve_over_ndjson_by_content() {
        let reg = crate::parser::ParserRegistry::default();

        let for_eve = reg.resolve("-", EVE.as_bytes()).unwrap();
        assert_eq!(for_eve.id(), "eve");

        // Plain NDJSON without the EVE keys must resolve to the ndjson parser.
        let for_plain = reg.resolve("-", b"{\"a\":1}\n{\"a\":2}\n").unwrap();
        assert_eq!(for_plain.id(), "ndjson");
    }
}