1use crate::infer;
18use crate::parser::{Confidence, FormatParser, STRONG};
19use crate::table::TableBuilder;
20use ax_core::{AxError, Column, Value};
21use std::collections::BTreeMap;
22
/// Reports whether `c` may appear in a CEF extension key.
///
/// Only ASCII letters and digits qualify; punctuation, whitespace, and every
/// non-ASCII character are rejected.
fn is_ident(c: char) -> bool {
    matches!(c, '0'..='9' | 'A'..='Z' | 'a'..='z')
}
26
/// Decodes backslash escapes in `s`.
///
/// `\n`, `\r`, and `\t` become newline, carriage return, and tab. A backslash
/// followed by any other character yields that character on its own (so `\|`,
/// `\=`, and `\\` lose their backslash). A lone backslash at the very end of
/// the input is kept as-is.
fn unescape(s: &str) -> String {
    let mut decoded = String::with_capacity(s.len());
    // True while the previous character was an unconsumed backslash.
    let mut pending_escape = false;
    for ch in s.chars() {
        if pending_escape {
            decoded.push(match ch {
                'n' => '\n',
                'r' => '\r',
                't' => '\t',
                literal => literal,
            });
            pending_escape = false;
        } else if ch == '\\' {
            pending_escape = true;
        } else {
            decoded.push(ch);
        }
    }
    if pending_escape {
        // Input ended right after a backslash: nothing to escape, keep it.
        decoded.push('\\');
    }
    decoded
}
47
/// Splits `s` on unescaped `|` characters into at most `max` fields.
///
/// A backslash and the character after it are copied through verbatim (the
/// escape is preserved, not decoded), so `\|` never acts as a separator. Once
/// `max - 1` separators have been consumed, the remainder of the input —
/// including any further pipes — becomes the final field.
///
/// The result always contains at least one field. A `max` of `0` is treated
/// like `1`: the whole input is returned as a single field.
fn split_unescaped_pipe(s: &str, max: usize) -> Vec<String> {
    let mut fields = Vec::new();
    let mut cur = String::new();
    let mut chars = s.chars();
    while let Some(c) = chars.next() {
        match c {
            '\\' => {
                // Keep the escape pair intact; decoding happens later.
                cur.push('\\');
                if let Some(escaped) = chars.next() {
                    cur.push(escaped);
                }
            }
            // `len + 1 < max` rather than `len < max - 1` so `max == 0`
            // cannot underflow.
            '|' if fields.len() + 1 < max => fields.push(std::mem::take(&mut cur)),
            _ => cur.push(c),
        }
    }
    fields.push(cur);
    fields
}
69
70fn parse_cef_extension(ext: &str) -> Vec<(String, String)> {
74 let chars: Vec<char> = ext.chars().collect();
75 let n = chars.len();
76 let mut keys: Vec<(usize, usize)> = Vec::new(); let mut i = 0;
79 while i < n {
80 if (i == 0 || chars[i - 1] == ' ') && is_ident(chars[i]) {
81 let mut j = i;
82 while j < n && is_ident(chars[j]) {
83 j += 1;
84 }
85 if j < n && chars[j] == '=' {
86 keys.push((i, j));
87 i = j + 1;
88 continue;
89 }
90 }
91 i += 1;
92 }
93 let mut pairs = Vec::new();
94 for (idx, &(key_start, eq)) in keys.iter().enumerate() {
95 let key: String = chars[key_start..eq].iter().collect();
96 let value_end = keys.get(idx + 1).map_or(n, |&(next_start, _)| next_start);
97 let raw: String = chars[eq + 1..value_end].iter().collect();
98 pairs.push((key, unescape(raw.trim_end())));
99 }
100 pairs
101}
102
/// Resolves the LEEF 2.0 delimiter header field to the character that
/// separates extension attributes.
///
/// Accepted forms:
/// - a hexadecimal code point prefixed with `0x`, `x`, or `\x`
///   (for example `0x09`, `x09`, `\x09`);
/// - any other non-empty string, whose first character is used literally
///   (for example `^`);
/// - the empty string, which selects the tab default.
///
/// A prefix that is not followed by hex digits naming a valid Unicode scalar
/// value is not treated as hex; the first character of `spec` is used
/// instead, so a bare `x` yields `'x'`.
fn leef_delimiter(spec: &str) -> char {
    // `0x` must be tried before `x`; otherwise "0x09" would resolve to '0'.
    const HEX_PREFIXES: [&str; 3] = ["0x", "\\x", "x"];
    let from_hex = HEX_PREFIXES
        .iter()
        .find_map(|prefix| spec.strip_prefix(*prefix))
        // `from_str_radix` tolerates a leading '+', which is not a hex digit.
        .filter(|digits| digits.bytes().all(|b| b.is_ascii_hexdigit()))
        .and_then(|digits| u32::from_str_radix(digits, 16).ok())
        .and_then(char::from_u32);
    from_hex.unwrap_or_else(|| spec.chars().next().unwrap_or('\t'))
}
113
114#[derive(Debug, Default, Clone)]
117pub struct CefParser;
118
119const CEF_HEADER: [&str; 7] = [
120 "cefVersion",
121 "deviceVendor",
122 "deviceProduct",
123 "deviceVersion",
124 "signatureId",
125 "name",
126 "severity",
127];
128
129impl CefParser {
130 fn err(&self, msg: impl std::fmt::Display) -> AxError {
131 AxError::Parse {
132 format: self.id().to_string(),
133 message: msg.to_string(),
134 }
135 }
136}
137
138impl FormatParser for CefParser {
139 fn id(&self) -> &'static str {
140 "cef"
141 }
142 fn extensions(&self) -> &'static [&'static str] {
143 &["cef"]
144 }
145 fn sniff(&self, bytes: &[u8]) -> Option<Confidence> {
146 let text = std::str::from_utf8(bytes).ok()?;
147 let line = text.lines().find(|l| !l.trim().is_empty())?;
148 line.starts_with("CEF:").then_some(STRONG)
149 }
150 fn parse(&self, _source: &str, bytes: &[u8]) -> Result<Vec<Column>, AxError> {
151 let text = std::str::from_utf8(bytes).map_err(|e| self.err(e))?;
152 let mut builder = TableBuilder::new();
153 for line in text.lines() {
154 if line.trim().is_empty() {
155 continue;
156 }
157 let rest = line
158 .strip_prefix("CEF:")
159 .ok_or_else(|| self.err("not a CEF line: missing 'CEF:' prefix"))?;
160 let fields = split_unescaped_pipe(rest, 8);
161 if fields.len() < CEF_HEADER.len() {
162 return Err(self.err("CEF header requires 7 pipe-delimited fields"));
163 }
164 let mut row: BTreeMap<String, Value> = BTreeMap::new();
165 for (name, raw) in CEF_HEADER.iter().zip(&fields) {
166 let decoded = unescape(raw);
167 let cell = if *name == "severity" {
170 infer::infer_scalar(&decoded)
171 } else {
172 Value::Str(decoded)
173 };
174 row.insert((*name).to_string(), cell);
175 }
176 if let Some(ext) = fields.get(CEF_HEADER.len()) {
177 for (key, value) in parse_cef_extension(ext) {
178 row.insert(key, infer::infer_scalar(&value));
179 }
180 }
181 builder.push_row(row);
182 }
183 Ok(builder.finish())
184 }
185}
186
187#[derive(Debug, Default, Clone)]
190pub struct LeefParser;
191
192const LEEF_HEADER: [&str; 5] = [
193 "leefVersion",
194 "vendor",
195 "product",
196 "productVersion",
197 "eventId",
198];
199
200impl LeefParser {
201 fn err(&self, msg: impl std::fmt::Display) -> AxError {
202 AxError::Parse {
203 format: self.id().to_string(),
204 message: msg.to_string(),
205 }
206 }
207}
208
209impl FormatParser for LeefParser {
210 fn id(&self) -> &'static str {
211 "leef"
212 }
213 fn extensions(&self) -> &'static [&'static str] {
214 &["leef"]
215 }
216 fn sniff(&self, bytes: &[u8]) -> Option<Confidence> {
217 let text = std::str::from_utf8(bytes).ok()?;
218 let line = text.lines().find(|l| !l.trim().is_empty())?;
219 line.starts_with("LEEF:").then_some(STRONG)
220 }
221 fn parse(&self, _source: &str, bytes: &[u8]) -> Result<Vec<Column>, AxError> {
222 let text = std::str::from_utf8(bytes).map_err(|e| self.err(e))?;
223 let mut builder = TableBuilder::new();
224 for line in text.lines() {
225 if line.trim().is_empty() {
226 continue;
227 }
228 let rest = line
229 .strip_prefix("LEEF:")
230 .ok_or_else(|| self.err("not a LEEF line: missing 'LEEF:' prefix"))?;
231 let version = rest.split('|').next().unwrap_or("");
233 let is_v2 = version.starts_with('2');
234 let header_count = LEEF_HEADER.len() + usize::from(is_v2);
235 let parts: Vec<&str> = rest.splitn(header_count + 1, '|').collect();
236 if parts.len() < LEEF_HEADER.len() {
237 return Err(self.err("LEEF header requires at least 5 fields"));
238 }
239 let mut row: BTreeMap<String, Value> = BTreeMap::new();
240 for (name, value) in LEEF_HEADER.iter().zip(&parts) {
241 row.insert((*name).to_string(), Value::Str((*value).to_string()));
242 }
243 let delimiter = if is_v2 {
244 parts
245 .get(LEEF_HEADER.len())
246 .map_or('\t', |s| leef_delimiter(s))
247 } else {
248 '\t'
249 };
250 if let Some(ext) = parts.get(header_count) {
251 for token in ext.split(delimiter) {
252 if let Some((key, value)) = token.split_once('=') {
253 if !key.is_empty() {
254 row.insert(key.to_string(), infer::infer_scalar(value));
255 }
256 }
257 }
258 }
259 builder.push_row(row);
260 }
261 Ok(builder.finish())
262 }
263}
264
#[cfg(test)]
mod tests {
    use super::*;
    use ax_core::ColType;

    /// Two-record CEF fixture; the records carry different extension keys.
    const CEF: &str = concat!(
        "CEF:0|Security|threatmanager|1.0|100|worm stopped|10|src=10.0.0.1 spt=1232 msg=took action\n",
        "CEF:0|Security|threatmanager|1.0|200|port scan|3|src=10.0.0.9 dst=2.1.2.2\n",
    );

    /// Returns the column called `name`, panicking with its name when absent.
    fn col<'a>(cols: &'a [Column], name: &str) -> &'a Column {
        match cols.iter().find(|candidate| candidate.name == name) {
            Some(column) => column,
            None => panic!("missing column {name}"),
        }
    }

    /// Returns the cell at `row` of the column called `name`.
    fn cell<'a>(cols: &'a [Column], name: &str, row: usize) -> &'a Value {
        &col(cols, name).cells[row]
    }

    /// Shorthand for a string cell.
    fn text(s: &str) -> Value {
        Value::Str(s.into())
    }

    /// Converts borrowed key/value pairs into the owned form the parser returns.
    fn owned_pairs(pairs: &[(&str, &str)]) -> Vec<(String, String)> {
        pairs
            .iter()
            .map(|&(key, value)| (key.to_string(), value.to_string()))
            .collect()
    }

    /// Parses `s` as CEF, panicking on failure.
    fn cef(s: &str) -> Vec<Column> {
        let parsed = CefParser.parse("-", s.as_bytes());
        parsed.unwrap()
    }

    /// Parses `s` as LEEF, panicking on failure.
    fn leef(s: &str) -> Vec<Column> {
        let parsed = LeefParser.parse("-", s.as_bytes());
        parsed.unwrap()
    }

    #[test]
    fn unescape_decodes_siem_escapes() {
        let cases = [
            (r"a\|b", "a|b"),
            (r"a\=b", "a=b"),
            (r"a\\b", r"a\b"),
            (r"a\nb", "a\nb"),
            ("plain", "plain"),
        ];
        for (input, expected) in cases {
            assert_eq!(unescape(input), expected);
        }
    }

    #[test]
    fn split_unescaped_pipe_keeps_escaped_and_extra() {
        let fields = split_unescaped_pipe(r"a\|b|c|d|e|f|g|h|i|j", 8);
        assert_eq!(fields.len(), 8);
        assert_eq!(
            fields.first().map(String::as_str),
            Some(r"a\|b"),
            "escaped pipe is not a separator"
        );
        assert_eq!(
            fields.last().map(String::as_str),
            Some("i|j"),
            "extension field absorbs extra pipes"
        );
    }

    #[test]
    fn parse_cef_extension_handles_spaces_and_escapes() {
        let parsed =
            parse_cef_extension(r"src=10.0.0.1 msg=worm was stopped spt=1232 note=a\=b");
        let expected = owned_pairs(&[
            ("src", "10.0.0.1"),
            ("msg", "worm was stopped"),
            ("spt", "1232"),
            ("note", "a=b"),
        ]);
        assert_eq!(parsed, expected);
    }

    #[test]
    fn parse_cef_extension_only_breaks_at_space_preceded_keys() {
        assert_eq!(
            parse_cef_extension("k=ab=cd"),
            owned_pairs(&[("k", "ab=cd")])
        );
        assert_eq!(
            parse_cef_extension("a=1 b=2"),
            owned_pairs(&[("a", "1"), ("b", "2")])
        );
    }

    #[test]
    fn leef_delimiter_resolves_char_hex_and_default() {
        let cases = [("^", '^'), ("x09", '\t'), (r"\x09", '\t'), ("", '\t')];
        for (spec, expected) in cases {
            assert_eq!(leef_delimiter(spec), expected);
        }
    }

    #[test]
    fn cef_header_fields() {
        let cols = cef(CEF);
        assert_eq!(*cell(&cols, "deviceProduct", 0), text("threatmanager"));
        assert_eq!(*cell(&cols, "signatureId", 0), text("100"));
        assert_eq!(*cell(&cols, "name", 1), text("port scan"));
        let severity = col(&cols, "severity");
        assert_eq!(
            severity.ty,
            ColType::Int,
            "severity is the analyzable numeric"
        );
        assert_eq!(severity.cells, vec![Value::Int(10), Value::Int(3)]);
    }

    #[test]
    fn cef_extension_fields_typed_and_padded() {
        let cols = cef(CEF);
        assert_eq!(*cell(&cols, "src", 0), text("10.0.0.1"));
        assert_eq!(*cell(&cols, "spt", 0), Value::Int(1232));
        assert_eq!(*cell(&cols, "msg", 0), text("took action"));
        // Keys present in only one record are padded with nulls in the other.
        assert_eq!(*cell(&cols, "dst", 0), Value::Null);
        assert_eq!(*cell(&cols, "spt", 1), Value::Null);
    }

    #[test]
    fn cef_escaped_pipe_in_header() {
        let cols = cef(r"CEF:0|Sec\|ops|prod|1|1|n|5|");
        assert_eq!(*cell(&cols, "deviceVendor", 0), text("Sec|ops"));
    }

    #[test]
    fn cef_without_extension() {
        let cols = cef("CEF:0|v|p|1.0|42|evt|7\n");
        assert_eq!(*cell(&cols, "signatureId", 0), text("42"));
        assert_eq!(*cell(&cols, "severity", 0), Value::Int(7));
    }

    #[test]
    fn cef_malformed_too_few_fields_errors() {
        let too_few = CefParser.parse("-", b"CEF:0|only|three\n");
        assert!(matches!(too_few, Err(AxError::Parse { .. })));
        let no_prefix = CefParser.parse("-", b"not a cef line\n");
        assert!(matches!(no_prefix, Err(AxError::Parse { .. })));
    }

    #[test]
    fn cef_sniff_and_resolution() {
        assert_eq!(CefParser.sniff(CEF.as_bytes()), Some(STRONG));
        assert_eq!(CefParser.sniff(b"LEEF:1.0|v|p|1|x|"), None);
        assert_eq!(CefParser.sniff(b"a,b,c\n1,2,3"), None);
        assert_eq!(CefParser.extensions(), &["cef"]);

        let registry = crate::parser::ParserRegistry::default();
        let by_extension = registry.resolve("e.cef", b"x").unwrap();
        assert_eq!(by_extension.id(), "cef");
        let by_content = registry.resolve("-", CEF.as_bytes()).unwrap();
        assert_eq!(by_content.id(), "cef");
    }

    #[test]
    fn leef_v1_tab_extension() {
        let cols = leef(
            "LEEF:1.0|Lancope|StealthWatch|1.0|41|src=192.0.2.0\tdst=172.50.123.1\tsev=5\n",
        );
        assert_eq!(*cell(&cols, "leefVersion", 0), text("1.0"));
        assert_eq!(*cell(&cols, "vendor", 0), text("Lancope"));
        assert_eq!(*cell(&cols, "eventId", 0), text("41"));
        assert_eq!(*cell(&cols, "src", 0), text("192.0.2.0"));
        assert_eq!(*cell(&cols, "sev", 0), Value::Int(5));
    }

    #[test]
    fn leef_header_only_no_extension() {
        let cols = LeefParser.parse("-", b"LEEF:1.0|Acme|Tool|2|77").unwrap();
        assert_eq!(*cell(&cols, "eventId", 0), text("77"));
        assert_eq!(*cell(&cols, "vendor", 0), text("Acme"));
    }

    #[test]
    fn leef_v2_explicit_delimiter() {
        let cols = leef("LEEF:2.0|Vendor|Product|2.5|1001|^|src=10.0.0.1^dst=10.0.0.2^spt=22\n");
        assert_eq!(*cell(&cols, "eventId", 0), text("1001"));
        assert_eq!(*cell(&cols, "src", 0), text("10.0.0.1"));
        assert_eq!(*cell(&cols, "spt", 0), Value::Int(22));
        // The delimiter field itself must not surface as a column.
        assert!(!cols.iter().any(|column| column.name == "^"));
    }

    #[test]
    fn leef_v2_hex_delimiter() {
        let cols = leef("LEEF:2.0|V|P|1|99|x09|a=1\tb=2\n");
        assert_eq!(*cell(&cols, "a", 0), Value::Int(1));
        assert_eq!(*cell(&cols, "b", 0), Value::Int(2));
    }

    #[test]
    fn leef_malformed_and_sniff() {
        let too_few = LeefParser.parse("-", b"LEEF:1.0|onlytwo\n");
        assert!(matches!(too_few, Err(AxError::Parse { .. })));
        let no_prefix = LeefParser.parse("-", b"not leef\n");
        assert!(matches!(no_prefix, Err(AxError::Parse { .. })));

        assert_eq!(LeefParser.sniff(b"LEEF:1.0|v|p|1|x|a=1"), Some(STRONG));
        assert_eq!(LeefParser.sniff(b"CEF:0|v|p|1|1|n|5|"), None);
        assert_eq!(LeefParser.extensions(), &["leef"]);

        let registry = crate::parser::ParserRegistry::default();
        let by_extension = registry.resolve("e.leef", b"x").unwrap();
        assert_eq!(by_extension.id(), "leef");
        let by_content = registry.resolve("-", b"LEEF:1.0|v|p|1|x|a=1\n").unwrap();
        assert_eq!(by_content.id(), "leef");
    }
}