1use std::fmt;
44
/// A parsed CEF (Common Event Format) record.
///
/// Holds the seven header fields followed by the key/value extension pairs.
/// All header fields except `version` are kept as text.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CefRecord {
    /// CEF format version (the number that follows the `CEF:` marker).
    pub version: u32,
    /// Device vendor header field.
    pub device_vendor: String,
    /// Device product header field.
    pub device_product: String,
    /// Device version header field.
    pub device_version: String,
    /// Signature (event class) identifier header field.
    pub signature_id: String,
    /// Human-readable event name header field.
    pub name: String,
    /// Severity header field, stored as text rather than a number.
    pub severity: String,
    /// Extension `(key, value)` pairs, in the order they were parsed.
    pub extensions: Vec<(String, String)>,
}
59
/// Errors that can occur while parsing a CEF line.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum CefError {
    /// The input does not contain a `CEF:` marker.
    NotCef,
    /// The header has too few pipe-delimited fields.
    IncompleteHeader,
    /// The version field is not a valid integer.
    InvalidVersion,
}

impl fmt::Display for CefError {
    /// Writes a short, human-readable description of the error.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let message = match self {
            CefError::NotCef => "input does not contain a CEF header",
            CefError::IncompleteHeader => "CEF header requires 7 pipe-delimited fields",
            CefError::InvalidVersion => "CEF version is not a valid integer",
        };
        f.write_str(message)
    }
}

impl std::error::Error for CefError {}
84
/// Returns the byte offset of the first `CEF:` marker in `input`.
///
/// Yields `None` when the marker does not occur anywhere in the string.
pub fn find_cef_start(input: &str) -> Option<usize> {
    let mut occurrences = input.match_indices("CEF:");
    occurrences.next().map(|(offset, _marker)| offset)
}
91
92pub fn parse(input: &str) -> Result<CefRecord, CefError> {
97 let input = input.trim();
98
99 let cef_start = find_cef_start(input).ok_or(CefError::NotCef)?;
101 let after_marker = &input[cef_start + 4..]; let header_fields = split_header(after_marker);
106 if header_fields.len() < 8 {
107 return Err(CefError::IncompleteHeader);
108 }
109
110 let version: u32 = header_fields[0]
111 .trim()
112 .parse()
113 .map_err(|_| CefError::InvalidVersion)?;
114
115 let extensions = if header_fields.len() > 7 {
116 parse_extensions(header_fields[7])
117 } else {
118 Vec::new()
119 };
120
121 Ok(CefRecord {
122 version,
123 device_vendor: unescape_header(header_fields[1]),
124 device_product: unescape_header(header_fields[2]),
125 device_version: unescape_header(header_fields[3]),
126 signature_id: unescape_header(header_fields[4]),
127 name: unescape_header(header_fields[5]),
128 severity: unescape_header(header_fields[6]),
129 extensions,
130 })
131}
132
/// Splits the text following the `CEF:` marker on unescaped `|` characters.
///
/// A backslash hides the byte that follows it, so `\|` never acts as a
/// separator. Splitting stops after the seventh separator: whatever remains is
/// returned untouched as the final segment. When fewer separators are present,
/// the trailing remainder still becomes the last segment, so the result always
/// holds at least one element.
fn split_header(input: &str) -> Vec<&str> {
    const SEPARATOR_LIMIT: usize = 7;

    let mut fields = Vec::with_capacity(8);
    let mut field_start = 0;
    let mut skip_next = false;

    for (pos, byte) in input.bytes().enumerate() {
        if skip_next {
            // This byte was escaped by the preceding backslash.
            skip_next = false;
            continue;
        }
        match byte {
            b'\\' => skip_next = true,
            b'|' => {
                fields.push(&input[field_start..pos]);
                field_start = pos + 1;
                // One field is pushed per separator, so the field count
                // doubles as the separator count.
                if fields.len() == SEPARATOR_LIMIT {
                    break;
                }
            }
            _ => {}
        }
    }

    fields.push(&input[field_start..]);
    fields
}
170
/// Decodes the escape sequences allowed in a CEF header field.
///
/// `\|` becomes `|` and `\\` becomes `\`. A backslash followed by any other
/// character, or a backslash at the very end of the input, is kept literally.
///
/// The input is processed character by character (not byte by byte), so
/// multi-byte UTF-8 text is preserved unchanged.
fn unescape_header(input: &str) -> String {
    let mut out = String::with_capacity(input.len());
    let mut chars = input.chars().peekable();

    while let Some(current) = chars.next() {
        if current == '\\' {
            // Consume the following character only when it forms a known escape.
            if let Some(escaped) = chars.next_if(|&next| matches!(next, '|' | '\\')) {
                out.push(escaped);
                continue;
            }
        }
        out.push(current);
    }

    out
}
202
203fn parse_extensions(input: &str) -> Vec<(String, String)> {
212 let input = input.trim();
213 if input.is_empty() {
214 return Vec::new();
215 }
216
217 let segments = split_on_unescaped_eq(input);
218 if segments.len() < 2 {
219 return Vec::new();
220 }
221
222 let mut pairs = Vec::new();
223 let n = segments.len();
224 let mut current_key = extract_last_word(segments[0]);
225
226 for (i, segment) in segments.iter().enumerate().skip(1) {
227 let key = std::mem::take(&mut current_key);
228 if i < n - 1 {
229 match segment.rsplit_once(' ') {
232 Some((value_part, next_key)) => {
233 pairs.push((key, unescape_extension(value_part.trim())));
234 current_key = next_key.to_string();
235 }
236 None => {
237 pairs.push((key, unescape_extension(segment.trim())));
239 }
240 }
241 } else {
242 pairs.push((key, unescape_extension(segment.trim())));
244 }
245 }
246
247 pairs
248}
249
/// Splits `input` at every `=` that is not preceded by an escaping backslash.
///
/// A backslash hides the single byte that follows it. The separators
/// themselves are dropped, and the text after the last separator (possibly
/// empty) is always returned as the final piece.
fn split_on_unescaped_eq(input: &str) -> Vec<&str> {
    let mut pieces = Vec::new();
    let mut piece_start = 0;
    let mut skip_next = false;

    for (pos, byte) in input.bytes().enumerate() {
        if skip_next {
            // Escaped byte: never treated as a separator or as an escape.
            skip_next = false;
            continue;
        }
        match byte {
            b'\\' => skip_next = true,
            b'=' => {
                pieces.push(&input[piece_start..pos]);
                piece_start = pos + 1;
            }
            _ => {}
        }
    }

    pieces.push(&input[piece_start..]);
    pieces
}
272
/// Returns the text after the last space in `s` as an owned string.
///
/// When `s` contains no space, the whole string is returned.
fn extract_last_word(s: &str) -> String {
    let word = match s.rfind(' ') {
        Some(space_pos) => &s[space_pos + 1..],
        None => s,
    };
    word.to_string()
}
280
/// Decodes the escape sequences allowed in a CEF extension value.
///
/// `\=` becomes `=`, `\\` becomes `\`, `\n` becomes a line feed and `\r` a
/// carriage return. A backslash followed by any other character, or a
/// backslash at the very end of the input, is kept literally.
///
/// The input is processed character by character (not byte by byte), so
/// multi-byte UTF-8 text is preserved unchanged.
fn unescape_extension(input: &str) -> String {
    let mut out = String::with_capacity(input.len());
    let mut chars = input.chars().peekable();

    while let Some(current) = chars.next() {
        // Look ahead only after a backslash; `None` means "not an escape".
        let decoded = if current == '\\' {
            match chars.peek().copied() {
                Some('=') => Some('='),
                Some('\\') => Some('\\'),
                Some('n') => Some('\n'),
                Some('r') => Some('\r'),
                _ => None,
            }
        } else {
            None
        };

        match decoded {
            Some(replacement) => {
                out.push(replacement);
                // Drop the character that completed the escape sequence.
                chars.next();
            }
            None => out.push(current),
        }
    }

    out
}
320
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds an owned `(key, value)` pair to compare against parsed extensions.
    fn kv(key: &str, value: &str) -> (String, String) {
        (key.to_owned(), value.to_owned())
    }

    // --- Header handling ---

    #[test]
    fn minimal_cef() {
        let record = parse("CEF:0|Vendor|Product|1.0|100|Name|5|").unwrap();
        let expected = CefRecord {
            version: 0,
            device_vendor: "Vendor".to_owned(),
            device_product: "Product".to_owned(),
            device_version: "1.0".to_owned(),
            signature_id: "100".to_owned(),
            name: "Name".to_owned(),
            severity: "5".to_owned(),
            extensions: Vec::new(),
        };
        assert_eq!(record, expected);
    }

    #[test]
    fn header_without_trailing_pipe_extensions() {
        let record =
            parse("CEF:0|Vendor|Product|1.0|100|Name|5|src=10.0.0.1 dst=192.168.1.1").unwrap();
        assert_eq!(
            record.extensions,
            vec![kv("src", "10.0.0.1"), kv("dst", "192.168.1.1")]
        );
    }

    #[test]
    fn escaped_pipe_in_header() {
        let record = parse(r"CEF:0|Ven\|dor|Product|1.0|100|Na\|me|5|").unwrap();
        assert_eq!(record.device_vendor, "Ven|dor");
        assert_eq!(record.name, "Na|me");
    }

    #[test]
    fn escaped_backslash_in_header() {
        let record = parse(r"CEF:0|Ven\\dor|Product|1.0|100|Name|5|").unwrap();
        assert_eq!(record.device_vendor, r"Ven\dor");
    }

    #[test]
    fn not_cef() {
        let error = parse("not a CEF line").unwrap_err();
        assert_eq!(error, CefError::NotCef);
    }

    #[test]
    fn incomplete_header() {
        let error = parse("CEF:0|Vendor|Product").unwrap_err();
        assert_eq!(error, CefError::IncompleteHeader);
    }

    #[test]
    fn invalid_version() {
        let error = parse("CEF:abc|Vendor|Product|1.0|100|Name|5|").unwrap_err();
        assert_eq!(error, CefError::InvalidVersion);
    }

    // --- Extension handling ---

    #[test]
    fn single_extension() {
        let record = parse("CEF:0|V|P|1|1|N|1|src=10.0.0.1").unwrap();
        assert_eq!(record.extensions, vec![kv("src", "10.0.0.1")]);
    }

    #[test]
    fn multiple_extensions() {
        let record = parse("CEF:0|V|P|1|1|N|1|src=10.0.0.1 dst=192.168.1.1 dpt=443").unwrap();
        assert_eq!(
            record.extensions,
            vec![
                kv("src", "10.0.0.1"),
                kv("dst", "192.168.1.1"),
                kv("dpt", "443"),
            ]
        );
    }

    #[test]
    fn extension_value_with_spaces() {
        let record =
            parse("CEF:0|V|P|1|1|N|1|msg=User signed in from 10.0.0.1 src=10.0.0.1").unwrap();
        assert_eq!(
            record.extensions,
            vec![
                kv("msg", "User signed in from 10.0.0.1"),
                kv("src", "10.0.0.1"),
            ]
        );
    }

    #[test]
    fn extension_escaped_equals() {
        let line = r"CEF:0|V|P|1|1|N|1|request=https://example.com?foo\=bar src=10.0.0.1";
        let record = parse(line).unwrap();
        assert_eq!(record.extensions.len(), 2);
        assert_eq!(
            record.extensions[0],
            kv("request", "https://example.com?foo=bar")
        );
    }

    #[test]
    fn extension_escaped_backslash() {
        let record = parse(r"CEF:0|V|P|1|1|N|1|path=C:\\Windows\\System32").unwrap();
        assert_eq!(record.extensions[0], kv("path", r"C:\Windows\System32"));
    }

    #[test]
    fn extension_escaped_newline() {
        let record = parse(r"CEF:0|V|P|1|1|N|1|msg=line1\nline2").unwrap();
        assert_eq!(record.extensions[0], kv("msg", "line1\nline2"));
    }

    #[test]
    fn extension_escaped_cr() {
        let record = parse(r"CEF:0|V|P|1|1|N|1|msg=line1\rline2").unwrap();
        assert_eq!(record.extensions[0], kv("msg", "line1\rline2"));
    }

    #[test]
    fn empty_extensions() {
        let record = parse("CEF:0|V|P|1|1|N|1|").unwrap();
        assert_eq!(record.extensions.len(), 0);
    }

    #[test]
    fn whitespace_only_extensions() {
        let record = parse("CEF:0|V|P|1|1|N|1| ").unwrap();
        assert_eq!(record.extensions.len(), 0);
    }

    // --- Locating the CEF marker ---

    #[test]
    fn find_cef_in_syslog() {
        let line = "<134>2022-02-14T03:17:30-08:00 host CEF:0|V|P|1|1|N|1|src=10.0.0.1";
        let offset = find_cef_start(line).unwrap();
        let record = parse(&line[offset..]).unwrap();
        assert_eq!(record.device_vendor, "V");
        assert_eq!(record.extensions[0], kv("src", "10.0.0.1"));
    }

    #[test]
    fn find_cef_no_match() {
        assert!(find_cef_start("just a regular log line").is_none());
    }

    // --- Realistic sample lines ---

    #[test]
    fn real_world_arcsight() {
        let line = "CEF:0|ArcSight|ArcSight|7.0.0|agent:030|Agent Started|1|deviceExternalId=001 rt=1644800250000 cat=agent msg=ArcSight agent started successfully";
        let record = parse(line).unwrap();
        assert_eq!(record.device_vendor, "ArcSight");
        assert_eq!(record.name, "Agent Started");
        assert_eq!(
            record.extensions,
            vec![
                kv("deviceExternalId", "001"),
                kv("rt", "1644800250000"),
                kv("cat", "agent"),
                kv("msg", "ArcSight agent started successfully"),
            ]
        );
    }

    #[test]
    fn real_world_with_labels() {
        let line = "CEF:0|Vendor|Firewall|2.0|100|Connection Blocked|8|src=10.0.0.1 dst=192.168.1.100 spt=12345 dpt=443 proto=TCP act=blocked";
        let record = parse(line).unwrap();
        assert_eq!(
            record.extensions,
            vec![
                kv("src", "10.0.0.1"),
                kv("dst", "192.168.1.100"),
                kv("spt", "12345"),
                kv("dpt", "443"),
                kv("proto", "TCP"),
                kv("act", "blocked"),
            ]
        );
    }

    #[test]
    fn real_world_syslog_wrapped_cef() {
        let line = "<134>Feb 14 19:04:54 firewall01 CEF:0|Palo Alto|PAN-OS|10.1|THREAT|threat|7|src=172.16.0.5 dst=10.10.10.1 msg=Malware detected in file transfer";
        let offset = find_cef_start(line).unwrap();
        let record = parse(&line[offset..]).unwrap();
        assert_eq!(record.device_vendor, "Palo Alto");
        assert_eq!(record.device_product, "PAN-OS");
        assert_eq!(record.extensions.len(), 3);
        assert_eq!(
            record.extensions[2],
            kv("msg", "Malware detected in file transfer")
        );
    }

    #[test]
    fn extension_single_value_no_spaces() {
        let record = parse("CEF:0|V|P|1|1|N|1|src=10.0.0.1").unwrap();
        assert_eq!(record.extensions, vec![kv("src", "10.0.0.1")]);
    }

    #[test]
    fn extension_last_value_has_spaces() {
        let record =
            parse("CEF:0|V|P|1|1|N|1|src=10.0.0.1 msg=This is the final message").unwrap();
        assert_eq!(
            record.extensions,
            vec![
                kv("src", "10.0.0.1"),
                kv("msg", "This is the final message"),
            ]
        );
    }

    #[test]
    fn version_1() {
        let record = parse("CEF:1|V|P|1|1|N|1|src=10.0.0.1").unwrap();
        assert_eq!(record.version, 1);
    }
}