Skip to main content

rsigma_runtime/input/
auto.rs

//! Auto-detection input adapter.
//!
//! Attempts formats in order:
//! 1. JSON — if the line starts with `{` or `[` and `serde_json::from_str`
//!    succeeds → [`JsonEvent`]
//! 2. Syslog — if the line starts with `<` and [`syslog_loose`] extracts
//!    meaningful fields → [`KvEvent`]
//! 3. Plain text — fallback → [`PlainEvent`]
//!
//! Auto-detection is per-line, so mixed-format input works.
//! logfmt and CEF are **not** part of auto-detect because their syntax is
//! too ambiguous for reliable detection (any line with `=` could be logfmt).
11
12use super::{EventInputDecoded, SyslogConfig, parse_json, parse_plain, parse_syslog};
13
14/// Auto-detect the format of a single line and parse it.
15///
16/// The `syslog_config` is used when the syslog path is selected (e.g. to
17/// honor `--syslog-tz` even in auto-detect mode).
18pub fn auto_detect(line: &str, syslog_config: &SyslogConfig) -> EventInputDecoded {
19    // 1. Try JSON first (fast: just check if it starts with '{' or '[').
20    let trimmed = line.trim_start();
21    if (trimmed.starts_with('{') || trimmed.starts_with('['))
22        && let Some(decoded) = parse_json(line)
23    {
24        return decoded;
25    }
26
27    // 2. Try syslog: if the line starts with '<' (priority), it's likely syslog.
28    if trimmed.starts_with('<') {
29        let decoded = parse_syslog(line, syslog_config);
30        // Check if syslog extracted meaningful fields (not just _raw).
31        if has_syslog_fields(&decoded) {
32            return decoded;
33        }
34    }
35
36    // 3. Fall back to plain text.
37    parse_plain(line)
38}
39
40/// Check if the syslog adapter extracted meaningful structured fields
41/// beyond just `_raw`. The syslog adapter never returns `Plain`, so only
42/// `Kv` and `Json` variants are possible here.
43fn has_syslog_fields(decoded: &EventInputDecoded) -> bool {
44    match decoded {
45        EventInputDecoded::Kv(kv) => kv.fields().iter().any(|(k, _)| k != "_raw"),
46        EventInputDecoded::Json(_) => true,
47        _ => false,
48    }
49}
50
#[cfg(test)]
mod tests {
    use super::*;
    use rsigma_eval::Event;

    /// Default syslog configuration shared by most tests.
    fn cfg() -> SyslogConfig {
        SyslogConfig::default()
    }

    /// True for the structured variants the syslog path may produce.
    fn is_structured(decoded: &EventInputDecoded) -> bool {
        matches!(
            decoded,
            EventInputDecoded::Kv(_) | EventInputDecoded::Json(_)
        )
    }

    /// A JSON object line is decoded as JSON and exposes its fields.
    #[test]
    fn auto_detect_json() {
        let line = r#"{"EventID": 1, "host": "web01"}"#;
        let decoded = auto_detect(line, &cfg());
        assert!(matches!(decoded, EventInputDecoded::Json(_)));
        assert!(decoded.get_field("EventID").is_some());
    }

    /// A BSD-style syslog line is decoded into a structured variant.
    #[test]
    fn auto_detect_syslog() {
        let line = "<34>Oct 11 22:14:15 mymachine su: 'su root' failed for lonvick";
        let decoded = auto_detect(line, &cfg());
        assert!(
            is_structured(&decoded),
            "Expected Kv or Json for syslog, got Plain"
        );
    }

    /// A line with no recognizable prefix falls back to plain text.
    #[test]
    fn auto_detect_plain() {
        let line = "ERROR: something went wrong on server";
        let decoded = auto_detect(line, &cfg());
        assert!(matches!(decoded, EventInputDecoded::Plain(_)));
    }

    /// A syslog envelope whose message body is JSON yields the JSON variant.
    #[test]
    fn auto_detect_syslog_wrapped_json() {
        let line = r#"<134>1 2024-01-15T10:30:00Z host app - - - {"key": "value"}"#;
        let decoded = auto_detect(line, &cfg());
        assert!(
            matches!(decoded, EventInputDecoded::Json(_)),
            "Expected embedded JSON to be extracted"
        );
    }

    /// A `{`-prefixed line that is not valid JSON ends up as plain text.
    #[test]
    fn auto_detect_invalid_json_falls_through() {
        let line = "{not valid json}";
        let decoded = auto_detect(line, &cfg());
        assert!(matches!(decoded, EventInputDecoded::Plain(_)));
    }

    /// Detection is per line, so a batch may mix all three formats.
    #[test]
    fn mixed_format_batch() {
        let c = cfg();
        let lines = [
            r#"{"EventID": 1}"#,
            "<34>Oct 11 22:14:15 host su: test",
            "plain log line",
        ];
        let [json, syslog, plain] = lines.map(|l| auto_detect(l, &c));
        assert!(matches!(json, EventInputDecoded::Json(_)));
        assert!(is_structured(&syslog));
        assert!(matches!(plain, EventInputDecoded::Plain(_)));
    }

    /// A non-default syslog config still goes through the syslog path.
    #[test]
    fn auto_detect_syslog_respects_config() {
        let config = SyslogConfig {
            default_tz_offset_secs: 5 * 3600,
        };
        let line = "<34>Oct 11 22:14:15 mymachine su: test";
        let decoded = auto_detect(line, &config);
        assert!(is_structured(&decoded));
    }
}