Skip to main content

rsigma_runtime/input/
syslog.rs

//! Syslog RFC 3164 / 5424 input adapter.
//!
//! Wraps [`syslog_loose::parse_message_with_year_tz`] and extracts structured
//! data, header fields, and the message body into a [`KvEvent`].
//!
//! ## Edge cases handled
//!
//! - **Embedded JSON in msg**: if the `msg` field parses as a JSON object,
//!   the adapter returns a `JsonEvent` with the syslog header fields merged in.
//! - **Year resolution (RFC 3164)**: timestamps lack a year; defaults to
//!   current year with December→January rollover logic.
//! - **Timezone**: RFC 3164 may lack timezone info; configurable default (UTC).
13
14use rsigma_eval::{JsonEvent, KvEvent};
15use syslog_loose::Message;
16
17use super::EventInputDecoded;
18
/// Settings that control how syslog lines are decoded by this adapter.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SyslogConfig {
    /// Offset, in seconds east of UTC, assumed for RFC 3164 messages whose
    /// timestamp carries no timezone information.
    pub default_tz_offset_secs: i32,
    /// Whether to drop a leading UTF-8 BOM (`U+FEFF`) from the message body.
    ///
    /// RFC 5424 section 6.4 mandates that a UTF-8 `MSG` begin with a BOM as an
    /// encoding marker rather than as content. `syslog_loose` keeps it
    /// verbatim, so when this is off the BOM ends up in `_raw` (breaking
    /// anchored matchers) and prevents embedded-JSON detection (`serde_json`
    /// rejects a leading BOM). Defaults to `true`; set to `false` to keep the
    /// message byte-for-byte.
    pub strip_bom: bool,
}

impl Default for SyslogConfig {
    /// UTC as the fallback timezone (offset `0`), with BOM stripping enabled.
    fn default() -> Self {
        SyslogConfig {
            strip_bom: true,
            default_tz_offset_secs: 0,
        }
    }
}
43
44/// Parse a syslog line into an event.
45///
46/// If the syslog `msg` body contains a valid JSON object, returns a
47/// `JsonEvent` with syslog headers merged in. Otherwise returns a `KvEvent`
48/// with syslog fields as key-value pairs.
49pub fn parse_syslog(line: &str, config: &SyslogConfig) -> EventInputDecoded {
50    let tz = chrono::FixedOffset::east_opt(config.default_tz_offset_secs)
51        .unwrap_or(chrono::FixedOffset::east_opt(0).unwrap());
52
53    let parsed = syslog_loose::parse_message_with_year_tz(
54        line,
55        resolve_year,
56        Some(tz),
57        syslog_loose::Variant::Either,
58    );
59
60    build_event_from_message(&parsed, config.strip_bom)
61}
62
63/// Build an EventInputDecoded from a parsed syslog message.
64///
65/// When `strip_bom` is set, a single leading UTF-8 BOM (`U+FEFF`) is removed
66/// from the message body before JSON detection and `_raw` extraction. See
67/// [`SyslogConfig::strip_bom`].
68fn build_event_from_message(parsed: &Message<&str>, strip_bom: bool) -> EventInputDecoded {
69    let msg = if strip_bom {
70        parsed.msg.strip_prefix('\u{FEFF}').unwrap_or(parsed.msg)
71    } else {
72        parsed.msg
73    };
74    let msg_str = msg.trim();
75
76    // Try to parse the message body as JSON.
77    if let Ok(mut json_obj) = serde_json::from_str::<serde_json::Value>(msg_str)
78        && let Some(obj) = json_obj.as_object_mut()
79    {
80        inject_syslog_headers(parsed, obj);
81        return EventInputDecoded::Json(JsonEvent::owned(serde_json::Value::Object(obj.clone())));
82    }
83
84    // Not JSON — build a KvEvent from syslog fields.
85    let mut fields = Vec::new();
86
87    if let Some(ts) = &parsed.timestamp {
88        fields.push(("timestamp".to_string(), ts.to_rfc3339()));
89    }
90    if let Some(host) = &parsed.hostname {
91        fields.push(("hostname".to_string(), host.to_string()));
92    }
93    if let Some(app) = &parsed.appname {
94        fields.push(("appname".to_string(), app.to_string()));
95    }
96    if let Some(pid) = &parsed.procid {
97        fields.push(("procid".to_string(), pid.to_string()));
98    }
99    if let Some(mid) = &parsed.msgid {
100        fields.push(("msgid".to_string(), mid.to_string()));
101    }
102    if let Some(facility) = &parsed.facility {
103        fields.push(("facility".to_string(), format!("{facility:?}")));
104    }
105    if let Some(severity) = &parsed.severity {
106        fields.push(("severity".to_string(), format!("{severity:?}")));
107    }
108
109    // Extract RFC 5424 structured data key-value pairs.
110    for elem in &parsed.structured_data {
111        for (key, val) in elem.params() {
112            let prefixed_key = format!("{}.{}", elem.id, key);
113            fields.push((prefixed_key, val));
114        }
115    }
116
117    if !msg_str.is_empty() {
118        fields.push(("_raw".to_string(), msg_str.to_string()));
119    }
120
121    EventInputDecoded::Kv(KvEvent::new(fields))
122}
123
124/// Inject syslog header fields into a JSON object (for embedded-JSON case).
125///
126/// Includes all fields that the KvEvent path extracts: timestamp, hostname,
127/// appname, procid, msgid, facility, severity, and RFC 5424 structured data.
128fn inject_syslog_headers(
129    parsed: &Message<&str>,
130    obj: &mut serde_json::Map<String, serde_json::Value>,
131) {
132    if let Some(ts) = &parsed.timestamp {
133        obj.entry("syslog_timestamp")
134            .or_insert_with(|| serde_json::Value::String(ts.to_rfc3339()));
135    }
136    if let Some(host) = &parsed.hostname {
137        obj.entry("syslog_hostname")
138            .or_insert_with(|| serde_json::Value::String(host.to_string()));
139    }
140    if let Some(app) = &parsed.appname {
141        obj.entry("syslog_appname")
142            .or_insert_with(|| serde_json::Value::String(app.to_string()));
143    }
144    if let Some(pid) = &parsed.procid {
145        obj.entry("syslog_procid")
146            .or_insert_with(|| serde_json::Value::String(pid.to_string()));
147    }
148    if let Some(mid) = &parsed.msgid {
149        obj.entry("syslog_msgid")
150            .or_insert_with(|| serde_json::Value::String(mid.to_string()));
151    }
152    if let Some(facility) = &parsed.facility {
153        obj.entry("syslog_facility")
154            .or_insert_with(|| serde_json::Value::String(format!("{facility:?}")));
155    }
156    if let Some(severity) = &parsed.severity {
157        obj.entry("syslog_severity")
158            .or_insert_with(|| serde_json::Value::String(format!("{severity:?}")));
159    }
160
161    // RFC 5424 structured data parameters.
162    for elem in &parsed.structured_data {
163        for (key, val) in elem.params() {
164            let prefixed_key = format!("sd.{}.{}", elem.id, key);
165            obj.entry(prefixed_key)
166                .or_insert_with(|| serde_json::Value::String(val));
167        }
168    }
169}
170
171/// Year resolver for RFC 3164 timestamps.
172///
173/// `IncompleteDate` is `(month, day, hour, minute, second)`. Uses the current
174/// year, with December→January rollover: if the parsed month is January and
175/// we're in December, assume next year (and vice versa).
176fn resolve_year(date: syslog_loose::IncompleteDate) -> i32 {
177    let now = chrono::Utc::now();
178    let current_year = chrono::Datelike::year(&now);
179    let current_month = chrono::Datelike::month(&now);
180    let parsed_month = date.0;
181
182    if current_month == 12 && parsed_month == 1 {
183        current_year + 1
184    } else if current_month == 1 && parsed_month == 12 {
185        current_year - 1
186    } else {
187        current_year
188    }
189}
190
#[cfg(test)]
mod tests {
    use super::*;
    use rsigma_eval::Event;

    /// A plain RFC 5424 line yields header fields plus the body under `_raw`.
    #[test]
    fn rfc5424_basic() {
        let line = "<165>1 2024-01-15T10:30:00.000Z web01 myapp 1234 ID47 - Connection established";
        let decoded = parse_syslog(line, &SyslogConfig::default());
        assert!(decoded.get_field("hostname").is_some());
        assert!(decoded.get_field("appname").is_some());
        assert!(decoded.get_field("_raw").is_some());
    }

    /// An RFC 3164 (BSD-style) line keeps its message text searchable.
    #[test]
    fn rfc3164_basic() {
        let line = "<34>Oct 11 22:14:15 mymachine su: 'su root' failed for lonvick on /dev/pts/8";
        let decoded = parse_syslog(line, &SyslogConfig::default());
        assert!(decoded.any_string_value(&|s| s.contains("su root")));
    }

    /// A JSON object in the body is surfaced as fields, with syslog headers
    /// merged in under the `syslog_` prefix.
    #[test]
    fn syslog_wrapped_json() {
        let line = r#"<134>1 2024-01-15T10:30:00Z docker01 myapp 9876 MSGID1 - {"EventID": 1, "user": "admin"}"#;
        let decoded = parse_syslog(line, &SyslogConfig::default());
        assert!(decoded.get_field("EventID").is_some());
        assert!(decoded.get_field("user").is_some());
        // Syslog headers should be merged into the JSON object.
        assert!(decoded.get_field("syslog_hostname").is_some());
        assert!(decoded.get_field("syslog_appname").is_some());
    }

    /// RFC 5424 structured-data parameters must show up in the event.
    #[test]
    fn rfc5424_structured_data() {
        let line = r#"<165>1 2024-01-15T10:30:00Z host app - ID1 [exampleSDID@32473 iut="3" eventSource="App" eventID="1011"] message"#;
        let decoded = parse_syslog(line, &SyslogConfig::default());
        let json = decoded.to_json();
        let json_str = serde_json::to_string(&json).unwrap();
        // Check the structured-data parameter itself: the line has a
        // non-empty body, so `_raw` is always present and accepting it as an
        // alternative would let this test pass without any extraction.
        assert!(
            json_str.contains("eventSource"),
            "structured-data params should appear in the event: {json_str}"
        );
    }

    /// A message with no body still produces the header fields.
    #[test]
    fn empty_msg() {
        let line = "<13>1 2024-01-15T10:30:00Z host app - - -";
        let decoded = parse_syslog(line, &SyslogConfig::default());
        assert!(decoded.get_field("hostname").is_some());
    }

    /// A non-default timezone offset does not disturb message decoding.
    #[test]
    fn custom_timezone() {
        let config = SyslogConfig {
            default_tz_offset_secs: 5 * 3600, // UTC+5
            ..SyslogConfig::default()
        };
        let line = "<34>Oct 11 22:14:15 mymachine su: test message";
        let decoded = parse_syslog(line, &config);
        assert!(decoded.any_string_value(&|s| s.contains("test message")));
    }

    /// With default settings the leading BOM is removed from `_raw`.
    #[test]
    fn rfc5424_strips_bom() {
        // RFC 5424 UTF-8 MSG begins with a BOM (U+FEFF) as an encoding marker.
        let line =
            "<34>1 2003-10-11T22:14:15.003Z mymachine.example.com su - ID47 - \u{FEFF}an event";
        let decoded = parse_syslog(line, &SyslogConfig::default());
        let raw = decoded
            .get_field("_raw")
            .and_then(|v| v.as_str().map(|s| s.into_owned()))
            .expect("_raw present");
        assert!(!raw.starts_with('\u{FEFF}'), "BOM should be stripped");
        assert_eq!(raw, "an event");
    }

    /// BOM stripping happens before JSON detection.
    #[test]
    fn rfc5424_bom_json_detected() {
        // A BOM-prefixed JSON payload must still be detected as embedded JSON;
        // serde_json errors on a leading BOM, so this would degrade to a
        // KvEvent without the strip.
        let line = "<134>1 2024-01-15T10:30:00Z docker01 myapp 9876 MSGID1 - \u{FEFF}{\"EventID\": 1, \"user\": \"admin\"}";
        let decoded = parse_syslog(line, &SyslogConfig::default());
        assert!(
            matches!(decoded, EventInputDecoded::Json(_)),
            "BOM-prefixed JSON should be parsed as JSON"
        );
        assert!(decoded.get_field("EventID").is_some());
        assert!(decoded.get_field("user").is_some());
        assert!(decoded.get_field("syslog_hostname").is_some());
    }

    /// With `strip_bom: false` the BOM stays at the front of `_raw`.
    #[test]
    fn rfc5424_keep_bom_when_disabled() {
        let config = SyslogConfig {
            strip_bom: false,
            ..SyslogConfig::default()
        };
        let line =
            "<34>1 2003-10-11T22:14:15.003Z mymachine.example.com su - ID47 - \u{FEFF}an event";
        let decoded = parse_syslog(line, &config);
        let raw = decoded
            .get_field("_raw")
            .and_then(|v| v.as_str().map(|s| s.into_owned()))
            .expect("_raw present");
        assert!(
            raw.starts_with('\u{FEFF}'),
            "BOM should be preserved when stripping is disabled"
        );
    }

    /// A body that is nothing but a BOM is treated as empty.
    #[test]
    fn bom_only_message() {
        // A message consisting solely of a BOM collapses to empty after the
        // strip, so no _raw field is emitted.
        let line = "<34>1 2003-10-11T22:14:15.003Z mymachine.example.com su - ID47 - \u{FEFF}";
        let decoded = parse_syslog(line, &SyslogConfig::default());
        assert!(decoded.get_field("_raw").is_none());
        assert!(decoded.get_field("hostname").is_some());
    }
}