anomalyx_normalize/parsers/
syslog.rs1use crate::parser::{Confidence, FormatParser, STRONG};
24use crate::table::TableBuilder;
25use ax_core::{AxError, Column, Value};
26use chrono::Utc;
27use std::collections::BTreeMap;
28use syslog_loose::{parse_message_with_year_tz, ProcId, Protocol, Variant};
29
/// Stateless parser for syslog-formatted text.
///
/// Carries no configuration; all behavior lives in its `FormatParser` implementation.
#[derive(Clone, Debug, Default)]
pub struct SyslogParser;
32
/// Year returned by the year callback passed to `parse_message_with_year_tz` for every line.
///
/// Timestamps that carry no year of their own therefore come out dated 1970; the tests in this
/// file assert that `1970-` prefix on the RFC 3164 fixture line.
const SENTINEL_YEAR: i32 = 1970;
36
37fn looks_like_syslog(line: &str) -> bool {
43 if parse_pri(line).is_some() {
44 return true;
45 }
46 let m = parse_message_with_year_tz(line, |_| SENTINEL_YEAR, Some(Utc), Variant::Either);
47 m.timestamp.is_some() && m.hostname.is_some() && m.appname.is_some()
48}
49
/// Decodes a leading `<PRI>` header into `(facility, severity)`.
///
/// The priority value encodes `facility * 8 + severity`, so the largest accepted value is
/// `23 * 8 + 7 = 191`.
///
/// Returns `None` when `line` does not start with `<`, has no closing `>`, the text between the
/// brackets is not one to three ASCII digits, or the decoded value exceeds 191.
fn parse_pri(line: &str) -> Option<(i64, i64)> {
    let rest = line.strip_prefix('<')?;
    let (digits, _) = rest.split_once('>')?;

    // `u16::from_str` accepts a leading `+` and arbitrarily long zero-padded input; neither is a
    // valid priority value (RFC 5424: PRIVAL = 1*3DIGIT), so check the shape before parsing.
    let well_formed =
        (1..=3).contains(&digits.len()) && digits.bytes().all(|b| b.is_ascii_digit());
    if !well_formed {
        return None;
    }

    let pri: u16 = digits.parse().ok()?;
    (pri <= 191).then(|| (i64::from(pri / 8), i64::from(pri % 8)))
}
58
59impl SyslogParser {
60 fn err(&self, msg: impl std::fmt::Display) -> AxError {
61 AxError::Parse {
62 format: self.id().to_string(),
63 message: msg.to_string(),
64 }
65 }
66}
67
68impl FormatParser for SyslogParser {
69 fn id(&self) -> &'static str {
70 "syslog"
71 }
72 fn extensions(&self) -> &'static [&'static str] {
73 &["syslog"]
74 }
75 fn sniff(&self, bytes: &[u8]) -> Option<Confidence> {
76 let text = std::str::from_utf8(bytes).ok()?;
77 let line = text.lines().find(|l| !l.trim().is_empty())?;
78 looks_like_syslog(line).then_some(STRONG)
79 }
80 fn parse(&self, _source: &str, bytes: &[u8]) -> Result<Vec<Column>, AxError> {
81 let text = std::str::from_utf8(bytes).map_err(|e| self.err(e))?;
82 let mut builder = TableBuilder::new();
83 for line in text.lines() {
84 if line.trim().is_empty() {
85 continue;
86 }
87 let pri = parse_pri(line);
88 let msg =
89 parse_message_with_year_tz(line, |_| SENTINEL_YEAR, Some(Utc), Variant::Either);
90 if pri.is_none() && msg.timestamp.is_none() {
95 return Err(
96 self.err("not a syslog line: no <PRI> header and no recognizable timestamp")
97 );
98 }
99
100 let mut row: BTreeMap<String, Value> = BTreeMap::new();
101 if let Some((facility, severity)) = pri {
104 row.insert("facility".into(), Value::Int(facility));
105 row.insert("severity".into(), Value::Int(severity));
106 }
107 row.insert(
108 "protocol".into(),
109 Value::Str(
110 match msg.protocol {
111 Protocol::RFC3164 => "RFC3164",
112 Protocol::RFC5424(_) => "RFC5424",
113 }
114 .to_string(),
115 ),
116 );
117 if let Some(ts) = msg.timestamp {
118 row.insert("timestamp".into(), Value::Str(ts.to_string()));
119 }
120 if let Some(host) = msg.hostname {
121 row.insert("hostname".into(), Value::Str(host.to_string()));
122 }
123 if let Some(app) = msg.appname {
124 row.insert("appname".into(), Value::Str(app.to_string()));
125 }
126 if let Some(procid) = msg.procid {
127 let v = match procid {
128 ProcId::PID(pid) => Value::Int(pid as i64),
129 ProcId::Name(name) => Value::Str(name.to_string()),
130 };
131 row.insert("procid".into(), v);
132 }
133 if let Some(msgid) = msg.msgid {
134 row.insert("msgid".into(), Value::Str(msgid.to_string()));
135 }
136 for element in &msg.structured_data {
137 for (key, value) in &element.params {
138 row.insert(
139 format!("sd.{}.{}", element.id, key),
140 Value::Str(value.to_string()),
141 );
142 }
143 }
144 row.insert("message".into(), Value::Str(msg.msg.to_string()));
145 builder.push_row(row);
146 }
147 Ok(builder.finish())
148 }
149}
150
#[cfg(test)]
mod tests {
    use super::*;
    use ax_core::ColType;

    /// Two-line fixture: an RFC 5424 record followed by an RFC 3164 record, both with `<PRI>`.
    const SYSLOG: &str = concat!(
        r#"<165>1 2003-10-11T22:14:15.003Z mymachine.example.com evntslog 1234 ID47 [exampleSDID@32473 iut="3" eventID="1011"] App event log entry"#,
        "\n",
        "<34>Oct 11 22:14:15 mymachine su[567]: 'su root' failed for lonvick\n",
    );

    /// Line without a `<PRI>` header, carrying an ISO-style timestamp.
    const ISO_FILE: &[u8] =
        b"2026-06-01T09:14:57.403686-07:00 4ubox NetworkManager[3524]: dhcp4 beginning\n";
    /// Line without a `<PRI>` header, carrying a BSD-style timestamp.
    const BSD_FILE: &[u8] = b"Jun 1 09:14:57 4ubox NetworkManager[3524]: dhcp4 beginning\n";

    /// Parses `s` with `SyslogParser`, panicking on failure.
    fn parse_fixture(s: &str) -> Vec<Column> {
        SyslogParser.parse("-", s.as_bytes()).unwrap()
    }

    /// Looks up a column by name, panicking when it is absent.
    fn find_column<'a>(cols: &'a [Column], name: &str) -> &'a Column {
        match cols.iter().find(|c| c.name == name) {
            Some(found) => found,
            None => panic!("missing column {name}"),
        }
    }

    /// Shorthand for a text cell.
    fn text(s: &str) -> Value {
        Value::Str(s.into())
    }

    #[test]
    fn priority_decodes_to_facility_and_severity() {
        let cols = parse_fixture(SYSLOG);
        let expectations = [("facility", 20, 4), ("severity", 5, 2)];
        for (name, _, _) in expectations {
            assert_eq!(find_column(&cols, name).ty, ColType::Int);
        }
        for (name, first, second) in expectations {
            assert_eq!(
                find_column(&cols, name).cells,
                vec![Value::Int(first), Value::Int(second)]
            );
        }
    }

    #[test]
    fn both_rfc_variants_parse_their_fields() {
        let cols = parse_fixture(SYSLOG);
        let expected_text = [
            ("protocol", "RFC5424", "RFC3164"),
            ("hostname", "mymachine.example.com", "mymachine"),
            ("appname", "evntslog", "su"),
        ];
        for (name, first, second) in expected_text {
            assert_eq!(
                find_column(&cols, name).cells,
                vec![text(first), text(second)]
            );
        }
        assert_eq!(
            find_column(&cols, "procid").cells,
            vec![Value::Int(1234), Value::Int(567)]
        );
    }

    #[test]
    fn rfc5424_only_fields_pad_with_null() {
        let cols = parse_fixture(SYSLOG);

        let msgid = find_column(&cols, "msgid");
        assert_eq!(msgid.cells[0], text("ID47"));
        assert_eq!(msgid.cells[1], Value::Null);

        let iut = find_column(&cols, "sd.exampleSDID@32473.iut");
        assert_eq!(iut.cells[0], text("3"));
        assert_eq!(iut.cells[1], Value::Null);

        let event_id = find_column(&cols, "sd.exampleSDID@32473.eventID");
        assert_eq!(event_id.cells[0], text("1011"));
    }

    #[test]
    fn message_body_is_captured() {
        let cols = parse_fixture(SYSLOG);
        let bodies = &find_column(&cols, "message").cells;
        assert_eq!(bodies[0], text("App event log entry"));
        assert_eq!(bodies[1], text("'su root' failed for lonvick"));
    }

    #[test]
    fn deterministic_across_calls() {
        let first_run = format!("{:?}", parse_fixture(SYSLOG));
        let second_run = format!("{:?}", parse_fixture(SYSLOG));
        assert_eq!(first_run, second_run);

        // The RFC 3164 line has no year, so it must pick up the sentinel.
        let cols = parse_fixture(SYSLOG);
        let stamps = &find_column(&cols, "timestamp").cells;
        match &stamps[1] {
            Value::Str(s) => assert!(s.starts_with("1970-"), "sentinel year, got {s}"),
            other => panic!("expected Str timestamp, got {other:?}"),
        }
    }

    #[test]
    fn parse_pri_units() {
        let cases: [(&str, Option<(i64, i64)>); 8] = [
            ("<0>x", Some((0, 0))),
            ("<34>x", Some((4, 2))),
            ("<165>x", Some((20, 5))),
            ("<191>x", Some((23, 7))),
            ("<192>x", None),
            ("<abc>x", None),
            ("<34", None),
            ("no bracket", None),
        ];
        for (input, expected) in cases {
            assert_eq!(parse_pri(input), expected);
        }
    }

    #[test]
    fn malformed_lines_error() {
        let rejected: [&[u8]; 2] = [b"this is not syslog\n", b"<192>priority out of range\n"];
        for input in rejected {
            let outcome = SyslogParser.parse("-", input);
            assert!(matches!(outcome, Err(AxError::Parse { .. })));
        }
    }

    #[test]
    fn sniff_keys_on_pri_header() {
        let accepted: [&[u8]; 2] = [SYSLOG.as_bytes(), b"<13>Feb 5 17:32:18 host app: msg\n"];
        for input in accepted {
            assert_eq!(SyslogParser.sniff(input), Some(STRONG));
        }

        let rejected: [&[u8]; 5] = [
            b"<999>bad pri\n",
            b"<?xml version=\"1.0\"?>",
            b"plain text line\n",
            b"{\"a\":1}",
            b"a,b,c\n1,2,3",
        ];
        for input in rejected {
            assert_eq!(SyslogParser.sniff(input), None);
        }
    }

    #[test]
    fn sniff_recognizes_pri_less_file_format() {
        for input in [ISO_FILE, BSD_FILE] {
            assert_eq!(SyslogParser.sniff(input), Some(STRONG));
        }
        assert_eq!(SyslogParser.sniff(b"2026-06-01T09:14:57,42,foo\n"), None);

        let registry = crate::parser::ParserRegistry::default();
        assert_eq!(registry.resolve("-", ISO_FILE).unwrap().id(), "syslog");
    }

    #[test]
    fn pri_less_file_line_parses_without_facility_severity() {
        let cols = SyslogParser.parse("-", ISO_FILE).unwrap();

        assert_eq!(find_column(&cols, "hostname").cells[0], text("4ubox"));
        assert_eq!(
            find_column(&cols, "appname").cells[0],
            text("NetworkManager")
        );
        assert_eq!(find_column(&cols, "procid").cells[0], Value::Int(3524));
        assert!(matches!(
            &find_column(&cols, "timestamp").cells[0],
            Value::Str(s) if s.starts_with("2026-06-01")
        ));

        // Without a <PRI> header there is nothing to decode into these columns.
        let has_priority_columns = cols
            .iter()
            .any(|c| c.name == "facility" || c.name == "severity");
        assert!(!has_priority_columns);
    }

    #[test]
    fn claims_syslog_extension() {
        let claimed = SyslogParser.extensions();
        assert_eq!(claimed, &["syslog"]);
    }

    #[test]
    fn resolves_by_extension_and_content() {
        let registry = crate::parser::ParserRegistry::default();
        assert_eq!(
            registry
                .resolve("app.syslog", b"<34>Oct 11 22:14:15 h a: m")
                .unwrap()
                .id(),
            "syslog"
        );
        assert_eq!(
            registry.resolve("-", SYSLOG.as_bytes()).unwrap().id(),
            "syslog"
        );
    }
}