use crate::event::Event;
use crate::pipeline::EventParser;
use anyhow::{Context, Result};
use regex::Regex;
use rhai::Dynamic;
pub struct SyslogParser {
rfc5424_regex: Regex,
rfc3164_regex: Regex,
auto_timestamp: bool,
}
impl SyslogParser {
fn build(auto_timestamp: bool) -> Result<Self> {
let rfc5424_regex = Regex::new(
r"^<(\d{1,3})>(\d+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)(?:\s+(.*))?(?:\r?\n)?$",
)
.context("Failed to compile RFC5424 regex")?;
let rfc3164_regex = Regex::new(
r"^(?:<(\d{1,3})>)?(\w{3}\s+\d{1,2}\s+\d{2}:\d{2}:\d{2})\s+(\S+)\s+([^:\[\s]+)(?:\[(\d+)\])?\s*:\s*(.*)(?:\r?\n)?$"
).context("Failed to compile RFC3164 regex")?;
Ok(Self {
rfc5424_regex,
rfc3164_regex,
auto_timestamp,
})
}
pub fn new() -> Result<Self> {
Self::build(true)
}
pub fn new_without_auto_timestamp() -> Result<Self> {
Self::build(false)
}
fn parse_priority(priority: u32) -> (u32, u32) {
let facility = priority >> 3;
let severity = priority & 7;
(facility, severity)
}
fn severity_to_level(severity: u32) -> &'static str {
match severity {
0 => "EMERG",
1 => "ALERT",
2 => "CRIT",
3 => "ERROR",
4 => "WARN",
5 => "NOTICE",
6 => "INFO",
7 => "DEBUG",
_ => "UNKNOWN",
}
}
fn try_parse_rfc5424(&self, line: &str) -> Option<Event> {
if let Some(captures) = self.rfc5424_regex.captures(line) {
let priority_str = captures.get(1)?.as_str();
let priority: u32 = priority_str.parse().ok()?;
if priority > 191 {
return None;
}
let (facility, severity) = Self::parse_priority(priority);
let mut event = Event::with_capacity(line.to_string(), 11);
event.set_field("pri".to_string(), Dynamic::from(priority as i64));
event.set_field("facility".to_string(), Dynamic::from(facility as i64));
event.set_field("severity".to_string(), Dynamic::from(severity as i64));
event.set_field(
"level".to_string(),
Dynamic::from(Self::severity_to_level(severity)),
);
if let Some(version) = captures.get(2) {
if let Ok(v) = version.as_str().parse::<i64>() {
event.set_field("version".to_string(), Dynamic::from(v));
}
}
if let Some(ts) = captures.get(3) {
let ts_str = ts.as_str();
if ts_str != "-" {
event.set_field("ts".to_string(), Dynamic::from(ts_str.to_string()));
}
}
if let Some(host) = captures.get(4) {
let host_str = host.as_str();
if host_str != "-" {
event.set_field("host".to_string(), Dynamic::from(host_str.to_string()));
}
}
if let Some(prog) = captures.get(5) {
let prog_str = prog.as_str();
if prog_str != "-" {
event.set_field("prog".to_string(), Dynamic::from(prog_str.to_string()));
}
}
if let Some(pid) = captures.get(6) {
let pid_str = pid.as_str();
if pid_str != "-" {
if let Ok(pid_num) = pid_str.parse::<i64>() {
event.set_field("pid".to_string(), Dynamic::from(pid_num));
} else {
event.set_field("pid".to_string(), Dynamic::from(pid_str.to_string()));
}
}
}
if let Some(msgid) = captures.get(7) {
let msgid_str = msgid.as_str();
if msgid_str != "-" {
event.set_field("msgid".to_string(), Dynamic::from(msgid_str.to_string()));
}
}
if let Some(msg) = captures.get(9) {
event.set_field("msg".to_string(), Dynamic::from(msg.as_str().to_string()));
}
if self.auto_timestamp {
event.extract_timestamp();
}
Some(event)
} else {
None
}
}
fn try_parse_rfc3164(&self, line: &str) -> Option<Event> {
if let Some(captures) = self.rfc3164_regex.captures(line) {
let mut event = Event::with_capacity(line.to_string(), 8);
if let Some(priority_match) = captures.get(1) {
let priority: u32 = priority_match.as_str().parse().ok()?;
if priority > 191 {
return None;
}
let (facility, severity) = Self::parse_priority(priority);
event.set_field("pri".to_string(), Dynamic::from(priority as i64));
event.set_field("facility".to_string(), Dynamic::from(facility as i64));
event.set_field("severity".to_string(), Dynamic::from(severity as i64));
event.set_field(
"level".to_string(),
Dynamic::from(Self::severity_to_level(severity)),
);
}
if let Some(ts) = captures.get(2) {
event.set_field("ts".to_string(), Dynamic::from(ts.as_str().to_string()));
}
if let Some(host) = captures.get(3) {
event.set_field("host".to_string(), Dynamic::from(host.as_str().to_string()));
}
if let Some(prog) = captures.get(4) {
event.set_field("prog".to_string(), Dynamic::from(prog.as_str().to_string()));
}
if let Some(pid) = captures.get(5) {
if let Ok(pid_num) = pid.as_str().parse::<i64>() {
event.set_field("pid".to_string(), Dynamic::from(pid_num));
} else {
event.set_field("pid".to_string(), Dynamic::from(pid.as_str().to_string()));
}
}
if let Some(msg) = captures.get(6) {
event.set_field("msg".to_string(), Dynamic::from(msg.as_str().to_string()));
}
if self.auto_timestamp {
event.extract_timestamp();
}
Some(event)
} else {
None
}
}
}
impl EventParser for SyslogParser {
fn parse(&self, line: &str) -> Result<Event> {
let line = line.trim_end_matches('\n').trim_end_matches('\r');
if let Some(event) = self.try_parse_rfc5424(line) {
Ok(event)
} else if let Some(event) = self.try_parse_rfc3164(line) {
Ok(event)
} else {
Err(anyhow::anyhow!("Invalid syslog format"))
}
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::pipeline::EventParser;
#[test]
fn test_syslog_parser_rfc5424() {
let parser = SyslogParser::new().unwrap();
let line =
"<165>1 2023-10-11T22:14:15.003Z server01 sshd 1234 ID47 - Failed password for user";
let result = EventParser::parse(&parser, line).unwrap();
assert_eq!(result.fields.get("pri").unwrap().as_int().unwrap(), 165);
assert_eq!(result.fields.get("facility").unwrap().as_int().unwrap(), 20); assert_eq!(result.fields.get("severity").unwrap().as_int().unwrap(), 5);
assert_eq!(result.fields.get("version").unwrap().as_int().unwrap(), 1);
assert_eq!(
result
.fields
.get("ts")
.unwrap()
.clone()
.into_string()
.unwrap(),
"2023-10-11T22:14:15.003Z"
);
assert_eq!(
result
.fields
.get("host")
.unwrap()
.clone()
.into_string()
.unwrap(),
"server01"
);
assert_eq!(
result
.fields
.get("prog")
.unwrap()
.clone()
.into_string()
.unwrap(),
"sshd"
);
assert_eq!(result.fields.get("pid").unwrap().as_int().unwrap(), 1234);
assert_eq!(
result
.fields
.get("msgid")
.unwrap()
.clone()
.into_string()
.unwrap(),
"ID47"
);
assert_eq!(
result
.fields
.get("msg")
.unwrap()
.clone()
.into_string()
.unwrap(),
"Failed password for user"
);
}
#[test]
fn test_syslog_parser_rfc3164() {
let parser = SyslogParser::new().unwrap();
let line =
"Oct 11 22:14:15 server01 sshd[1234]: Failed password for user from 192.168.1.100";
let result = EventParser::parse(&parser, line).unwrap();
assert_eq!(
result
.fields
.get("ts")
.unwrap()
.clone()
.into_string()
.unwrap(),
"Oct 11 22:14:15"
);
assert_eq!(
result
.fields
.get("host")
.unwrap()
.clone()
.into_string()
.unwrap(),
"server01"
);
assert_eq!(
result
.fields
.get("prog")
.unwrap()
.clone()
.into_string()
.unwrap(),
"sshd"
);
assert_eq!(result.fields.get("pid").unwrap().as_int().unwrap(), 1234);
assert_eq!(
result
.fields
.get("msg")
.unwrap()
.clone()
.into_string()
.unwrap(),
"Failed password for user from 192.168.1.100"
);
assert!(result.parsed_ts.is_some());
}
#[test]
fn test_syslog_parser_rfc3164_no_pid() {
let parser = SyslogParser::new().unwrap();
let line = "Oct 11 22:14:15 server01 kernel: CPU0: Core temperature above threshold";
let result = EventParser::parse(&parser, line).unwrap();
assert_eq!(
result
.fields
.get("ts")
.unwrap()
.clone()
.into_string()
.unwrap(),
"Oct 11 22:14:15"
);
assert_eq!(
result
.fields
.get("host")
.unwrap()
.clone()
.into_string()
.unwrap(),
"server01"
);
assert_eq!(
result
.fields
.get("prog")
.unwrap()
.clone()
.into_string()
.unwrap(),
"kernel"
);
assert!(result.fields.get("pid").is_none()); assert_eq!(
result
.fields
.get("msg")
.unwrap()
.clone()
.into_string()
.unwrap(),
"CPU0: Core temperature above threshold"
);
}
#[test]
fn test_syslog_parser_rfc3164_with_priority() {
let parser = SyslogParser::new().unwrap();
let line = "<34>Oct 11 22:14:15 webserver nginx: 192.168.1.10 - - [11/Oct/2023:22:14:15 +0000] \"GET /index.html HTTP/1.1\" 200 612";
let result = EventParser::parse(&parser, line).unwrap();
assert_eq!(result.fields.get("pri").unwrap().as_int().unwrap(), 34);
assert_eq!(result.fields.get("facility").unwrap().as_int().unwrap(), 4); assert_eq!(result.fields.get("severity").unwrap().as_int().unwrap(), 2);
assert_eq!(
result
.fields
.get("ts")
.unwrap()
.clone()
.into_string()
.unwrap(),
"Oct 11 22:14:15"
);
assert_eq!(
result
.fields
.get("host")
.unwrap()
.clone()
.into_string()
.unwrap(),
"webserver"
);
assert_eq!(
result
.fields
.get("prog")
.unwrap()
.clone()
.into_string()
.unwrap(),
"nginx"
);
assert!(result.fields.get("pid").is_none()); assert_eq!(
result
.fields
.get("msg")
.unwrap()
.clone()
.into_string()
.unwrap(),
"192.168.1.10 - - [11/Oct/2023:22:14:15 +0000] \"GET /index.html HTTP/1.1\" 200 612"
);
}
#[test]
fn test_syslog_parser_priority_calculation() {
let parser = SyslogParser::new().unwrap();
let test_cases = [
(0, 0, 0), (33, 4, 1), (165, 20, 5), (191, 23, 7), ];
for (priority, expected_facility, expected_severity) in test_cases {
let line = format!(
"<{}>1 2023-10-11T22:14:15.003Z server01 test - - - Test message",
priority
);
let result = EventParser::parse(&parser, &line).unwrap();
assert_eq!(
result.fields.get("pri").unwrap().as_int().unwrap(),
priority as i64
);
assert_eq!(
result.fields.get("facility").unwrap().as_int().unwrap(),
expected_facility
);
assert_eq!(
result.fields.get("severity").unwrap().as_int().unwrap(),
expected_severity
);
}
}
#[test]
fn test_syslog_parser_invalid_priority() {
let parser = SyslogParser::new().unwrap();
let line = "<999>1 2023-10-11T22:14:15.003Z server01 test - - - Test message";
assert!(EventParser::parse(&parser, line).is_err());
}
#[test]
fn test_syslog_parser_invalid_format() {
let parser = SyslogParser::new().unwrap();
let line = "This is not a syslog line";
assert!(EventParser::parse(&parser, line).is_err());
}
#[test]
fn test_syslog_parser_fallback_to_rfc3164() {
let parser = SyslogParser::new().unwrap();
let line = "Dec 25 14:09:07 server01 httpd: GET /index.html HTTP/1.1";
let result = EventParser::parse(&parser, line).unwrap();
assert_eq!(
result
.fields
.get("ts")
.unwrap()
.clone()
.into_string()
.unwrap(),
"Dec 25 14:09:07"
);
assert_eq!(
result
.fields
.get("host")
.unwrap()
.clone()
.into_string()
.unwrap(),
"server01"
);
assert_eq!(
result
.fields
.get("prog")
.unwrap()
.clone()
.into_string()
.unwrap(),
"httpd"
);
assert_eq!(
result
.fields
.get("msg")
.unwrap()
.clone()
.into_string()
.unwrap(),
"GET /index.html HTTP/1.1"
);
}
}