use chrono::{NaiveDateTime, Utc};
use regex::Regex;
use serde::{Deserialize, Serialize};
use crate::error::{Error, Result};
/// Timestamp layouts that a [`DateParser`] can recognise inside a log line.
///
/// Serialized and deserialized with lowercase variant names
/// (`"syslog"`, `"iso8601"`, `"epoch"`, `"common"`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum DateFormat {
/// Month abbreviation, day of month and `HH:MM:SS` time with no year,
/// e.g. `Jan 15 10:30:00`.
Syslog,
/// `YYYY-MM-DD` followed by `T` or a space and `HH:MM:SS`,
/// e.g. `2024-01-15T10:30:00`.
Iso8601,
/// A run of ten or more decimal digits, e.g. `1705312200`.
Epoch,
/// `DD/Mon/YYYY:HH:MM:SS`, e.g. `15/Jan/2024:10:30:00`.
Common,
}
/// Extracts a timestamp in one fixed [`DateFormat`] from log lines.
pub struct DateParser {
// Layout this parser was built for; selects the conversion in `parse_line`.
format: DateFormat,
// Compiled pattern for the regex-backed formats; `None` for
// `DateFormat::Iso8601`, which is scanned without a regex.
regex: Option<Regex>,
}
impl DateParser {
pub fn new(format: DateFormat) -> Result<Self> {
let regex = if format == DateFormat::Iso8601 {
None
} else {
let pattern = match format {
DateFormat::Syslog => r"([A-Z][a-z]{2})\s+(\d{1,2})\s+(\d{2}):(\d{2}):(\d{2})",
DateFormat::Epoch => r"(\d{10,})",
DateFormat::Common => r"(\d{2})/([A-Z][a-z]{2})/(\d{4}):(\d{2}):(\d{2}):(\d{2})",
DateFormat::Iso8601 => unreachable!(),
};
Some(Regex::new(pattern).map_err(|e| Error::Regex {
pattern: pattern.to_string(),
source: e,
})?)
};
Ok(Self { format, regex })
}
pub fn parse_line(&self, line: &str) -> Option<i64> {
if self.format == DateFormat::Iso8601 {
scan_iso8601(line.as_bytes())
} else {
let caps = self.regex.as_ref()?.captures(line)?;
match self.format {
DateFormat::Syslog => parse_syslog(&caps),
DateFormat::Epoch => parse_epoch(&caps),
DateFormat::Common => parse_common(&caps),
DateFormat::Iso8601 => unreachable!(),
}
}
}
}
fn parse_syslog(caps: ®ex::Captures<'_>) -> Option<i64> {
let month_str = caps.get(1)?.as_str();
let day: u32 = caps.get(2)?.as_str().parse().ok()?;
let hour: u32 = caps.get(3)?.as_str().parse().ok()?;
let min: u32 = caps.get(4)?.as_str().parse().ok()?;
let sec: u32 = caps.get(5)?.as_str().parse().ok()?;
let month = month_from_abbr(month_str)?;
let year = Utc::now().format("%Y").to_string().parse::<i32>().ok()?;
let dt = NaiveDateTime::new(
chrono::NaiveDate::from_ymd_opt(year, month, day)?,
chrono::NaiveTime::from_hms_opt(hour, min, sec)?,
);
Some(dt.and_utc().timestamp())
}
fn parse_epoch(caps: ®ex::Captures<'_>) -> Option<i64> {
caps.get(1)?.as_str().parse::<i64>().ok()
}
fn parse_common(caps: ®ex::Captures<'_>) -> Option<i64> {
let day: u32 = caps.get(1)?.as_str().parse().ok()?;
let month_str = caps.get(2)?.as_str();
let year: i32 = caps.get(3)?.as_str().parse().ok()?;
let hour: u32 = caps.get(4)?.as_str().parse().ok()?;
let min: u32 = caps.get(5)?.as_str().parse().ok()?;
let sec: u32 = caps.get(6)?.as_str().parse().ok()?;
let month = month_from_abbr(month_str)?;
let dt = NaiveDateTime::new(
chrono::NaiveDate::from_ymd_opt(year, month, day)?,
chrono::NaiveTime::from_hms_opt(hour, min, sec)?,
);
Some(dt.and_utc().timestamp())
}
/// Finds the first `YYYY-MM-DD[T ]HH:MM:SS` timestamp in `b` and returns it
/// as Unix seconds.
///
/// Only those 19 bytes are examined: fractional seconds and zone
/// designators after them are ignored and the fields are treated as UTC.
///
/// A window that merely has the right punctuation but non-digit fields, or
/// that describes an impossible date or time (month 13, Feb 30, hour 24),
/// is skipped and scanning continues with the next position. Returns `None`
/// when no window qualifies.
fn scan_iso8601(b: &[u8]) -> Option<i64> {
    const STAMP_LEN: usize = 19;

    // `windows` yields nothing for inputs shorter than one timestamp.
    for window in b.windows(STAMP_LEN) {
        // Cheap structural test first: separators at their fixed offsets.
        let punctuated = matches!(
            window,
            [
                _, _, _, _, b'-', _, _, b'-', _, _, b'T' | b' ', _, _, b':', _, _, b':', _, _
            ]
        );
        if !punctuated {
            continue;
        }

        // Non-digit fields disqualify this window only, not the whole line.
        let (Some(year), Some(month), Some(day), Some(hour), Some(min), Some(sec)) = (
            parse_4(window, 0),
            parse_2(window, 5),
            parse_2(window, 8),
            parse_2(window, 11),
            parse_2(window, 14),
            parse_2(window, 17),
        ) else {
            continue;
        };
        let Ok(year) = i32::try_from(year) else {
            continue;
        };

        let leap = year % 4 == 0 && (year % 100 != 0 || year % 400 == 0);
        let days_in_month = match month {
            1 | 3 | 5 | 7 | 8 | 10 | 12 => 31,
            4 | 6 | 9 | 11 => 30,
            2 if leap => 29,
            2 => 28,
            _ => continue,
        };

        if (1..=days_in_month).contains(&day) && hour <= 23 && min <= 59 && sec <= 59 {
            return Some(unix_timestamp(year, month, day, hour, min, sec));
        }
    }
    None
}
/// Reads the two ASCII digits at `b[pos..pos + 2]` as a decimal number.
///
/// Returns `None` if the range runs past the end of `b` or either byte is
/// not an ASCII digit.
#[inline]
fn parse_2(b: &[u8], pos: usize) -> Option<u32> {
    let end = pos.checked_add(2)?;
    b.get(pos..end)?.iter().try_fold(0u32, |acc, &byte| {
        byte.is_ascii_digit()
            .then(|| acc * 10 + u32::from(byte - b'0'))
    })
}
/// Reads the four ASCII digits at `b[pos..pos + 4]` as a decimal number.
///
/// Returns `None` if the range runs past the end of `b` or any of the four
/// bytes is not an ASCII digit.
#[inline]
fn parse_4(b: &[u8], pos: usize) -> Option<u32> {
    let end = pos.checked_add(4)?;
    let digits = b.get(pos..end)?;

    let mut value = 0u32;
    for &byte in digits {
        if !byte.is_ascii_digit() {
            return None;
        }
        value = value * 10 + u32::from(byte - b'0');
    }
    Some(value)
}
/// Converts a proleptic Gregorian UTC date and time to Unix seconds.
///
/// Uses the era-based "days from civil" computation: the year is shifted to
/// start on 1 March so the leap day falls at the end of the year, and years
/// are grouped into 400-year eras of exactly 146 097 days.
///
/// The fields are not validated; callers are expected to pass a real
/// calendar date. All arithmetic is signed, so out-of-range fields (such as
/// day 0) extrapolate linearly instead of underflowing.
fn unix_timestamp(year: i32, month: u32, day: u32, hour: u32, min: u32, sec: u32) -> i64 {
    const SECS_PER_DAY: i64 = 86_400;
    const DAYS_PER_ERA: i64 = 146_097;
    // Days between 0000-03-01 and 1970-01-01.
    const EPOCH_OFFSET_DAYS: i64 = 719_468;

    let (month, day) = (i64::from(month), i64::from(day));

    // January and February belong to the previous March-based year.
    let shifted_year = i64::from(year) - i64::from(month <= 2);
    let shifted_month = if month > 2 { month - 3 } else { month + 9 };

    // Euclidean division keeps `year_of_era` in 0..400 for negative years.
    let era = shifted_year.div_euclid(400);
    let year_of_era = shifted_year.rem_euclid(400);

    let day_of_year = (153 * shifted_month + 2) / 5 + day - 1;
    let day_of_era = year_of_era * 365 + year_of_era / 4 - year_of_era / 100 + day_of_year;
    let days = era * DAYS_PER_ERA + day_of_era - EPOCH_OFFSET_DAYS;

    days * SECS_PER_DAY + i64::from(hour) * 3600 + i64::from(min) * 60 + i64::from(sec)
}
/// Maps a three-letter English month abbreviation (`"Jan"` .. `"Dec"`) to
/// its month number, 1 through 12.
///
/// The comparison is exact and case-sensitive; anything else yields `None`.
fn month_from_abbr(s: &str) -> Option<u32> {
    const ABBREVIATIONS: [&str; 12] = [
        "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec",
    ];

    ABBREVIATIONS
        .iter()
        .zip(1u32..)
        .find_map(|(&abbr, number)| (abbr == s).then_some(number))
}
#[cfg(test)]
mod tests {
    use crate::detect::date::{DateFormat, DateParser};

    /// Unix seconds for 2024-01-15 10:30:00 UTC, the instant used by the
    /// fixed-year fixtures below.
    const JAN_15_2024_10_30: i64 = 1_705_314_600;

    /// Builds a parser for `format`; the built-in patterns must always compile.
    fn parser_for(format: DateFormat) -> DateParser {
        DateParser::new(format).expect("built-in pattern should compile")
    }

    // Syslog lines carry no year, so these tests cannot pin an exact value.
    #[test]
    fn syslog_format() {
        let parser = parser_for(DateFormat::Syslog);
        let line = "Jan 15 10:30:00 server sshd[1234]: Failed password";
        let ts = parser
            .parse_line(line)
            .expect("syslog timestamp should parse");
        assert!(ts > 0);
    }

    #[test]
    fn syslog_single_digit_day() {
        let parser = parser_for(DateFormat::Syslog);
        let line = "Feb 3 08:15:22 host kernel: something";
        assert!(parser.parse_line(line).is_some());
    }

    #[test]
    fn iso8601_format() {
        let parser = parser_for(DateFormat::Iso8601);
        let line = "2024-01-15T10:30:00Z some log message";
        assert_eq!(parser.parse_line(line), Some(JAN_15_2024_10_30));
        // Parsing is stateless: a second call yields the same value.
        assert_eq!(parser.parse_line(line), Some(JAN_15_2024_10_30));
    }

    #[test]
    fn iso8601_space_separator() {
        let parser = parser_for(DateFormat::Iso8601);
        let with_space = parser.parse_line("2024-01-15 10:30:00 some log message");
        let with_t = parser.parse_line("2024-01-15T10:30:00 some log message");
        assert_eq!(with_space, Some(JAN_15_2024_10_30));
        assert_eq!(with_t, Some(JAN_15_2024_10_30));
    }

    #[test]
    fn epoch_format() {
        let parser = parser_for(DateFormat::Epoch);
        let line = "1705312200 something happened";
        assert_eq!(parser.parse_line(line), Some(1_705_312_200));
    }

    #[test]
    fn common_log_format() {
        let parser = parser_for(DateFormat::Common);
        let line = r#"192.168.1.1 - - [15/Jan/2024:10:30:00 +0000] "GET / HTTP/1.1""#;
        let ts = parser.parse_line(line);
        assert_eq!(ts, Some(JAN_15_2024_10_30));

        // The chrono-backed and hand-rolled conversions must agree.
        let iso_ts = parser_for(DateFormat::Iso8601).parse_line("2024-01-15T10:30:00 log");
        assert_eq!(ts, iso_ts);
    }

    #[test]
    fn no_match_returns_none() {
        let parser = parser_for(DateFormat::Iso8601);
        assert_eq!(parser.parse_line("this line has no date"), None);
    }

    #[test]
    fn wrong_format_returns_none() {
        let parser = parser_for(DateFormat::Epoch);
        assert_eq!(parser.parse_line("Jan 15 10:30:00 syslog format line"), None);
    }

    #[test]
    fn all_months_parse() {
        let parser = parser_for(DateFormat::Syslog);
        let months = [
            "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec",
        ];
        for month in months {
            let line = format!("{month} 15 12:00:00 test");
            assert!(
                parser.parse_line(&line).is_some(),
                "failed to parse month: {month}"
            );
        }
    }

    #[test]
    fn invalid_month_returns_none() {
        let parser = parser_for(DateFormat::Syslog);
        assert_eq!(parser.parse_line("Xyz 15 10:30:00 server test"), None);
    }

    #[test]
    fn epoch_nine_digits_no_match() {
        let parser = parser_for(DateFormat::Epoch);
        assert_eq!(parser.parse_line("999999999 short"), None);
    }

    #[test]
    fn epoch_ten_digits_matches() {
        let parser = parser_for(DateFormat::Epoch);
        assert_eq!(
            parser.parse_line("1000000000 ten digits"),
            Some(1_000_000_000)
        );
    }

    #[test]
    fn common_invalid_month_returns_none() {
        let parser = parser_for(DateFormat::Common);
        let line = r#"10.0.0.1 - - [15/Xyz/2024:10:30:00 +0000] "GET /""#;
        assert_eq!(parser.parse_line(line), None);
    }

    #[test]
    fn syslog_invalid_time_returns_none() {
        let parser = parser_for(DateFormat::Syslog);
        assert_eq!(parser.parse_line("Jan 15 25:30:00 server test"), None);
    }
}