use chrono::{NaiveDateTime, Utc};
use regex::Regex;
use serde::{Deserialize, Serialize};
use crate::error::{Error, Result};
/// Timestamp layouts that [`DateParser`] knows how to find in a line of text.
///
/// With `rename_all = "lowercase"` serde reads and writes the variants as
/// `"syslog"`, `"iso8601"`, `"epoch"` and `"common"`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum DateFormat {
/// `Mon D HH:MM:SS` (three-letter month, 1-2 digit day). The text carries no
/// year, so the parser has to supply one.
Syslog,
/// `YYYY-MM-DD` followed by `T` or a space and `HH:MM:SS`; located with a
/// byte scan instead of a regex.
Iso8601,
/// A run of ten or more decimal digits, returned as the parsed integer.
Epoch,
/// `DD/Mon/YYYY:HH:MM:SS` (three-letter month, four-digit year).
Common,
}
/// Extracts a timestamp from a line of text according to one [`DateFormat`].
pub struct DateParser {
// Layout this parser was built for; selects the extraction routine.
format: DateFormat,
// Compiled pattern for the regex-based formats; `None` for `Iso8601`,
// which is handled by a hand-written byte scanner.
regex: Option<Regex>,
}
impl DateParser {
pub fn new(format: DateFormat) -> Result<Self> {
let regex = match format {
DateFormat::Iso8601 => None,
_ => {
let pattern = match format {
DateFormat::Syslog => r"([A-Z][a-z]{2})\s+(\d{1,2})\s+(\d{2}):(\d{2}):(\d{2})",
DateFormat::Epoch => r"(\d{10,})",
DateFormat::Common => {
r"(\d{2})/([A-Z][a-z]{2})/(\d{4}):(\d{2}):(\d{2}):(\d{2})"
}
DateFormat::Iso8601 => unreachable!(),
};
Some(Regex::new(pattern).map_err(|e| Error::Regex {
pattern: pattern.to_string(),
source: e,
})?)
}
};
Ok(Self { format, regex })
}
pub fn parse_line(&self, line: &str) -> Option<i64> {
match self.format {
DateFormat::Iso8601 => scan_iso8601(line.as_bytes()),
_ => {
let caps = self.regex.as_ref()?.captures(line)?;
match self.format {
DateFormat::Syslog => parse_syslog(&caps),
DateFormat::Epoch => parse_epoch(&caps),
DateFormat::Common => parse_common(&caps),
DateFormat::Iso8601 => unreachable!(),
}
}
}
}
}
fn parse_syslog(caps: ®ex::Captures<'_>) -> Option<i64> {
let month_str = caps.get(1)?.as_str();
let day: u32 = caps.get(2)?.as_str().parse().ok()?;
let hour: u32 = caps.get(3)?.as_str().parse().ok()?;
let min: u32 = caps.get(4)?.as_str().parse().ok()?;
let sec: u32 = caps.get(5)?.as_str().parse().ok()?;
let month = month_from_abbr(month_str)?;
let year = Utc::now().format("%Y").to_string().parse::<i32>().ok()?;
let dt = NaiveDateTime::new(
chrono::NaiveDate::from_ymd_opt(year, month, day)?,
chrono::NaiveTime::from_hms_opt(hour, min, sec)?,
);
Some(dt.and_utc().timestamp())
}
fn parse_epoch(caps: ®ex::Captures<'_>) -> Option<i64> {
caps.get(1)?.as_str().parse::<i64>().ok()
}
fn parse_common(caps: ®ex::Captures<'_>) -> Option<i64> {
let day: u32 = caps.get(1)?.as_str().parse().ok()?;
let month_str = caps.get(2)?.as_str();
let year: i32 = caps.get(3)?.as_str().parse().ok()?;
let hour: u32 = caps.get(4)?.as_str().parse().ok()?;
let min: u32 = caps.get(5)?.as_str().parse().ok()?;
let sec: u32 = caps.get(6)?.as_str().parse().ok()?;
let month = month_from_abbr(month_str)?;
let dt = NaiveDateTime::new(
chrono::NaiveDate::from_ymd_opt(year, month, day)?,
chrono::NaiveTime::from_hms_opt(hour, min, sec)?,
);
Some(dt.and_utc().timestamp())
}
/// Finds the first `YYYY-MM-DD[T ]HH:MM:SS` timestamp in `b` and returns it as
/// Unix seconds, treating the fields as UTC.
///
/// Every 19-byte window is tested. A window whose separators line up but whose
/// digit fields are not digits, or whose values are out of range (including a
/// day that does not exist in that month), is skipped and scanning continues
/// with the next position.
///
/// Returns `None` if `b` is shorter than 19 bytes or no window qualifies.
fn scan_iso8601(b: &[u8]) -> Option<i64> {
    // Days in `month` of `year`, proleptic Gregorian calendar.
    fn days_in_month(year: u32, month: u32) -> u32 {
        match month {
            4 | 6 | 9 | 11 => 30,
            2 if year % 4 == 0 && (year % 100 != 0 || year % 400 == 0) => 29,
            2 => 28,
            _ => 31,
        }
    }

    // `windows` yields nothing for inputs shorter than 19 bytes.
    b.windows(19).find_map(|w| {
        // Cheap separator check first; most windows fail here.
        let framed = w[4] == b'-'
            && w[7] == b'-'
            && (w[10] == b'T' || w[10] == b' ')
            && w[13] == b':'
            && w[16] == b':';
        if !framed {
            return None;
        }

        // `?` leaves only this closure, so a bad candidate does not end the scan.
        let year = parse_4(w, 0)?;
        let month = parse_2(w, 5)?;
        let day = parse_2(w, 8)?;
        let hour = parse_2(w, 11)?;
        let min = parse_2(w, 14)?;
        let sec = parse_2(w, 17)?;

        let plausible = (1..=12).contains(&month)
            && (1..=days_in_month(year, month)).contains(&day)
            && hour <= 23
            && min <= 59
            && sec <= 59;
        // `year` is at most 9999, so the cast to `i32` is lossless.
        plausible.then(|| unix_timestamp(year as i32, month, day, hour, min, sec))
    })
}
/// Parses the two ASCII digits at `b[pos..pos + 2]` into a number (0-99).
///
/// Returns `None` if either byte is not an ASCII digit or if fewer than two
/// bytes are available at `pos` (no panic on out-of-range positions).
#[inline(always)]
fn parse_2(b: &[u8], pos: usize) -> Option<u32> {
    // Two-step `get` avoids both the bounds panic and `pos + 2` overflow.
    b.get(pos..)?.get(..2)?.iter().try_fold(0u32, |acc, &byte| {
        byte.is_ascii_digit()
            .then(|| acc * 10 + u32::from(byte - b'0'))
    })
}
/// Parses the four ASCII digits at `b[pos..pos + 4]` into a number (0-9999).
///
/// Returns `None` if any byte is not an ASCII digit or if fewer than four
/// bytes are available at `pos` (no panic on out-of-range positions).
#[inline(always)]
fn parse_4(b: &[u8], pos: usize) -> Option<u32> {
    // Two-step `get` avoids both the bounds panic and `pos + 4` overflow.
    b.get(pos..)?.get(..4)?.iter().try_fold(0u32, |acc, &byte| {
        byte.is_ascii_digit()
            .then(|| acc * 10 + u32::from(byte - b'0'))
    })
}
/// Converts a civil date and time of day into seconds relative to
/// 1970-01-01 00:00:00, using era-based day counting (400-year eras with the
/// year starting in March so the leap day falls at the end).
///
/// The fields are not validated here; callers are expected to pass a month in
/// 1-12 and a day of at least 1.
fn unix_timestamp(year: i32, month: u32, day: u32, hour: u32, min: u32, sec: u32) -> i64 {
    // Shift so March is month 0; January and February belong to the prior year.
    let (shifted_year, shifted_month) = if month <= 2 {
        (i64::from(year) - 1, u64::from(month) + 9)
    } else {
        (i64::from(year), u64::from(month) - 3)
    };

    // Floor division/remainder split the year into a 400-year era and offset.
    let era = shifted_year.div_euclid(400);
    let year_of_era = shifted_year.rem_euclid(400) as u64;

    let day_of_year = (153 * shifted_month + 2) / 5 + u64::from(day) - 1;
    let day_of_era = year_of_era * 365 + year_of_era / 4 - year_of_era / 100 + day_of_year;

    // 719468 is the day count from 0000-03-01 to 1970-01-01.
    let days_since_epoch = era * 146097 + day_of_era as i64 - 719468;
    let seconds_of_day = i64::from(hour) * 3600 + i64::from(min) * 60 + i64::from(sec);
    days_since_epoch * 86400 + seconds_of_day
}
/// Maps a three-letter English month abbreviation (`"Jan"` … `"Dec"`) to its
/// month number, 1 through 12.
///
/// Matching is exact and case-sensitive; anything else yields `None`.
fn month_from_abbr(s: &str) -> Option<u32> {
    const ABBREVIATIONS: [&str; 12] = [
        "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec",
    ];
    // Pair each name with its 1-based month number and pick the match.
    ABBREVIATIONS
        .iter()
        .zip(1u32..)
        .find(|(name, _)| **name == s)
        .map(|(_, number)| number)
}