use std::fmt::Formatter;
use crate::value::Value;
use chrono::{
DateTime, Datelike, FixedOffset, NaiveDate, NaiveDateTime, NaiveTime, Offset, TimeZone, Utc,
};
use chrono_tz::{Tz, UTC};
use peeking_take_while::PeekableExt;
use regex::Regex;
use tracing::warn;
use super::super::parse_grok::InternalError;
/// Converts a Joda-style date pattern (e.g. `yyyy-MM-dd HH:mm:ss.SSS`) into the
/// equivalent chrono `strftime` format string.
///
/// Runs of the same ASCII letter form one token; the letter and the run length
/// select the specifier. Text between single quotes and every non-letter
/// character is copied through as a literal. A literal `%` is emitted as `%%`
/// so that it cannot be mistaken for the start of a specifier.
///
/// `Z` runs of three or more letters produce no specifier here; nothing is
/// emitted for them.
///
/// # Errors
///
/// Returns `Err` with a message naming the whole `format` when it contains an
/// unsupported letter (including `E` runs shorter than three letters).
pub fn convert_time_format(format: &str) -> Result<String, String> {
    let mut time_format = String::with_capacity(format.len() * 2);
    let mut chars = format.chars().peekable();
    while let Some(c) = chars.next() {
        if c.is_ascii_alphabetic() {
            // Length of the run of identical pattern letters starting at `c`.
            let mut len = 1usize;
            while chars.next_if_eq(&c).is_some() {
                len += 1;
            }
            let spec = match c {
                // hour of day, 1..12
                'h' => "%I",
                // hour of day, 0..23
                'H' => "%H",
                // minute of hour
                'm' => "%M",
                // second of minute
                's' => "%S",
                // fraction of second
                'S' => {
                    // Drop the separator written just before the fraction
                    // (e.g. `.` or `,`): `%.f` always expects a dot. An escaped
                    // percent separator occupies two characters (`%%`).
                    if time_format.pop() == Some('%') {
                        time_format.pop();
                    }
                    "%.f"
                }
                // year
                'y' | 'Y' if len == 2 => "%y",
                'y' | 'Y' => "%Y",
                'x' => "%D",
                'c' | 'C' => "%C",
                // day of month
                'd' if len == 1 => "%-d",
                'd' => "%d",
                'e' => "%u",
                'D' => "%j",
                'w' => "%V",
                // month: number for one or two letters, name otherwise
                'M' => match len {
                    1 => "%-m",
                    2 => "%m",
                    3 => "%b",
                    _ => "%B",
                },
                'a' => "%p",
                // weekday name, abbreviated or full
                'E' if len == 3 => "%a",
                'E' if len > 3 => "%A",
                'z' => "%Z",
                // offset; longer runs intentionally emit nothing
                'Z' => match len {
                    1 => "%z",
                    2 => "%:z",
                    _ => "",
                },
                _ => return Err(format!("invalid date format '{format}'")),
            };
            time_format.push_str(spec);
        } else if c == '\'' {
            // Quoted literal: copy everything up to (and consume) the closing quote.
            for literal in chars.by_ref().take_while(|&cn| cn != '\'') {
                push_escaped_literal(&mut time_format, literal);
            }
        } else {
            push_escaped_literal(&mut time_format, c);
        }
    }
    Ok(time_format)
}

/// Appends a literal character to a strftime format, doubling `%` so it is
/// matched verbatim instead of starting a specifier.
fn push_escaped_literal(out: &mut String, c: char) {
    if c == '%' {
        out.push('%');
    }
    out.push(c);
}
/// Result of translating a date pattern into a regular expression with
/// `time_format_to_regex`.
pub struct RegexResult {
/// The generated regular expression source.
pub regex: String,
/// Set when the pattern contained a `z` or `Z` timezone token.
pub with_tz: bool,
/// Set when a named `tz` capture group was emitted (only for `z` tokens when
/// captures were requested).
pub with_tz_capture: bool,
/// Set when a named capture group for the fraction-of-second separator was
/// emitted (only for `S` tokens when captures were requested).
pub with_fraction_second: bool,
}
/// Resolves a timezone description to a fixed UTC offset.
///
/// Accepted forms, checked in this order:
/// * `GMT`, `UTC`, `UT` or `Z` — zero offset;
/// * a string starting with `+` or `-` — parsed as an offset;
/// * a string containing `+` (or, failing that, `-`) — the part from the first
///   such sign onwards is parsed as an offset, e.g. `GMT+5`. If that part is
///   not a valid offset, the whole string is tried as a timezone id, so ids
///   with hyphens such as `America/Port-au-Prince` are accepted;
/// * anything else — looked up as a timezone id.
///
/// # Errors
///
/// Returns the underlying parse error message when the input matches none of
/// the forms above. When both the embedded-offset parse and the id lookup
/// fail, the offset error is returned.
pub fn parse_timezone(tz: &str) -> Result<FixedOffset, String> {
    match tz {
        "GMT" | "UTC" | "UT" | "Z" => Ok(FixedOffset::east_opt(0).expect("invalid timestamp")),
        _ if tz.starts_with('+') || tz.starts_with('-') => parse_offset(tz),
        _ => match tz.find('+').or_else(|| tz.find('-')) {
            // An embedded sign is first read as an offset suffix; only when that
            // fails is the full string treated as a zone id.
            Some(sign_idx) => parse_offset(&tz[sign_idx..])
                .or_else(|offset_err| parse_tz_id_or_name(tz).map_err(|_| offset_err)),
            None => parse_tz_id_or_name(tz),
        },
    }
}
/// Looks up `tz` as a `chrono_tz` timezone and returns the offset that zone
/// has at the moment of the call.
///
/// The offset is taken at `Utc::now()`, so for zones with daylight saving it
/// depends on when this function runs.
fn parse_tz_id_or_name(tz: &str) -> Result<FixedOffset, String> {
    match tz.parse::<Tz>() {
        Ok(zone) => {
            let now_in_zone = Utc::now().with_timezone(&zone);
            Ok(now_in_zone.offset().fix())
        }
        Err(e) => Err(e.to_string()),
    }
}
/// Parses a signed UTC offset such as `+2`, `-05`, `+0530` or `+05:30`.
///
/// Inputs of at most three bytes are read as a whole number of hours. Longer
/// inputs are parsed by chrono as `%z`, or `%:z` when they contain a colon.
///
/// # Errors
///
/// Returns an error message when the text is not a valid offset, or when the
/// hour count is outside the range a `FixedOffset` can represent.
fn parse_offset(tz: &str) -> Result<FixedOffset, String> {
    if tz.len() <= 3 {
        let hours_diff = tz.parse::<i32>().map_err(|e| e.to_string())?;
        // `east_opt` yields `None` for offsets of a full day or more (e.g. "+99");
        // report that as an error instead of panicking on user-supplied input.
        return FixedOffset::east_opt(hours_diff * 3600)
            .ok_or_else(|| format!("timezone offset '{tz}' is out of range"));
    }
    let offset_format = if tz.contains(':') { "%:z" } else { "%z" };
    // chrono can only parse an offset as part of a full timestamp, so attach the
    // offset to an arbitrary fixed date and time.
    let date_str = format!("2020-04-12 22:10:57 {tz}");
    let datetime =
        DateTime::parse_from_str(&date_str, &format!("%Y-%m-%d %H:%M:%S {offset_format}"))
            .map_err(|e| e.to_string())?;
    Ok(datetime.timezone())
}
/// Name of the regex capture group that wraps the separator preceding the
/// fraction of a second, so the separator can later be replaced with a dot.
const FRACTION_CHAR_GROUP: &str = "fr";
/// Builds a regular expression that matches text written in the given
/// Joda-style date pattern.
///
/// Runs of the same ASCII letter are translated token by token; quoted text
/// is copied into the regex verbatim, and of the remaining characters only
/// `.` is escaped. When `with_captures` is true, `z` tokens are wrapped in a
/// named `tz` group and the separator before an `S` token is wrapped in the
/// `FRACTION_CHAR_GROUP` group; the returned flags report which of these
/// were produced.
///
/// # Errors
///
/// Returns `Err` naming the whole `format` when it contains an unsupported
/// letter or letter count.
pub fn time_format_to_regex(format: &str, with_captures: bool) -> Result<RegexResult, String> {
    // `n` consecutive digits.
    let digits = |n: usize| format!("[\\d]{{{n}}}");

    let mut pattern = String::new();
    let mut with_tz = false;
    let mut with_tz_capture = false;
    let mut with_fraction_second = false;

    let mut input = format.chars().peekable();
    while let Some(&current) = input.peek() {
        if current.is_ascii_alphabetic() {
            // Consume the whole run of this letter; only its length matters.
            let width = input
                .by_ref()
                .peeking_take_while(|&next| next == current)
                .count();
            match (current, width) {
                ('h' | 'H' | 'm' | 's' | 'Y' | 'x' | 'c' | 'C' | 'e' | 'D' | 'w', _) => {
                    pattern.push_str(&digits(width));
                }
                // a single `d` accepts one or two digits
                ('d', 1) => pattern.push_str("[\\d]{1,2}"),
                ('d', _) => pattern.push_str(&digits(width)),
                // a single `y` stands for a four-digit year
                ('y', 1) => pattern.push_str("[\\d]{4}"),
                ('y', _) => pattern.push_str(&digits(width)),
                // fraction of a second
                ('S', _) => {
                    // The character emitted just before is taken to be the
                    // fraction separator; a `.` was written in escaped form,
                    // so its backslash is removed as well.
                    if let Some(last) = pattern.pop() {
                        let separator = if last == '.' {
                            pattern.pop();
                            String::from("\\.")
                        } else {
                            String::from(last)
                        };
                        if with_captures {
                            pattern.push_str(&format!("(?P<{FRACTION_CHAR_GROUP}>{separator})"));
                            with_fraction_second = true;
                        } else {
                            pattern.push_str(&separator);
                        }
                    }
                    pattern.push_str(&digits(width));
                }
                // month as a number
                ('M', 1) => pattern.push_str("[\\d]{1,2}"),
                ('M', 2) => pattern.push_str("[\\d]{2}"),
                // month as an abbreviated or full name
                ('M', 3) => pattern.push_str("[\\w]{3}"),
                ('M', _) => pattern.push_str("[\\w]+"),
                // AM / PM marker
                ('a', _) => pattern.push_str("(?:[aA][mM]|[pP][mM])"),
                // weekday as an abbreviated or full name
                ('E', 3) => pattern.push_str("[\\w]{3}"),
                ('E', n) if n > 3 => pattern.push_str("[\\w]+"),
                // timezone name or id
                ('z', _) => {
                    let zone = match (width >= 4, with_captures) {
                        (true, true) => "(?P<tz>[\\w]+(?:/[\\w]+)?)",
                        (true, false) => "[\\w]+(?:\\/[\\w]+)?",
                        (false, true) => "(?P<tz>[\\w]+)",
                        (false, false) => "[\\w]+",
                    };
                    pattern.push_str(zone);
                    with_tz_capture |= with_captures;
                    with_tz = true;
                }
                // timezone offset (one or two letters) or id
                ('Z', _) => {
                    pattern.push_str(if matches!(width, 1 | 2) {
                        "(?:Z|[+-]\\d\\d:?\\d\\d)"
                    } else {
                        "[\\w]+(?:/[\\w]+)?"
                    });
                    with_tz = true;
                }
                _ => return Err(format!("invalid date format '{format}'")),
            }
        } else if current == '\'' {
            // Quoted literal: skip the opening quote, copy up to the closing one.
            pattern.extend(input.by_ref().skip(1).take_while(|&next| next != '\''));
        } else {
            input.next();
            if current == '.' {
                pattern.push('\\');
            }
            pattern.push(current);
        }
    }

    Ok(RegexResult {
        regex: pattern,
        with_tz,
        with_tz_capture,
        with_fraction_second,
    })
}
pub fn apply_date_filter(value: &Value, filter: &DateFilter) -> Result<Value, InternalError> {
let original_value = value
.as_str()
.ok_or_else(|| InternalError::FailedToApplyFilter(filter.to_string(), value.to_string()))?;
let (strp_format, mut datetime) =
adjust_strp_format_and_value(&filter.strp_format, &original_value);
if datetime.ends_with('Z') && filter.original_format.ends_with('Z') {
datetime.pop(); datetime.push_str("+0000");
};
if filter.with_tz_capture {
let tz = filter
.regex
.captures(&original_value)
.and_then(|caps| caps.name("tz"))
.expect("Filter should contain tz capture")
.as_str();
let tz: Tz = tz.parse().map_err(|error| {
warn!(message = "Error parsing tz", %tz, %error);
InternalError::FailedToApplyFilter(filter.to_string(), original_value.to_string())
})?;
replace_sec_fraction_with_dot(filter, &mut datetime);
let naive_date = NaiveDateTime::parse_from_str(&datetime, &strp_format).map_err(|error|
{
warn!(message = "Error parsing date", value = %original_value, format = %strp_format, % error);
InternalError::FailedToApplyFilter(
filter.to_string(),
original_value.to_string(),
)
})?;
let dt = tz
.from_local_datetime(&naive_date)
.single()
.ok_or_else(|| {
InternalError::FailedToApplyFilter(filter.to_string(), original_value.to_string())
})?;
Ok(Value::from(
Utc.from_utc_datetime(&dt.naive_utc()).timestamp_millis(),
))
} else {
replace_sec_fraction_with_dot(filter, &mut datetime);
if filter.tz_aware {
let timestamp = DateTime::parse_from_str(&datetime, &strp_format).map_err(|error| {
warn!(message = "Error parsing date", date = %original_value, % error);
InternalError::FailedToApplyFilter(filter.to_string(), original_value.to_string())
})?;
Ok(Value::from(timestamp.to_utc().timestamp_millis()))
} else if let Ok(dt) = NaiveDateTime::parse_from_str(&datetime, &strp_format) {
if let Some(tz) = &filter.target_tz {
let tzs = parse_timezone(tz).map_err(|error| {
warn!(message = "Error parsing tz", tz = %tz, % error);
InternalError::FailedToApplyFilter(
filter.to_string(),
original_value.to_string(),
)
})?;
let dt = tzs.from_local_datetime(&dt).single().ok_or_else(|| {
warn!(message = "Error parsing date", date = %original_value);
InternalError::FailedToApplyFilter(
filter.to_string(),
original_value.to_string(),
)
})?;
Ok(Value::from(dt.to_utc().timestamp_millis()))
} else {
Ok(Value::from(dt.and_utc().timestamp_millis()))
}
} else if let Ok(nt) = NaiveTime::parse_from_str(&datetime, &strp_format) {
Ok(Value::from(
NaiveDateTime::new(
NaiveDate::from_ymd_opt(1970, 1, 1).expect("invalid date"),
nt,
)
.and_utc()
.timestamp_millis(),
))
} else {
let nd = NaiveDate::parse_from_str(&datetime, &strp_format).map_err(|error| {
warn!(message = "Error parsing date", date = %original_value, % error);
InternalError::FailedToApplyFilter(filter.to_string(), original_value.to_string())
})?;
let datetime_tz = UTC
.from_local_datetime(&NaiveDateTime::new(
nd,
NaiveTime::from_hms_opt(0, 0, 0).expect("invalid timestamp"),
))
.single()
.ok_or_else(|| {
warn!(message = "Error parsing date", date = %original_value);
InternalError::FailedToApplyFilter(
filter.to_string(),
original_value.to_string(),
)
})?;
Ok(Value::from(
Utc.from_utc_datetime(&datetime_tz.naive_utc())
.timestamp_millis(),
))
}
}
}
/// Fills in date parts that the strftime format appears to lack, using the
/// current UTC date.
///
/// When `strp_format` contains no `d` character, the current month and day
/// are prepended to the value and `%-m %-d ` to the format. When it contains
/// neither `y` nor `Y`, the current year and `%Y ` are prepended as well.
/// Both checks look at the format as given, not at the adjusted one.
///
/// Returns the `(format, value)` pair after these adjustments.
pub fn adjust_strp_format_and_value(strp_format: &str, original_value: &str) -> (String, String) {
    let today = Utc::now();
    let lacks_day = !strp_format.contains('d');
    let lacks_year = !strp_format.chars().any(|ch| matches!(ch, 'y' | 'Y'));

    let mut format = strp_format.to_owned();
    let mut value = original_value.to_owned();
    if lacks_day {
        format.insert_str(0, "%-m %-d ");
        value.insert_str(0, &format!("{} {} ", today.month(), today.day()));
    }
    if lacks_year {
        format.insert_str(0, "%Y ");
        value.insert_str(0, &format!("{} ", today.year()));
    }
    (format, value)
}
/// Replaces the captured fraction-of-second separator in `value` with a dot.
///
/// Does nothing unless the filter was built with a fraction-separator
/// capture and the filter's regex matches `value` with that group present.
fn replace_sec_fraction_with_dot(filter: &DateFilter, value: &mut String) {
    if !filter.with_fraction_second {
        return;
    }
    // Keep only the byte range so the borrow of `value` ends before mutating it.
    let separator = filter
        .regex
        .captures(value)
        .and_then(|caps| caps.name(FRACTION_CHAR_GROUP))
        .map(|m| m.range());
    if let Some(range) = separator {
        value.replace_range(range, ".");
    }
}
/// Configuration of a `date(...)` filter, consumed by `apply_date_filter`.
#[derive(Debug, Clone)]
pub struct DateFilter {
/// The date pattern as originally written; shown by `Display` and checked
/// for a trailing `Z`.
pub original_format: String,
/// The strftime format handed to chrono when parsing values.
pub strp_format: String,
/// When true (and no timezone name is captured), values are parsed as
/// datetimes that carry their own offset.
pub tz_aware: bool,
/// Regex used to capture the `tz` group and the fraction-of-second separator
/// from a value.
pub regex: Regex,
/// Optional timezone in which naive datetimes are interpreted.
pub target_tz: Option<String>,
/// Whether `regex` is expected to capture a timezone name in a `tz` group.
pub with_tz_capture: bool,
/// Whether the fraction-of-second separator should be replaced with a dot
/// before parsing.
pub with_fraction_second: bool,
}
/// Renders the filter the way it is written in a rule: `date("<format>")`.
impl std::fmt::Display for DateFilter {
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        let format = &self.original_format;
        write!(f, "date(\"{format}\")")
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Adjusts the format / value pair and parses the result as a naive datetime.
    fn parse_adjusted(strp_format: &str, value: &str) -> NaiveDateTime {
        let (adj_format, adj_value) = adjust_strp_format_and_value(strp_format, value);
        NaiveDateTime::parse_from_str(&adj_value, &adj_format).unwrap()
    }

    // With no date parts in the format, today's date is filled in.
    #[test]
    fn adjusts_datetime_format_and_value_when_day_missing() {
        let parsed = parse_adjusted("%H:%M:%S", "12:03:42");
        let today = Utc::now();
        let expected = NaiveDate::from_ymd_opt(today.year(), today.month(), today.day())
            .and_then(|date| date.and_hms_opt(12, 3, 42))
            .unwrap();
        assert_eq!(expected, parsed);
    }

    // With day and month present but no year, the current year is filled in.
    #[test]
    fn adjusts_datetime_format_and_value_when_year_missing() {
        let parsed = parse_adjusted("%-d/%-m %H:%M:%S", "25/03 12:03:42");
        let this_year = Utc::now().year();
        let expected = NaiveDate::from_ymd_opt(this_year, 3, 25)
            .and_then(|date| date.and_hms_opt(12, 3, 42))
            .unwrap();
        assert_eq!(expected, parsed);
    }
}