use crate::types::{ColumnStats, DateTimeStats};
use chrono::{Datelike, NaiveDate, NaiveDateTime, Timelike, Weekday};
use std::collections::HashMap;
/// One successfully parsed input value.
///
/// Produced by `parse_flexible_full`; the date is always available, while the
/// full timestamp is only kept when the input included a time of day.
struct ParsedDateTime {
/// Calendar date of the value.
date: NaiveDate,
/// Full timestamp when the input matched a date-and-time format,
/// `None` when it matched a date-only format.
datetime: Option<NaiveDateTime>,
}
/// Computes date/time statistics for a column of raw strings and wraps them
/// in the [`ColumnStats::DateTime`] variant.
///
/// This is a thin adapter over [`compute_datetime_stats`]; see that function
/// for how values are parsed and which statistics are produced.
pub fn calculate_datetime_stats(data: &[String]) -> ColumnStats {
    let stats = compute_datetime_stats(data);
    ColumnStats::DateTime(stats)
}
/// Builds [`DateTimeStats`] from a column of raw strings.
///
/// Each entry is parsed with `parse_flexible_full`; entries that match none of
/// the supported formats are skipped. When nothing parses (including an empty
/// input) the result is `DateTimeStats::empty()`.
///
/// The reported minimum and maximum are formatted as `%Y-%m-%d`, so any
/// time-of-day component is not reflected in them, and `duration_days` is the
/// whole number of days between those two dates. `hour_distribution` is `None`
/// when no parsed value carried a time component; otherwise it counts only the
/// values that did.
pub fn compute_datetime_stats(data: &[String]) -> DateTimeStats {
    let parsed: Vec<ParsedDateTime> = data
        .iter()
        .filter_map(|raw| parse_flexible_full(raw.as_str()))
        .collect();

    let dates: Vec<NaiveDate> = parsed.iter().map(|entry| entry.date).collect();

    // An empty `dates` means nothing parsed, which is the only way min/max
    // can be absent.
    let (earliest, latest) = match (dates.iter().min(), dates.iter().max()) {
        (Some(&lo), Some(&hi)) => (lo, hi),
        _ => return DateTimeStats::empty(),
    };

    // Only values that were parsed with a time of day contribute to the
    // hourly histogram.
    let timestamps: Vec<NaiveDateTime> =
        parsed.iter().filter_map(|entry| entry.datetime).collect();
    let hour_distribution =
        (!timestamps.is_empty()).then(|| build_hour_distribution(&timestamps));

    DateTimeStats {
        min_datetime: earliest.format("%Y-%m-%d").to_string(),
        max_datetime: latest.format("%Y-%m-%d").to_string(),
        duration_days: (latest - earliest).num_days() as f64,
        year_distribution: build_year_distribution(&dates),
        month_distribution: build_month_distribution(&dates),
        day_of_week_distribution: build_day_of_week_distribution(&dates),
        hour_distribution,
    }
}
/// Parses a single raw value into a [`ParsedDateTime`], trying a fixed list of
/// formats in priority order.
///
/// Leading and trailing whitespace is ignored. Date-and-time formats are tried
/// first; if one matches, both `date` and `datetime` are populated. Otherwise
/// the date-only formats are tried and `datetime` is left as `None`.
///
/// Returns `None` when the value matches none of the supported formats.
///
/// Note on ambiguity: the day-first format `%d/%m/%Y` is tried before the
/// month-first `%m/%d/%Y`, so a slash-separated value that is valid under both
/// readings is interpreted day-first. The month-first reading is only reached
/// when the day-first one fails (e.g. `01/15/2023`).
fn parse_flexible_full(s: &str) -> Option<ParsedDateTime> {
    // Format tables are constants so no allocation happens per parsed value;
    // this function runs once per input row.
    const DATETIME_FORMATS: &[&str] = &[
        "%Y-%m-%dT%H:%M:%S",
        "%Y-%m-%d %H:%M:%S",
        "%d/%m/%Y %H:%M:%S",
        "%Y-%m-%dT%H:%M:%S%.f",
    ];
    // Order matters: earlier entries win when several formats could match.
    const DATE_FORMATS: &[&str] = &[
        "%Y-%m-%d",
        "%d/%m/%Y",
        "%d-%m-%Y",
        "%d.%m.%Y",
        "%Y/%m/%d",
        "%m/%d/%Y",
    ];

    let trimmed = s.trim();

    let with_time = DATETIME_FORMATS
        .iter()
        .find_map(|&format| NaiveDateTime::parse_from_str(trimmed, format).ok())
        .map(|dt| ParsedDateTime {
            date: dt.date(),
            datetime: Some(dt),
        });

    with_time.or_else(|| {
        DATE_FORMATS
            .iter()
            .find_map(|&format| NaiveDate::parse_from_str(trimmed, format).ok())
            .map(|date| ParsedDateTime {
                date,
                datetime: None,
            })
    })
}
/// Counts how many of the given dates fall in each calendar year.
///
/// The returned map is keyed by year; years with no dates are absent.
fn build_year_distribution(dates: &[NaiveDate]) -> HashMap<i32, usize> {
    dates
        .iter()
        .map(|date| date.year())
        .fold(HashMap::new(), |mut counts, year| {
            *counts.entry(year).or_insert(0) += 1;
            counts
        })
}
/// Counts how many of the given dates fall in each month, regardless of year.
///
/// The returned map is keyed by the month number reported by
/// `Datelike::month`; months with no dates are absent.
fn build_month_distribution(dates: &[NaiveDate]) -> HashMap<u32, usize> {
    dates
        .iter()
        .map(|date| date.month())
        .fold(HashMap::new(), |mut counts, month| {
            *counts.entry(month).or_insert(0) += 1;
            counts
        })
}
/// Counts how many of the given dates fall on each day of the week.
///
/// Keys are the English day names produced by `weekday_name` (for example
/// `"Monday"`); days with no dates are absent.
fn build_day_of_week_distribution(dates: &[NaiveDate]) -> HashMap<String, usize> {
    dates
        .iter()
        .map(|date| weekday_name(date.weekday()))
        .fold(HashMap::new(), |mut counts, name| {
            *counts.entry(name.to_string()).or_insert(0) += 1;
            counts
        })
}
fn weekday_name(weekday: Weekday) -> &'static str {
match weekday {
Weekday::Mon => "Monday",
Weekday::Tue => "Tuesday",
Weekday::Wed => "Wednesday",
Weekday::Thu => "Thursday",
Weekday::Fri => "Friday",
Weekday::Sat => "Saturday",
Weekday::Sun => "Sunday",
}
}
/// Counts how many of the given timestamps fall in each hour of the day.
///
/// The returned map is keyed by the hour reported by `Timelike::hour`; hours
/// with no timestamps are absent.
fn build_hour_distribution(datetimes: &[NaiveDateTime]) -> HashMap<u32, usize> {
    datetimes
        .iter()
        .map(|timestamp| timestamp.hour())
        .fold(HashMap::new(), |mut counts, hour| {
            *counts.entry(hour).or_insert(0) += 1;
            counts
        })
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a date from year/month/day, panicking on an invalid combination.
    fn ymd(year: i32, month: u32, day: u32) -> NaiveDate {
        NaiveDate::from_ymd_opt(year, month, day).unwrap()
    }

    /// Turns string literals into the owned column shape the API expects.
    fn column(values: &[&str]) -> Vec<String> {
        values.iter().map(|value| value.to_string()).collect()
    }

    /// Runs the public entry point and unwraps the `DateTime` variant.
    fn datetime_stats_for(data: &[String]) -> DateTimeStats {
        match calculate_datetime_stats(data) {
            ColumnStats::DateTime(stats) => stats,
            _ => panic!("Expected DateTime stats"),
        }
    }

    #[test]
    fn test_parse_iso_date() {
        let ParsedDateTime { date, datetime } = parse_flexible_full("2023-01-15").unwrap();
        assert_eq!((date.year(), date.month(), date.day()), (2023, 1, 15));
        assert!(datetime.is_none());
    }

    #[test]
    fn test_parse_european_format() {
        let ParsedDateTime { date, datetime } = parse_flexible_full("15/01/2023").unwrap();
        assert_eq!((date.day(), date.month(), date.year()), (15, 1, 2023));
        assert!(datetime.is_none());
    }

    #[test]
    fn test_parse_us_format() {
        // 15 is not a valid month, so the day-first format fails and the
        // month-first fallback is used.
        let ParsedDateTime { date, datetime } = parse_flexible_full("01/15/2023").unwrap();
        assert_eq!((date.month(), date.day()), (1, 15));
        assert!(datetime.is_none());
    }

    #[test]
    fn test_parse_datetime_iso() {
        let ParsedDateTime { date, datetime } =
            parse_flexible_full("2023-01-15T10:30:00").unwrap();
        assert_eq!(date.year(), 2023);
        assert!(datetime.is_some());
        let timestamp = datetime.unwrap();
        assert_eq!((timestamp.hour(), timestamp.minute()), (10, 30));
    }

    #[test]
    fn test_year_distribution() {
        let dates = [ymd(2023, 1, 1), ymd(2023, 6, 1), ymd(2024, 1, 1)];
        let by_year = build_year_distribution(&dates);
        assert_eq!(by_year.get(&2023).copied(), Some(2));
        assert_eq!(by_year.get(&2024).copied(), Some(1));
    }

    #[test]
    fn test_month_distribution() {
        let dates = [ymd(2023, 1, 1), ymd(2023, 1, 15), ymd(2023, 2, 1)];
        let by_month = build_month_distribution(&dates);
        assert_eq!(by_month.get(&1).copied(), Some(2));
        assert_eq!(by_month.get(&2).copied(), Some(1));
    }

    #[test]
    fn test_day_of_week_distribution() {
        // 2023-01-02 and 2023-01-09 are Mondays; 2023-01-03 is a Tuesday.
        let dates = [ymd(2023, 1, 2), ymd(2023, 1, 3), ymd(2023, 1, 9)];
        let by_day = build_day_of_week_distribution(&dates);
        assert_eq!(by_day.get("Monday").copied(), Some(2));
        assert_eq!(by_day.get("Tuesday").copied(), Some(1));
    }

    #[test]
    fn test_duration_calculation() {
        let stats = datetime_stats_for(&column(&["2023-01-01", "2023-01-31"]));
        assert_eq!(stats.duration_days, 30.0);
    }

    #[test]
    fn test_hour_distribution() {
        let stats = datetime_stats_for(&column(&[
            "2023-01-01T10:00:00",
            "2023-01-01T10:30:00",
            "2023-01-01T14:00:00",
        ]));
        let by_hour = stats.hour_distribution.unwrap();
        assert_eq!(by_hour.get(&10).copied(), Some(2));
        assert_eq!(by_hour.get(&14).copied(), Some(1));
    }

    #[test]
    fn test_empty_data() {
        let stats = datetime_stats_for(&[]);
        assert!(stats.min_datetime.is_empty());
        assert!(stats.max_datetime.is_empty());
        assert_eq!(stats.duration_days, 0.0);
    }
}