rustalib/util/
time_utils.rs

1use chrono::{Datelike, NaiveDate, NaiveDateTime, Timelike};
2use polars::prelude::*;
3use std::f64::consts::PI;
4
5/// Parse a date string into a NaiveDate object
6///
7/// # Arguments
8///
9/// * `date_str` - Date string in YYYY-MM-DD format
10///
11/// # Returns
12///
13/// Returns a Result with NaiveDate on success, or error on failure
14pub fn parse_date(date_str: &str) -> Result<NaiveDate, chrono::ParseError> {
15    NaiveDate::parse_from_str(date_str, "%Y-%m-%d")
16}
17
18/// Format a NaiveDate into a string
19///
20/// # Arguments
21///
22/// * `date` - NaiveDate object to format
23///
24/// # Returns
25///
26/// Returns a formatted date string in YYYY-MM-DD format
27pub fn format_date(date: &NaiveDate) -> String {
28    date.format("%Y-%m-%d").to_string()
29}
30
31/// Create time-based cyclical features from a time column
32///
33/// # Arguments
34///
35/// * `df` - DataFrame containing a time column
36/// * `time_column` - Name of the time column (default: "time")
37/// * `time_format` - Format of the time strings (default: "%Y-%m-%d %H:%M:%S UTC")
38///
39/// # Returns
40///
41/// Returns a Result containing a vector of Series with cyclical time features
42pub fn create_cyclical_time_features(
43    df: &DataFrame,
44    time_column: &str,
45    time_format: &str,
46) -> PolarsResult<Vec<Series>> {
47    // Extract and validate time column
48    let time_series = df.column(time_column)?;
49    let time_strs = time_series.str()?;
50
51    // Initialize vectors for storing sine and cosine features
52    let mut hour_sin = Vec::with_capacity(df.height());
53    let mut hour_cos = Vec::with_capacity(df.height());
54    let mut day_sin = Vec::with_capacity(df.height());
55    let mut day_cos = Vec::with_capacity(df.height());
56
57    // Create Timezone-naïve chrono format
58    let format_str = time_format.replace(" UTC", "");
59
60    for i in 0..df.height() {
61        let time_str = time_strs.get(i).unwrap_or("");
62        let datetime = match NaiveDateTime::parse_from_str(time_str, &format_str) {
63            Ok(dt) => dt,
64            Err(_) => {
65                // Default values if parsing fails
66                hour_sin.push(0.0);
67                hour_cos.push(1.0);
68                day_sin.push(0.0);
69                day_cos.push(1.0);
70                continue;
71            }
72        };
73
74        // Extract hour (0-23) and day of week (0-6)
75        let hour = datetime.hour() as f64;
76        let day = datetime.weekday().num_days_from_monday() as f64;
77
78        // Encode using sine and cosine to capture cyclical patterns
79        hour_sin.push((2.0 * PI * hour / 24.0).sin());
80        hour_cos.push((2.0 * PI * hour / 24.0).cos());
81        day_sin.push((2.0 * PI * day / 7.0).sin());
82        day_cos.push((2.0 * PI * day / 7.0).cos());
83    }
84
85    // Create series
86    let result = vec![
87        Series::new("hour_sin".into(), hour_sin),
88        Series::new("hour_cos".into(), hour_cos),
89        Series::new("day_of_week_sin".into(), day_sin),
90        Series::new("day_of_week_cos".into(), day_cos),
91    ];
92
93    Ok(result)
94}