ta_lib_in_rust/util/
time_utils.rs

1use polars::prelude::*;
2use chrono::{NaiveDateTime, Timelike, Datelike};
3use std::f64::consts::PI;
4
5/// Create time-based cyclical features from a time column
6///
7/// # Arguments
8///
9/// * `df` - DataFrame containing a time column
10/// * `time_column` - Name of the time column (default: "time")
11/// * `time_format` - Format of the time strings (default: "%Y-%m-%d %H:%M:%S UTC")
12///
13/// # Returns
14///
15/// Returns a Result containing a vector of Series with cyclical time features
16pub fn create_cyclical_time_features(
17    df: &DataFrame, 
18    time_column: &str,
19    time_format: &str
20) -> PolarsResult<Vec<Series>> {
21    // Check if the time column exists
22    if !df.schema().contains(time_column) {
23        return Err(PolarsError::ComputeError(
24            format!("Time column '{}' not found", time_column).into()
25        ));
26    }
27    
28    let time_col = df.column(time_column)?.str()?;
29    let n_rows = df.height();
30    
31    // Create vectors for hour and day of week features
32    let mut hour_sin = Vec::with_capacity(n_rows);
33    let mut hour_cos = Vec::with_capacity(n_rows);
34    let mut day_sin = Vec::with_capacity(n_rows);
35    let mut day_cos = Vec::with_capacity(n_rows);
36    
37    for i in 0..n_rows {
38        let time_str = time_col.get(i).unwrap_or("");
39        let datetime = match NaiveDateTime::parse_from_str(time_str, time_format) {
40            Ok(dt) => dt,
41            Err(_) => {
42                // Default values if parsing fails
43                hour_sin.push(0.0);
44                hour_cos.push(1.0);
45                day_sin.push(0.0);
46                day_cos.push(1.0);
47                continue;
48            }
49        };
50        
51        // Extract hour (0-23) and day of week (0-6)
52        let hour = datetime.hour() as f64;
53        let day = datetime.weekday().num_days_from_monday() as f64;
54        
55        // Encode using sine and cosine to capture cyclical patterns
56        hour_sin.push((2.0 * PI * hour / 24.0).sin());
57        hour_cos.push((2.0 * PI * hour / 24.0).cos());
58        day_sin.push((2.0 * PI * day / 7.0).sin());
59        day_cos.push((2.0 * PI * day / 7.0).cos());
60    }
61    
62    // Create series
63    let result = vec![
64        Series::new("hour_sin".into(), hour_sin),
65        Series::new("hour_cos".into(), hour_cos),
66        Series::new("day_of_week_sin".into(), day_sin),
67        Series::new("day_of_week_cos".into(), day_cos),
68    ];
69    
70    Ok(result)
71} 
72
#[cfg(test)]
mod tests {
    use super::*;
    use std::f64::consts::PI;
    use approx::assert_relative_eq;

    /// Timestamp layout shared by every test in this module.
    const TIME_FORMAT: &str = "%Y-%m-%d %H:%M:%S UTC";

    /// Read the `f64` value at `row` from the feature series at index `feature`.
    fn value_at(features: &[Series], feature: usize, row: usize) -> f64 {
        features[feature].f64().unwrap().get(row).unwrap()
    }

    #[test]
    fn test_create_cyclical_time_features() {
        // 2023-01-01 is a Sunday, so the first four rows share the same day
        // and differ only in the hour.
        let time_data = vec![
            "2023-01-01 00:00:00 UTC", // Sunday midnight
            "2023-01-01 06:00:00 UTC", // Sunday 6 AM
            "2023-01-01 12:00:00 UTC", // Sunday noon
            "2023-01-01 18:00:00 UTC", // Sunday 6 PM
            "2023-01-02 12:00:00 UTC", // Monday noon
            "2023-01-03 12:00:00 UTC", // Tuesday noon
            "2023-01-04 12:00:00 UTC", // Wednesday noon
        ];

        let time_series = Series::new("timestamp".into(), time_data);
        let df = DataFrame::new(vec![time_series.into()]).unwrap();

        let features = create_cyclical_time_features(&df, "timestamp", TIME_FORMAT).unwrap();

        // We should have 4 feature series
        assert_eq!(features.len(), 4);

        // Check naming
        assert_eq!(features[0].name(), "hour_sin");
        assert_eq!(features[1].name(), "hour_cos");
        assert_eq!(features[2].name(), "day_of_week_sin");
        assert_eq!(features[3].name(), "day_of_week_cos");

        // Midnight (0 degrees): hour_sin = 0, hour_cos = 1
        assert_relative_eq!(value_at(&features, 0, 0), 0.0, epsilon = 1e-10);
        assert_relative_eq!(value_at(&features, 1, 0), 1.0, epsilon = 1e-10);

        // 6 AM (90 degrees): hour_sin = 1, hour_cos = 0
        assert_relative_eq!(value_at(&features, 0, 1), (PI / 2.0).sin(), epsilon = 1e-10);
        assert_relative_eq!(value_at(&features, 1, 1), (PI / 2.0).cos(), epsilon = 1e-10);

        // Noon (180 degrees): hour_sin = 0, hour_cos = -1
        assert_relative_eq!(value_at(&features, 0, 2), (PI).sin(), epsilon = 1e-10);
        assert_relative_eq!(value_at(&features, 1, 2), (PI).cos(), epsilon = 1e-10);

        // 6 PM (270 degrees): hour_sin = -1, hour_cos = 0
        assert_relative_eq!(value_at(&features, 0, 3), (3.0 * PI / 2.0).sin(), epsilon = 1e-10);
        assert_relative_eq!(value_at(&features, 1, 3), (3.0 * PI / 2.0).cos(), epsilon = 1e-10);

        // Sunday is day 6 when counting from Monday = 0, i.e. an angle of 2*pi*6/7
        assert_relative_eq!(value_at(&features, 2, 0), (2.0 * PI * 6.0 / 7.0).sin(), epsilon = 1e-10);
        assert_relative_eq!(value_at(&features, 3, 0), (2.0 * PI * 6.0 / 7.0).cos(), epsilon = 1e-10);

        // Rows 4..7 are Monday, Tuesday and Wednesday, i.e. days 0, 1 and 2
        for i in 4..7 {
            let day = i - 4;
            let expected_sin = (2.0 * PI * day as f64 / 7.0).sin();
            let expected_cos = (2.0 * PI * day as f64 / 7.0).cos();
            assert_relative_eq!(value_at(&features, 2, i), expected_sin, epsilon = 1e-10);
            assert_relative_eq!(value_at(&features, 3, i), expected_cos, epsilon = 1e-10);
        }
    }

    #[test]
    fn test_create_cyclical_time_features_invalid_format() {
        let time_data = vec![
            "2023-01-01 00:00:00 UTC", // Correct format
            "2023/01/01 06:00:00",     // Incorrect format
        ];

        let time_series = Series::new("timestamp".into(), time_data);
        let df = DataFrame::new(vec![time_series.into()]).unwrap();

        let features = create_cyclical_time_features(&df, "timestamp", TIME_FORMAT).unwrap();

        // First row parses: midnight gives hour_sin = 0, hour_cos = 1
        assert_relative_eq!(value_at(&features, 0, 0), 0.0, epsilon = 1e-10);
        assert_relative_eq!(value_at(&features, 1, 0), 1.0, epsilon = 1e-10);

        // Second row fails to parse, so all four features fall back to the
        // placeholder encoding sin = 0, cos = 1
        assert_relative_eq!(value_at(&features, 0, 1), 0.0, epsilon = 1e-10);
        assert_relative_eq!(value_at(&features, 1, 1), 1.0, epsilon = 1e-10);
        assert_relative_eq!(value_at(&features, 2, 1), 0.0, epsilon = 1e-10);
        assert_relative_eq!(value_at(&features, 3, 1), 1.0, epsilon = 1e-10);
    }

    #[test]
    fn test_create_cyclical_time_features_missing_column() {
        // The DataFrame has no "timestamp" column
        let dummy_series = Series::new("dummy".into(), &[1, 2, 3]);
        let df = DataFrame::new(vec![dummy_series.into()]).unwrap();

        // Assert on the returned error itself rather than on a panic from
        // `unwrap`, so the test cannot pass because of an unrelated panic.
        let err = create_cyclical_time_features(&df, "timestamp", TIME_FORMAT).unwrap_err();
        assert!(
            err.to_string().contains("not found"),
            "unexpected error: {}",
            err
        );
    }
}