ta_lib_in_rust/util/
time_utils.rs

1use chrono::{Datelike, NaiveDateTime, Timelike};
2use polars::prelude::*;
3use std::f64::consts::PI;
4
5/// Create time-based cyclical features from a time column
6///
7/// # Arguments
8///
9/// * `df` - DataFrame containing a time column
10/// * `time_column` - Name of the time column (default: "time")
11/// * `time_format` - Format of the time strings (default: "%Y-%m-%d %H:%M:%S UTC")
12///
13/// # Returns
14///
15/// Returns a Result containing a vector of Series with cyclical time features
16pub fn create_cyclical_time_features(
17    df: &DataFrame,
18    time_column: &str,
19    time_format: &str,
20) -> PolarsResult<Vec<Series>> {
21    // Check if the time column exists
22    if !df.schema().contains(time_column) {
23        return Err(PolarsError::ComputeError(
24            format!("Time column '{}' not found", time_column).into(),
25        ));
26    }
27
28    let time_col = df.column(time_column)?.str()?;
29    let n_rows = df.height();
30
31    // Create vectors for hour and day of week features
32    let mut hour_sin = Vec::with_capacity(n_rows);
33    let mut hour_cos = Vec::with_capacity(n_rows);
34    let mut day_sin = Vec::with_capacity(n_rows);
35    let mut day_cos = Vec::with_capacity(n_rows);
36
37    for i in 0..n_rows {
38        let time_str = time_col.get(i).unwrap_or("");
39        let datetime = match NaiveDateTime::parse_from_str(time_str, time_format) {
40            Ok(dt) => dt,
41            Err(_) => {
42                // Default values if parsing fails
43                hour_sin.push(0.0);
44                hour_cos.push(1.0);
45                day_sin.push(0.0);
46                day_cos.push(1.0);
47                continue;
48            }
49        };
50
51        // Extract hour (0-23) and day of week (0-6)
52        let hour = datetime.hour() as f64;
53        let day = datetime.weekday().num_days_from_monday() as f64;
54
55        // Encode using sine and cosine to capture cyclical patterns
56        hour_sin.push((2.0 * PI * hour / 24.0).sin());
57        hour_cos.push((2.0 * PI * hour / 24.0).cos());
58        day_sin.push((2.0 * PI * day / 7.0).sin());
59        day_cos.push((2.0 * PI * day / 7.0).cos());
60    }
61
62    // Create series
63    let result = vec![
64        Series::new("hour_sin".into(), hour_sin),
65        Series::new("hour_cos".into(), hour_cos),
66        Series::new("day_of_week_sin".into(), day_sin),
67        Series::new("day_of_week_cos".into(), day_cos),
68    ];
69
70    Ok(result)
71}
72
#[cfg(test)]
mod tests {
    use super::*;
    use approx::assert_relative_eq;
    use std::f64::consts::PI;

    /// Format shared by every well-formed fixture timestamp in this module.
    const TIME_FORMAT: &str = "%Y-%m-%d %H:%M:%S UTC";

    /// Builds a one-column DataFrame named `timestamp` from string timestamps.
    fn timestamp_frame(rows: Vec<&str>) -> DataFrame {
        let time_series = Series::new("timestamp".into(), rows);
        DataFrame::new(vec![time_series.into()]).unwrap()
    }

    /// Reads the `f64` value stored at `row` of a feature series.
    fn value_at(series: &Series, row: usize) -> f64 {
        series.f64().unwrap().get(row).unwrap()
    }

    #[test]
    fn test_create_cyclical_time_features() {
        let df = timestamp_frame(vec![
            "2023-01-01 00:00:00 UTC", // Sunday midnight
            "2023-01-01 06:00:00 UTC", // Sunday 6 AM
            "2023-01-01 12:00:00 UTC", // Sunday noon
            "2023-01-01 18:00:00 UTC", // Sunday 6 PM
            "2023-01-02 12:00:00 UTC", // Monday noon
            "2023-01-03 12:00:00 UTC", // Tuesday noon
            "2023-01-04 12:00:00 UTC", // Wednesday noon
        ]);

        let features = create_cyclical_time_features(&df, "timestamp", TIME_FORMAT).unwrap();

        // We should have 4 feature series, in a fixed order.
        assert_eq!(features.len(), 4);
        assert_eq!(features[0].name(), "hour_sin");
        assert_eq!(features[1].name(), "hour_cos");
        assert_eq!(features[2].name(), "day_of_week_sin");
        assert_eq!(features[3].name(), "day_of_week_cos");

        // Midnight (0 degrees): hour_sin = 0, hour_cos = 1
        assert_relative_eq!(value_at(&features[0], 0), 0.0, epsilon = 1e-10);
        assert_relative_eq!(value_at(&features[1], 0), 1.0, epsilon = 1e-10);

        // 6 AM is a quarter of the day (90 degrees): hour_sin = 1, hour_cos = 0
        assert_relative_eq!(
            value_at(&features[0], 1),
            (PI / 2.0).sin(),
            epsilon = 1e-10
        );
        assert_relative_eq!(
            value_at(&features[1], 1),
            (PI / 2.0).cos(),
            epsilon = 1e-10
        );

        // Noon is half of the day (180 degrees): hour_sin = 0, hour_cos = -1
        assert_relative_eq!(value_at(&features[0], 2), (PI).sin(), epsilon = 1e-10);
        assert_relative_eq!(value_at(&features[1], 2), (PI).cos(), epsilon = 1e-10);

        // 6 PM is three quarters of the day (270 degrees): hour_sin = -1, hour_cos = 0
        assert_relative_eq!(
            value_at(&features[0], 3),
            (3.0 * PI / 2.0).sin(),
            epsilon = 1e-10
        );
        assert_relative_eq!(
            value_at(&features[1], 3),
            (3.0 * PI / 2.0).cos(),
            epsilon = 1e-10
        );

        // Weekdays are counted from Monday = 0, so Sunday is position 6 of 7.
        assert_relative_eq!(
            value_at(&features[2], 0),
            (2.0 * PI * 6.0 / 7.0).sin(),
            epsilon = 1e-10
        );
        assert_relative_eq!(
            value_at(&features[3], 0),
            (2.0 * PI * 6.0 / 7.0).cos(),
            epsilon = 1e-10
        );

        // Rows 4..7 are Monday, Tuesday, Wednesday: positions 0, 1, 2 of 7.
        for row in 4..7 {
            let day = (row - 4) as f64;
            let expected_sin = (2.0 * PI * day / 7.0).sin();
            let expected_cos = (2.0 * PI * day / 7.0).cos();
            assert_relative_eq!(value_at(&features[2], row), expected_sin, epsilon = 1e-10);
            assert_relative_eq!(value_at(&features[3], row), expected_cos, epsilon = 1e-10);
        }
    }

    #[test]
    fn test_create_cyclical_time_features_invalid_format() {
        let df = timestamp_frame(vec![
            "2023-01-01 00:00:00 UTC", // Correct format
            "2023/01/01 06:00:00",     // Incorrect format
        ]);

        let features = create_cyclical_time_features(&df, "timestamp", TIME_FORMAT).unwrap();

        // First row parses: midnight gives hour_sin = 0, hour_cos = 1.
        assert_relative_eq!(value_at(&features[0], 0), 0.0, epsilon = 1e-10);
        assert_relative_eq!(value_at(&features[1], 0), 1.0, epsilon = 1e-10);

        // Second row fails to parse and must fall back to sin = 0, cos = 1
        // for the hour features...
        assert_relative_eq!(value_at(&features[0], 1), 0.0, epsilon = 1e-10);
        assert_relative_eq!(value_at(&features[1], 1), 1.0, epsilon = 1e-10);

        // ...and for the day-of-week features as well.
        assert_relative_eq!(value_at(&features[2], 1), 0.0, epsilon = 1e-10);
        assert_relative_eq!(value_at(&features[3], 1), 1.0, epsilon = 1e-10);
    }

    #[test]
    fn test_create_cyclical_time_features_missing_column() {
        // DataFrame without the requested time column.
        let dummy_series = Series::new("dummy".into(), &[1, 2, 3]);
        let df = DataFrame::new(vec![dummy_series.into()]).unwrap();

        // Inspect the returned error directly so an unrelated panic cannot
        // make this test pass by accident.
        let err = create_cyclical_time_features(&df, "timestamp", TIME_FORMAT)
            .expect_err("a missing time column must be reported as an error");

        assert!(err.to_string().contains("Time column 'timestamp' not found"));
        assert!(matches!(err, PolarsError::ComputeError(_)));
    }
}