ta_lib_in_rust/util/
time_utils.rs1use chrono::{Datelike, NaiveDateTime, Timelike};
2use polars::prelude::*;
3use std::f64::consts::PI;
4
5pub fn create_cyclical_time_features(
17 df: &DataFrame,
18 time_column: &str,
19 time_format: &str,
20) -> PolarsResult<Vec<Series>> {
21 if !df.schema().contains(time_column) {
23 return Err(PolarsError::ComputeError(
24 format!("Time column '{}' not found", time_column).into(),
25 ));
26 }
27
28 let time_col = df.column(time_column)?.str()?;
29 let n_rows = df.height();
30
31 let mut hour_sin = Vec::with_capacity(n_rows);
33 let mut hour_cos = Vec::with_capacity(n_rows);
34 let mut day_sin = Vec::with_capacity(n_rows);
35 let mut day_cos = Vec::with_capacity(n_rows);
36
37 for i in 0..n_rows {
38 let time_str = time_col.get(i).unwrap_or("");
39 let datetime = match NaiveDateTime::parse_from_str(time_str, time_format) {
40 Ok(dt) => dt,
41 Err(_) => {
42 hour_sin.push(0.0);
44 hour_cos.push(1.0);
45 day_sin.push(0.0);
46 day_cos.push(1.0);
47 continue;
48 }
49 };
50
51 let hour = datetime.hour() as f64;
53 let day = datetime.weekday().num_days_from_monday() as f64;
54
55 hour_sin.push((2.0 * PI * hour / 24.0).sin());
57 hour_cos.push((2.0 * PI * hour / 24.0).cos());
58 day_sin.push((2.0 * PI * day / 7.0).sin());
59 day_cos.push((2.0 * PI * day / 7.0).cos());
60 }
61
62 let result = vec![
64 Series::new("hour_sin".into(), hour_sin),
65 Series::new("hour_cos".into(), hour_cos),
66 Series::new("day_of_week_sin".into(), day_sin),
67 Series::new("day_of_week_cos".into(), day_cos),
68 ];
69
70 Ok(result)
71}
72
#[cfg(test)]
mod tests {
    use super::*;
    use approx::assert_relative_eq;
    use std::f64::consts::PI;

    /// Format string shared by every test in this module.
    const UTC_FORMAT: &str = "%Y-%m-%d %H:%M:%S UTC";
    /// Absolute tolerance used for all floating-point comparisons.
    const TOLERANCE: f64 = 1e-10;

    /// Wraps `stamps` in a single-column frame whose column is `timestamp`.
    fn timestamp_frame(stamps: Vec<&str>) -> DataFrame {
        let column = Series::new("timestamp".into(), stamps);
        DataFrame::new(vec![column.into()]).unwrap()
    }

    /// Reads row `row` of an `f64` feature series, panicking if it is absent.
    fn value_at(series: &Series, row: usize) -> f64 {
        series.f64().unwrap().get(row).unwrap()
    }

    /// Asserts that `actual` equals `expected` within `TOLERANCE`.
    fn assert_close(actual: f64, expected: f64) {
        assert_relative_eq!(actual, expected, epsilon = TOLERANCE);
    }

    #[test]
    fn test_create_cyclical_time_features() {
        // Rows 0-3: one Sunday sampled at hours 0, 6, 12 and 18.
        // Rows 4-6: the following Monday, Tuesday and Wednesday at noon.
        let df = timestamp_frame(vec![
            "2023-01-01 00:00:00 UTC",
            "2023-01-01 06:00:00 UTC",
            "2023-01-01 12:00:00 UTC",
            "2023-01-01 18:00:00 UTC",
            "2023-01-02 12:00:00 UTC",
            "2023-01-03 12:00:00 UTC",
            "2023-01-04 12:00:00 UTC",
        ]);

        let features = create_cyclical_time_features(&df, "timestamp", UTC_FORMAT).unwrap();

        assert_eq!(features.len(), 4);

        let expected_names = [
            "hour_sin",
            "hour_cos",
            "day_of_week_sin",
            "day_of_week_cos",
        ];
        for (series, expected) in features.iter().zip(expected_names) {
            assert_eq!(series.name(), expected);
        }

        // (row, expected hour_sin, expected hour_cos)
        let hour_cases = [
            (0, 0.0, 1.0),
            (1, (PI / 2.0).sin(), (PI / 2.0).cos()),
            (2, (PI).sin(), (PI).cos()),
            (3, (3.0 * PI / 2.0).sin(), (3.0 * PI / 2.0).cos()),
        ];
        for (row, sin, cos) in hour_cases {
            assert_close(value_at(&features[0], row), sin);
            assert_close(value_at(&features[1], row), cos);
        }

        // (row, weekday index counted from Monday): Sunday is 6, then Mon..Wed.
        let day_cases = [(0, 6.0), (4, 0.0), (5, 1.0), (6, 2.0)];
        for (row, day) in day_cases {
            let angle: f64 = 2.0 * PI * day / 7.0;
            assert_close(value_at(&features[2], row), angle.sin());
            assert_close(value_at(&features[3], row), angle.cos());
        }
    }

    #[test]
    fn test_create_cyclical_time_features_invalid_format() {
        // Row 0 matches the format (hour 0); row 1 uses slashes and has no
        // "UTC" suffix, so it cannot be parsed and takes the fallback values.
        let df = timestamp_frame(vec!["2023-01-01 00:00:00 UTC", "2023/01/01 06:00:00"]);

        let features = create_cyclical_time_features(&df, "timestamp", UTC_FORMAT).unwrap();

        for row in 0..2 {
            assert_close(value_at(&features[0], row), 0.0);
            assert_close(value_at(&features[1], row), 1.0);
        }
    }

    #[test]
    #[should_panic(expected = "not found")]
    fn test_create_cyclical_time_features_missing_column() {
        // The frame has no `timestamp` column, so unwrapping the error panics.
        let unrelated = Series::new("dummy".into(), &[1, 2, 3]);
        let df = DataFrame::new(vec![unrelated.into()]).unwrap();

        let _ = create_cyclical_time_features(&df, "timestamp", UTC_FORMAT).unwrap();
    }
}