datasynth_eval/ml/
temporal_fidelity.rs1use crate::error::EvalResult;
7use serde::{Deserialize, Serialize};
8use std::collections::HashMap;
9
/// A single timestamped observation consumed by the temporal fidelity analyzer.
#[derive(Debug, Clone)]
pub struct TemporalRecord {
    /// Timestamp in seconds since the Unix epoch (1970-01-01). The analyzer buckets records
    /// into days by dividing this value by 86 400; no time-zone offset is applied.
    pub timestamp_epoch: i64,
    /// Observed value. It is summed per day for the autocorrelation series and averaged per
    /// month segment for the period-end spike ratio.
    pub value: f64,
}
18
/// Pass/fail thresholds applied to a temporal fidelity analysis.
#[derive(Debug, Clone)]
pub struct TemporalFidelityThresholds {
    /// Minimum composite temporal fidelity score (the score itself is clamped to `0.0..=1.0`)
    /// below which the analysis records an issue and fails.
    pub min_temporal_fidelity: f64,
}
25
26impl Default for TemporalFidelityThresholds {
27 fn default() -> Self {
28 Self {
29 min_temporal_fidelity: 0.70,
30 }
31 }
32}
33
34#[derive(Debug, Clone, Serialize, Deserialize)]
36pub struct TemporalFidelityAnalysis {
37 pub temporal_fidelity_score: f64,
39 pub seasonality_strength: f64,
41 pub weekly_autocorrelation: f64,
43 pub monthly_autocorrelation: f64,
45 pub period_end_spike_ratio: f64,
47 pub weekday_cv: f64,
49 pub total_records: usize,
51 pub passes: bool,
53 pub issues: Vec<String>,
55}
56
57pub struct TemporalFidelityAnalyzer {
59 thresholds: TemporalFidelityThresholds,
60}
61
62impl TemporalFidelityAnalyzer {
63 pub fn new() -> Self {
65 Self {
66 thresholds: TemporalFidelityThresholds::default(),
67 }
68 }
69
70 pub fn with_thresholds(thresholds: TemporalFidelityThresholds) -> Self {
72 Self { thresholds }
73 }
74
75 pub fn analyze(&self, records: &[TemporalRecord]) -> EvalResult<TemporalFidelityAnalysis> {
77 let mut issues = Vec::new();
78 let total_records = records.len();
79
80 if records.is_empty() {
81 return Ok(TemporalFidelityAnalysis {
82 temporal_fidelity_score: 0.0,
83 seasonality_strength: 0.0,
84 weekly_autocorrelation: 0.0,
85 monthly_autocorrelation: 0.0,
86 period_end_spike_ratio: 1.0,
87 weekday_cv: 0.0,
88 total_records: 0,
89 passes: true,
90 issues: vec!["No records provided".to_string()],
91 });
92 }
93
94 let mut sorted: Vec<&TemporalRecord> = records.iter().collect();
96 sorted.sort_by_key(|r| r.timestamp_epoch);
97
98 let daily_values = self.aggregate_daily(&sorted);
100
101 let weekly_autocorrelation = self.autocorrelation(&daily_values, 7);
103 let monthly_autocorrelation = self.autocorrelation(&daily_values, 30);
104 let seasonality_strength = weekly_autocorrelation
105 .abs()
106 .max(monthly_autocorrelation.abs());
107
108 let period_end_spike_ratio = self.compute_period_end_spike(&sorted);
110
111 let weekday_cv = self.compute_weekday_cv(&sorted);
113
114 let seasonality_factor = seasonality_strength.clamp(0.0, 1.0);
117 let spike_factor = if period_end_spike_ratio > 1.0 {
118 (1.0 - 1.0 / period_end_spike_ratio).clamp(0.0, 1.0)
119 } else {
120 0.0
121 };
122 let weekday_factor = weekday_cv.clamp(0.0, 1.0);
123
124 let temporal_fidelity_score =
125 (seasonality_factor * 0.4 + spike_factor * 0.3 + weekday_factor * 0.3).clamp(0.0, 1.0);
126
127 if temporal_fidelity_score < self.thresholds.min_temporal_fidelity {
128 issues.push(format!(
129 "Temporal fidelity score {:.4} < {:.4} (threshold)",
130 temporal_fidelity_score, self.thresholds.min_temporal_fidelity
131 ));
132 }
133
134 let passes = issues.is_empty();
135
136 Ok(TemporalFidelityAnalysis {
137 temporal_fidelity_score,
138 seasonality_strength,
139 weekly_autocorrelation,
140 monthly_autocorrelation,
141 period_end_spike_ratio,
142 weekday_cv,
143 total_records,
144 passes,
145 issues,
146 })
147 }
148
149 fn aggregate_daily(&self, sorted_records: &[&TemporalRecord]) -> Vec<f64> {
151 if sorted_records.is_empty() {
152 return Vec::new();
153 }
154
155 let seconds_per_day = 86400i64;
156 let mut daily: HashMap<i64, f64> = HashMap::new();
157
158 for record in sorted_records {
159 let day = record.timestamp_epoch / seconds_per_day;
160 *daily.entry(day).or_insert(0.0) += record.value;
161 }
162
163 let mut days: Vec<i64> = daily.keys().copied().collect();
165 days.sort_unstable();
166
167 if days.is_empty() {
168 return Vec::new();
169 }
170
171 let first_day = days[0];
172 let last_day = *days.last().unwrap_or(&first_day);
173 let range = (last_day - first_day + 1) as usize;
174
175 let mut series = vec![0.0; range];
176 for (&day, &val) in &daily {
177 let idx = (day - first_day) as usize;
178 if idx < series.len() {
179 series[idx] = val;
180 }
181 }
182
183 series
184 }
185
186 fn autocorrelation(&self, series: &[f64], lag: usize) -> f64 {
188 if series.len() <= lag {
189 return 0.0;
190 }
191
192 let n = series.len();
193 let mean = series.iter().sum::<f64>() / n as f64;
194 let variance: f64 = series.iter().map(|x| (x - mean).powi(2)).sum::<f64>() / n as f64;
195
196 if variance < 1e-12 {
197 return 0.0;
198 }
199
200 let mut cov = 0.0;
201 for i in 0..(n - lag) {
202 cov += (series[i] - mean) * (series[i + lag] - mean);
203 }
204 cov /= n as f64;
205
206 cov / variance
207 }
208
209 fn compute_period_end_spike(&self, sorted_records: &[&TemporalRecord]) -> f64 {
211 let mut end_values = Vec::new();
212 let mut rest_values = Vec::new();
213
214 for record in sorted_records {
215 let day_of_month = self.day_of_month(record.timestamp_epoch);
216 let days_in_month = self.days_in_month(record.timestamp_epoch);
217
218 if day_of_month > days_in_month.saturating_sub(5) {
219 end_values.push(record.value);
220 } else {
221 rest_values.push(record.value);
222 }
223 }
224
225 let mean_end = if end_values.is_empty() {
226 0.0
227 } else {
228 end_values.iter().sum::<f64>() / end_values.len() as f64
229 };
230
231 let mean_rest = if rest_values.is_empty() {
232 0.0
233 } else {
234 rest_values.iter().sum::<f64>() / rest_values.len() as f64
235 };
236
237 if mean_rest.abs() < 1e-12 {
238 return 1.0;
239 }
240
241 mean_end / mean_rest
242 }
243
244 fn compute_weekday_cv(&self, sorted_records: &[&TemporalRecord]) -> f64 {
246 let mut weekday_counts = [0usize; 7];
247
248 for record in sorted_records {
249 let weekday = self.weekday(record.timestamp_epoch);
250 weekday_counts[weekday] += 1;
251 }
252
253 let counts: Vec<f64> = weekday_counts.iter().map(|&c| c as f64).collect();
254 let mean = counts.iter().sum::<f64>() / 7.0;
255
256 if mean < 1e-12 {
257 return 0.0;
258 }
259
260 let variance = counts.iter().map(|c| (c - mean).powi(2)).sum::<f64>() / 7.0;
261 variance.sqrt() / mean
262 }
263
264 fn day_of_month(&self, epoch: i64) -> u32 {
266 let days_since_epoch = epoch / 86400;
269 let (_, _, day) = days_to_ymd(days_since_epoch);
270 day
271 }
272
273 fn days_in_month(&self, epoch: i64) -> u32 {
275 let days_since_epoch = epoch / 86400;
276 let (year, month, _) = days_to_ymd(days_since_epoch);
277 match month {
278 1 | 3 | 5 | 7 | 8 | 10 | 12 => 31,
279 4 | 6 | 9 | 11 => 30,
280 2 => {
281 if (year % 4 == 0 && year % 100 != 0) || (year % 400 == 0) {
282 29
283 } else {
284 28
285 }
286 }
287 _ => 30,
288 }
289 }
290
291 fn weekday(&self, epoch: i64) -> usize {
293 let days = epoch / 86400;
295 ((days % 7 + 3) % 7) as usize
296 }
297}
298
/// Converts a day count relative to 1970-01-01 into a proleptic Gregorian
/// `(year, month, day)` triple, with `month` in `1..=12` and `day` in `1..=31`.
///
/// Negative day counts (dates before 1970) are supported. The computation follows the
/// `civil_from_days` construction: the calendar is re-based to start on 1 March so that the
/// leap day is the last day of the (shifted) year, and time is split into 400-year eras of
/// 146 097 days each.
fn days_to_ymd(days: i64) -> (i64, u32, u32) {
    // Days between 0000-03-01 and 1970-01-01.
    let shifted = days + 719_468;

    // Floor division / non-negative remainder, so dates before year 0 land in the right era.
    let era = shifted.div_euclid(146_097);
    // Day of era, always in 0..146_097, hence the cast cannot truncate.
    let day_of_era = shifted.rem_euclid(146_097) as u32;

    // Year of era in 0..=399, correcting for the 4-, 100- and 400-year leap rules.
    let year_of_era =
        (day_of_era - day_of_era / 1460 + day_of_era / 36524 - day_of_era / 146096) / 365;
    let march_based_year = i64::from(year_of_era) + era * 400;

    // Day of the March-based year in 0..=365.
    let day_of_year = day_of_era - (365 * year_of_era + year_of_era / 4 - year_of_era / 100);
    // Month index with 0 = March, ..., 11 = February.
    let shifted_month = (5 * day_of_year + 2) / 153;

    let day = day_of_year - (153 * shifted_month + 2) / 5 + 1;
    let month = if shifted_month < 10 {
        shifted_month + 3
    } else {
        shifted_month - 9
    };
    // January and February belong to the following civil year.
    let year = if month <= 2 {
        march_based_year + 1
    } else {
        march_based_year
    };

    (year, month, day)
}
318
319impl Default for TemporalFidelityAnalyzer {
320 fn default() -> Self {
321 Self::new()
322 }
323}
324
#[cfg(test)]
// Unwrapping is acceptable in tests: a failed `analyze` call should abort the test.
#[allow(clippy::unwrap_used)]
mod tests {
    use super::*;

    /// 2024-01-01T00:00:00Z, a Monday.
    const START_EPOCH: i64 = 1_704_067_200;

    /// Builds one record per value, spaced exactly one day apart starting at `start_epoch`.
    fn make_daily_records(values: &[f64], start_epoch: i64) -> Vec<TemporalRecord> {
        values
            .iter()
            .enumerate()
            .map(|(offset, &value)| TemporalRecord {
                timestamp_epoch: start_epoch + (offset as i64) * 86400,
                value,
            })
            .collect()
    }

    /// Twelve weeks of high weekday / low weekend values must show positive weekly
    /// autocorrelation.
    #[test]
    fn test_valid_temporal_patterns() {
        let base = 100.0;
        let values: Vec<f64> = (0..12)
            .flat_map(|week| {
                (0..7).map(move |day| {
                    if day < 5 {
                        // Slow upward trend on the first five days of each week.
                        base + f64::from(week) * 2.0
                    } else {
                        base * 0.3
                    }
                })
            })
            .collect();

        let records = make_daily_records(&values, START_EPOCH);
        let result = TemporalFidelityAnalyzer::new().analyze(&records).unwrap();

        assert_eq!(result.total_records, 84);
        assert!(result.weekly_autocorrelation > 0.0);
    }

    /// A perfectly flat series has no temporal structure and must fail the default threshold.
    #[test]
    fn test_invalid_temporal_flat() {
        let records = make_daily_records(&[100.0; 90], START_EPOCH);
        let result = TemporalFidelityAnalyzer::new().analyze(&records).unwrap();

        assert!(result.temporal_fidelity_score < 0.7);
        assert!(!result.passes);
    }

    /// Empty input produces a zero score without erroring.
    #[test]
    fn test_empty_records() {
        let result = TemporalFidelityAnalyzer::new().analyze(&[]).unwrap();

        assert_eq!(result.total_records, 0);
        assert_eq!(result.temporal_fidelity_score, 0.0);
    }
}