Skip to main content

datasynth_eval/ml/
temporal_fidelity.rs

1//! Temporal fidelity evaluation.
2//!
3//! Validates temporal patterns including autocorrelation at weekly and monthly
4//! lags, period-end spikes, and weekday coefficient of variation.
5
6use crate::error::EvalResult;
7use serde::{Deserialize, Serialize};
8use std::collections::HashMap;
9
/// A single temporal record with a timestamp and associated value.
///
/// Records are the raw input of the temporal fidelity analysis: they are
/// grouped into days by dividing `timestamp_epoch` by 86 400, and their
/// `value`s are summed or averaged per group.
#[derive(Debug, Clone)]
pub struct TemporalRecord {
    /// Unix epoch timestamp in seconds.
    pub timestamp_epoch: i64,
    /// Observed value at this timestamp.
    pub value: f64,
}
18
/// Pass/fail limits used when judging a temporal fidelity analysis.
#[derive(Debug, Clone)]
pub struct TemporalFidelityThresholds {
    /// Lowest composite temporal fidelity score that is still accepted;
    /// scores strictly below this value are reported as an issue.
    pub min_temporal_fidelity: f64,
}

impl Default for TemporalFidelityThresholds {
    /// Builds the default thresholds: a minimum fidelity score of `0.70`.
    fn default() -> Self {
        let min_temporal_fidelity = 0.70;
        Self {
            min_temporal_fidelity,
        }
    }
}
33
/// Results of temporal fidelity analysis.
///
/// Bundles the individual temporal signals, the composite score derived from
/// them, and the outcome of the threshold check.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TemporalFidelityAnalysis {
    /// Overall temporal fidelity score (0.0-1.0).
    ///
    /// Weighted sum of the clamped seasonality strength (0.4), a period-end
    /// spike factor (0.3) and the clamped weekday CV (0.3).
    pub temporal_fidelity_score: f64,
    /// Larger of the absolute weekly and absolute monthly autocorrelation.
    pub seasonality_strength: f64,
    /// Autocorrelation of the daily series at lag 7 (weekly pattern).
    pub weekly_autocorrelation: f64,
    /// Autocorrelation of the daily series at lag 30 (monthly pattern).
    pub monthly_autocorrelation: f64,
    /// Ratio of mean(last 5 days of month) to mean(rest of month).
    ///
    /// Reported as `1.0` when the mean of the rest of the month is
    /// (numerically) zero, and for empty input.
    pub period_end_spike_ratio: f64,
    /// Coefficient of variation of counts across weekday bins.
    pub weekday_cv: f64,
    /// Total number of records analyzed.
    pub total_records: usize,
    /// Whether the analysis passes all thresholds.
    ///
    /// For non-empty input this is `true` exactly when `issues` is empty.
    /// Empty input is reported with `passes == true` even though `issues`
    /// then contains "No records provided".
    pub passes: bool,
    /// Issues found during analysis.
    pub issues: Vec<String>,
}
56
57/// Analyzer for temporal fidelity.
58pub struct TemporalFidelityAnalyzer {
59    thresholds: TemporalFidelityThresholds,
60}
61
62impl TemporalFidelityAnalyzer {
63    /// Create a new analyzer with default thresholds.
64    pub fn new() -> Self {
65        Self {
66            thresholds: TemporalFidelityThresholds::default(),
67        }
68    }
69
70    /// Create an analyzer with custom thresholds.
71    pub fn with_thresholds(thresholds: TemporalFidelityThresholds) -> Self {
72        Self { thresholds }
73    }
74
75    /// Analyze temporal fidelity.
76    pub fn analyze(&self, records: &[TemporalRecord]) -> EvalResult<TemporalFidelityAnalysis> {
77        let mut issues = Vec::new();
78        let total_records = records.len();
79
80        if records.is_empty() {
81            return Ok(TemporalFidelityAnalysis {
82                temporal_fidelity_score: 0.0,
83                seasonality_strength: 0.0,
84                weekly_autocorrelation: 0.0,
85                monthly_autocorrelation: 0.0,
86                period_end_spike_ratio: 1.0,
87                weekday_cv: 0.0,
88                total_records: 0,
89                passes: true,
90                issues: vec!["No records provided".to_string()],
91            });
92        }
93
94        // Sort by timestamp
95        let mut sorted: Vec<&TemporalRecord> = records.iter().collect();
96        sorted.sort_by_key(|r| r.timestamp_epoch);
97
98        // Build daily aggregated series
99        let daily_values = self.aggregate_daily(&sorted);
100
101        // Compute autocorrelations
102        let weekly_autocorrelation = self.autocorrelation(&daily_values, 7);
103        let monthly_autocorrelation = self.autocorrelation(&daily_values, 30);
104        let seasonality_strength = weekly_autocorrelation
105            .abs()
106            .max(monthly_autocorrelation.abs());
107
108        // Compute period-end spike ratio
109        let period_end_spike_ratio = self.compute_period_end_spike(&sorted);
110
111        // Compute weekday CV
112        let weekday_cv = self.compute_weekday_cv(&sorted);
113
114        // Composite score
115        // Reward: strong seasonality, clear period-end spikes, moderate weekday variation
116        let seasonality_factor = seasonality_strength.clamp(0.0, 1.0);
117        let spike_factor = if period_end_spike_ratio > 1.0 {
118            (1.0 - 1.0 / period_end_spike_ratio).clamp(0.0, 1.0)
119        } else {
120            0.0
121        };
122        let weekday_factor = weekday_cv.clamp(0.0, 1.0);
123
124        let temporal_fidelity_score =
125            (seasonality_factor * 0.4 + spike_factor * 0.3 + weekday_factor * 0.3).clamp(0.0, 1.0);
126
127        if temporal_fidelity_score < self.thresholds.min_temporal_fidelity {
128            issues.push(format!(
129                "Temporal fidelity score {:.4} < {:.4} (threshold)",
130                temporal_fidelity_score, self.thresholds.min_temporal_fidelity
131            ));
132        }
133
134        let passes = issues.is_empty();
135
136        Ok(TemporalFidelityAnalysis {
137            temporal_fidelity_score,
138            seasonality_strength,
139            weekly_autocorrelation,
140            monthly_autocorrelation,
141            period_end_spike_ratio,
142            weekday_cv,
143            total_records,
144            passes,
145            issues,
146        })
147    }
148
149    /// Aggregate records into daily value sums.
150    fn aggregate_daily(&self, sorted_records: &[&TemporalRecord]) -> Vec<f64> {
151        if sorted_records.is_empty() {
152            return Vec::new();
153        }
154
155        let seconds_per_day = 86400i64;
156        let mut daily: HashMap<i64, f64> = HashMap::new();
157
158        for record in sorted_records {
159            let day = record.timestamp_epoch / seconds_per_day;
160            *daily.entry(day).or_insert(0.0) += record.value;
161        }
162
163        // Convert to ordered series
164        let mut days: Vec<i64> = daily.keys().copied().collect();
165        days.sort_unstable();
166
167        if days.is_empty() {
168            return Vec::new();
169        }
170
171        let first_day = days[0];
172        let last_day = *days.last().unwrap_or(&first_day);
173        let range = (last_day - first_day + 1) as usize;
174
175        let mut series = vec![0.0; range];
176        for (&day, &val) in &daily {
177            let idx = (day - first_day) as usize;
178            if idx < series.len() {
179                series[idx] = val;
180            }
181        }
182
183        series
184    }
185
186    /// Compute autocorrelation at the given lag.
187    fn autocorrelation(&self, series: &[f64], lag: usize) -> f64 {
188        if series.len() <= lag {
189            return 0.0;
190        }
191
192        let n = series.len();
193        let mean = series.iter().sum::<f64>() / n as f64;
194        let variance: f64 = series.iter().map(|x| (x - mean).powi(2)).sum::<f64>() / n as f64;
195
196        if variance < 1e-12 {
197            return 0.0;
198        }
199
200        let mut cov = 0.0;
201        for i in 0..(n - lag) {
202            cov += (series[i] - mean) * (series[i + lag] - mean);
203        }
204        cov /= n as f64;
205
206        cov / variance
207    }
208
209    /// Compute period-end spike: ratio of mean(last 5 days of month) to mean(rest).
210    fn compute_period_end_spike(&self, sorted_records: &[&TemporalRecord]) -> f64 {
211        let mut end_values = Vec::new();
212        let mut rest_values = Vec::new();
213
214        for record in sorted_records {
215            let day_of_month = self.day_of_month(record.timestamp_epoch);
216            let days_in_month = self.days_in_month(record.timestamp_epoch);
217
218            if day_of_month > days_in_month.saturating_sub(5) {
219                end_values.push(record.value);
220            } else {
221                rest_values.push(record.value);
222            }
223        }
224
225        let mean_end = if end_values.is_empty() {
226            0.0
227        } else {
228            end_values.iter().sum::<f64>() / end_values.len() as f64
229        };
230
231        let mean_rest = if rest_values.is_empty() {
232            0.0
233        } else {
234            rest_values.iter().sum::<f64>() / rest_values.len() as f64
235        };
236
237        if mean_rest.abs() < 1e-12 {
238            return 1.0;
239        }
240
241        mean_end / mean_rest
242    }
243
244    /// Compute coefficient of variation of record counts across weekday bins.
245    fn compute_weekday_cv(&self, sorted_records: &[&TemporalRecord]) -> f64 {
246        let mut weekday_counts = [0usize; 7];
247
248        for record in sorted_records {
249            let weekday = self.weekday(record.timestamp_epoch);
250            weekday_counts[weekday] += 1;
251        }
252
253        let counts: Vec<f64> = weekday_counts.iter().map(|&c| c as f64).collect();
254        let mean = counts.iter().sum::<f64>() / 7.0;
255
256        if mean < 1e-12 {
257            return 0.0;
258        }
259
260        let variance = counts.iter().map(|c| (c - mean).powi(2)).sum::<f64>() / 7.0;
261        variance.sqrt() / mean
262    }
263
264    /// Get day-of-month (1-based) from epoch seconds.
265    fn day_of_month(&self, epoch: i64) -> u32 {
266        // Simple calculation: approximate using 86400 seconds per day
267        // Using a simplified Gregorian calendar conversion
268        let days_since_epoch = epoch / 86400;
269        let (_, _, day) = days_to_ymd(days_since_epoch);
270        day
271    }
272
273    /// Get approximate days in the month for the given epoch.
274    fn days_in_month(&self, epoch: i64) -> u32 {
275        let days_since_epoch = epoch / 86400;
276        let (year, month, _) = days_to_ymd(days_since_epoch);
277        match month {
278            1 | 3 | 5 | 7 | 8 | 10 | 12 => 31,
279            4 | 6 | 9 | 11 => 30,
280            2 => {
281                if (year % 4 == 0 && year % 100 != 0) || (year % 400 == 0) {
282                    29
283                } else {
284                    28
285                }
286            }
287            _ => 30,
288        }
289    }
290
291    /// Get weekday (0=Monday, 6=Sunday) from epoch seconds.
292    fn weekday(&self, epoch: i64) -> usize {
293        // January 1, 1970 was a Thursday (index 3 for Mon=0)
294        let days = epoch / 86400;
295        ((days % 7 + 3) % 7) as usize
296    }
297}
298
/// Convert a day count relative to 1970-01-01 into a proleptic Gregorian
/// `(year, month, day)` triple, with `month` in `1..=12` and `day` 1-based.
///
/// Works on a calendar whose year starts on March 1 so that the leap day is
/// the last day of the year, then maps the result back to January-based
/// years. Negative inputs (dates before 1970) are supported.
fn days_to_ymd(mut days: i64) -> (i64, u32, u32) {
    /// Days in one 400-year Gregorian cycle.
    const DAYS_PER_ERA: i64 = 146_097;

    // Re-base the count on 0000-03-01.
    days += 719_468;

    // Floor division by the era length, spelled out for negative counts.
    let floor_numerator = if days < 0 {
        days - (DAYS_PER_ERA - 1)
    } else {
        days
    };
    let era = floor_numerator / DAYS_PER_ERA;

    // Position inside the 400-year era: [0, 146096].
    let day_of_era = (days - era * DAYS_PER_ERA) as u32;
    let year_of_era =
        (day_of_era - day_of_era / 1460 + day_of_era / 36_524 - day_of_era / 146_096) / 365;

    // Position inside the March-based year: [0, 365].
    let day_of_year = day_of_era - (365 * year_of_era + year_of_era / 4 - year_of_era / 100);

    // March-based month index: 0 = March, ..., 11 = February.
    let shifted_month = (5 * day_of_year + 2) / 153;
    let day = day_of_year - (153 * shifted_month + 2) / 5 + 1;

    // January and February belong to the following civil year.
    let (month, year_carry) = if shifted_month < 10 {
        (shifted_month + 3, 0)
    } else {
        (shifted_month - 9, 1)
    };

    (i64::from(year_of_era) + era * 400 + year_carry, month, day)
}
318
319impl Default for TemporalFidelityAnalyzer {
320    fn default() -> Self {
321        Self::new()
322    }
323}
324
#[cfg(test)]
#[allow(clippy::unwrap_used)]
mod tests {
    use super::*;

    /// 2024-01-01T00:00:00Z, a Monday.
    const START_EPOCH: i64 = 1_704_067_200;

    /// Build one record per value, spaced exactly one day apart starting at
    /// `start_epoch`.
    fn make_daily_records(values: &[f64], start_epoch: i64) -> Vec<TemporalRecord> {
        let mut records = Vec::with_capacity(values.len());
        for (offset, value) in values.iter().copied().enumerate() {
            records.push(TemporalRecord {
                timestamp_epoch: start_epoch + (offset as i64) * 86400,
                value,
            });
        }
        records
    }

    /// A twelve-week series with busy weekdays and quiet weekends must show a
    /// positive weekly autocorrelation.
    #[test]
    fn test_valid_temporal_patterns() {
        let base = 100.0;
        let values: Vec<f64> = (0..12)
            .flat_map(|week| {
                (0..7).map(move |day| {
                    if day < 5 {
                        base + (week as f64) * 2.0
                    } else {
                        base * 0.3
                    }
                })
            })
            .collect();

        let records = make_daily_records(&values, START_EPOCH);
        let result = TemporalFidelityAnalyzer::new().analyze(&records).unwrap();

        assert_eq!(result.total_records, 84);
        assert!(result.weekly_autocorrelation > 0.0);
    }

    /// A constant series carries no temporal pattern, so it must score below
    /// the default threshold and fail.
    #[test]
    fn test_invalid_temporal_flat() {
        let records = make_daily_records(&[100.0; 90], START_EPOCH);
        let result = TemporalFidelityAnalyzer::new().analyze(&records).unwrap();

        assert!(result.temporal_fidelity_score < 0.7);
        assert!(!result.passes);
    }

    /// Empty input yields a zero score and a zero record count.
    #[test]
    fn test_empty_records() {
        let result = TemporalFidelityAnalyzer::new().analyze(&[]).unwrap();

        assert_eq!(result.total_records, 0);
        assert_eq!(result.temporal_fidelity_score, 0.0);
    }
}