
datasynth_eval/statistical/temporal.rs

//! Temporal pattern analysis.
//!
//! Analyzes the temporal distribution of transactions including
//! seasonality patterns, day-of-week effects, and periodic spikes.
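//!
//! # Example
//!
//! A minimal usage sketch; the crate and module path in the `use` line is
//! assumed from this file's location, so the snippet is marked `ignore`
//! rather than compiled as a doctest.
//!
//! ```ignore
//! use datasynth_eval::statistical::temporal::{TemporalAnalyzer, TemporalEntry};
//! use chrono::NaiveDate;
//!
//! let entries: Vec<TemporalEntry> = (1..=20)
//!     .map(|day| TemporalEntry {
//!         posting_date: NaiveDate::from_ymd_opt(2024, 1, day).unwrap(),
//!     })
//!     .collect();
//!
//! let analysis = TemporalAnalyzer::new().analyze(&entries).unwrap();
//! println!("weekend ratio: {:.2}", analysis.weekend_ratio);
//! ```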

use crate::error::{EvalError, EvalResult};
use chrono::{Datelike, NaiveDate, Weekday};
use serde::{Deserialize, Serialize};
use std::collections::HashMap;

/// Expected seasonality spike multipliers.
pub const MONTH_END_SPIKE: f64 = 2.5;
pub const QUARTER_END_SPIKE: f64 = 4.0;
pub const YEAR_END_SPIKE: f64 = 6.0;
pub const WEEKEND_RATIO: f64 = 0.10;

/// Results of temporal pattern analysis.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TemporalAnalysis {
    /// Number of entries analyzed.
    pub sample_size: usize,
    /// Start date of data.
    pub start_date: NaiveDate,
    /// End date of data.
    pub end_date: NaiveDate,
    /// Number of days spanned.
    pub days_spanned: i64,
    /// Correlation with expected temporal pattern.
    pub pattern_correlation: f64,
    /// Actual month-end spike ratio (vs average).
    pub month_end_spike: f64,
    /// Actual quarter-end spike ratio.
    pub quarter_end_spike: f64,
    /// Actual year-end spike ratio.
    pub year_end_spike: f64,
    /// Weekend activity ratio.
    pub weekend_ratio: f64,
    /// Day-of-week distribution.
    pub day_of_week_distribution: HashMap<String, f64>,
    /// Day-of-week correlation with expected pattern.
    pub day_of_week_correlation: f64,
    /// Monthly volume distribution.
    pub monthly_distribution: HashMap<u32, usize>,
    /// Whether patterns match expectations.
    pub passes: bool,
}

/// Input for temporal analysis.
#[derive(Debug, Clone)]
pub struct TemporalEntry {
    /// Posting date of the entry.
    pub posting_date: NaiveDate,
}

/// Expected day-of-week weights.
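///
/// Relative weights (Wednesday/Thursday = 1.0); they are normalized by their
/// sum before being compared with the observed day-of-week distribution in
/// `calculate_dow_correlation`.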
const DAY_WEIGHTS: [f64; 7] = [
    1.3,  // Monday
    1.1,  // Tuesday
    1.0,  // Wednesday
    1.0,  // Thursday
    0.85, // Friday
    0.05, // Saturday
    0.05, // Sunday
];

/// Analyzer for temporal patterns.
pub struct TemporalAnalyzer {
    /// Whether to analyze industry seasonality.
    analyze_industry_seasonality: bool,
}

impl TemporalAnalyzer {
    /// Create a new analyzer.
    pub fn new() -> Self {
        Self {
            analyze_industry_seasonality: false,
        }
    }

    /// Enable industry seasonality analysis.
    pub fn with_industry_seasonality(mut self) -> Self {
        self.analyze_industry_seasonality = true;
        self
    }

    /// Analyze temporal patterns from entries.
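    ///
    /// # Errors
    ///
    /// Returns `EvalError::InsufficientData` if fewer than 10 entries are
    /// provided.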
    pub fn analyze(&self, entries: &[TemporalEntry]) -> EvalResult<TemporalAnalysis> {
        let n = entries.len();
        if n < 10 {
            return Err(EvalError::InsufficientData {
                required: 10,
                actual: n,
            });
        }

        // Get date range
        let dates: Vec<NaiveDate> = entries.iter().map(|e| e.posting_date).collect();
        let start_date = *dates.iter().min().unwrap();
        let end_date = *dates.iter().max().unwrap();
        let days_spanned = (end_date - start_date).num_days() + 1;

        // Count by date
        let mut daily_counts: HashMap<NaiveDate, usize> = HashMap::new();
        for entry in entries {
            *daily_counts.entry(entry.posting_date).or_insert(0) += 1;
        }

        // Calculate average daily volume
        let avg_daily = n as f64 / days_spanned as f64;

        // Month-end spike analysis
        let month_end_spike = self.calculate_month_end_spike(&daily_counts, avg_daily);

        // Quarter-end spike analysis
        let quarter_end_spike = self.calculate_quarter_end_spike(&daily_counts, avg_daily);

        // Year-end spike analysis
        let year_end_spike = self.calculate_year_end_spike(&daily_counts, avg_daily);

        // Weekend ratio
        let weekend_count = entries
            .iter()
            .filter(|e| {
                let weekday = e.posting_date.weekday();
                weekday == Weekday::Sat || weekday == Weekday::Sun
            })
            .count();
        let weekend_ratio = weekend_count as f64 / n as f64;

        // Day-of-week distribution
        let mut dow_counts = [0usize; 7];
        for entry in entries {
            let idx = entry.posting_date.weekday().num_days_from_monday() as usize;
            dow_counts[idx] += 1;
        }
        let total_dow: usize = dow_counts.iter().sum();
        let mut day_of_week_distribution = HashMap::new();
        let weekdays = [
            "Monday",
            "Tuesday",
            "Wednesday",
            "Thursday",
            "Friday",
            "Saturday",
            "Sunday",
        ];
        for (i, name) in weekdays.iter().enumerate() {
            day_of_week_distribution
                .insert(name.to_string(), dow_counts[i] as f64 / total_dow as f64);
        }

        // Day-of-week correlation
        let day_of_week_correlation = self.calculate_dow_correlation(&dow_counts);

        // Monthly distribution
        let mut monthly_distribution: HashMap<u32, usize> = HashMap::new();
        for entry in entries {
            *monthly_distribution
                .entry(entry.posting_date.month())
                .or_insert(0) += 1;
        }

        // Overall pattern correlation
        let pattern_correlation =
            self.calculate_pattern_correlation(&daily_counts, start_date, end_date, avg_daily);

        // Pass/fail check
        let passes = pattern_correlation >= 0.5 && (weekend_ratio - WEEKEND_RATIO).abs() < 0.15;

        Ok(TemporalAnalysis {
            sample_size: n,
            start_date,
            end_date,
            days_spanned,
            pattern_correlation,
            month_end_spike,
            quarter_end_spike,
            year_end_spike,
            weekend_ratio,
            day_of_week_distribution,
            day_of_week_correlation,
            monthly_distribution,
            passes,
        })
    }

    /// Calculate month-end spike ratio.
    fn calculate_month_end_spike(
        &self,
        daily_counts: &HashMap<NaiveDate, usize>,
        avg_daily: f64,
    ) -> f64 {
        if avg_daily <= 0.0 {
            return 1.0;
        }

        let month_end_dates: Vec<&NaiveDate> = daily_counts
            .keys()
            .filter(|d| self.is_month_end(**d))
            .collect();

        if month_end_dates.is_empty() {
            return 1.0;
        }

        let month_end_total: usize = month_end_dates
            .iter()
            .filter_map(|d| daily_counts.get(*d))
            .sum();
        let month_end_avg = month_end_total as f64 / month_end_dates.len() as f64;

        month_end_avg / avg_daily
    }

    /// Calculate quarter-end spike ratio.
    fn calculate_quarter_end_spike(
        &self,
        daily_counts: &HashMap<NaiveDate, usize>,
        avg_daily: f64,
    ) -> f64 {
        if avg_daily <= 0.0 {
            return 1.0;
        }

        let quarter_end_dates: Vec<&NaiveDate> = daily_counts
            .keys()
            .filter(|d| self.is_quarter_end(**d))
            .collect();

        if quarter_end_dates.is_empty() {
            return 1.0;
        }

        let quarter_end_total: usize = quarter_end_dates
            .iter()
            .filter_map(|d| daily_counts.get(*d))
            .sum();
        let quarter_end_avg = quarter_end_total as f64 / quarter_end_dates.len() as f64;

        quarter_end_avg / avg_daily
    }

    /// Calculate year-end spike ratio.
    fn calculate_year_end_spike(
        &self,
        daily_counts: &HashMap<NaiveDate, usize>,
        avg_daily: f64,
    ) -> f64 {
        if avg_daily <= 0.0 {
            return 1.0;
        }

        let year_end_dates: Vec<&NaiveDate> = daily_counts
            .keys()
            .filter(|d| self.is_year_end(**d))
            .collect();

        if year_end_dates.is_empty() {
            return 1.0;
        }

        let year_end_total: usize = year_end_dates
            .iter()
            .filter_map(|d| daily_counts.get(*d))
            .sum();
        let year_end_avg = year_end_total as f64 / year_end_dates.len() as f64;

        year_end_avg / avg_daily
    }

    /// Check if date is in month-end period (last 5 days).
    fn is_month_end(&self, date: NaiveDate) -> bool {
        let next_month = if date.month() == 12 {
            NaiveDate::from_ymd_opt(date.year() + 1, 1, 1)
        } else {
            NaiveDate::from_ymd_opt(date.year(), date.month() + 1, 1)
        };
        if let Some(next) = next_month {
            let days_to_end = (next - date).num_days();
            days_to_end <= 5
        } else {
            false
        }
    }

    /// Check if date is in quarter-end period.
    fn is_quarter_end(&self, date: NaiveDate) -> bool {
        let quarter_end_months = [3, 6, 9, 12];
        quarter_end_months.contains(&date.month()) && self.is_month_end(date)
    }

    /// Check if date is in year-end period.
    fn is_year_end(&self, date: NaiveDate) -> bool {
        date.month() == 12 && self.is_month_end(date)
    }

    /// Calculate day-of-week correlation with expected pattern.
    fn calculate_dow_correlation(&self, observed: &[usize; 7]) -> f64 {
        let total: usize = observed.iter().sum();
        if total == 0 {
            return 0.0;
        }

        // Normalize observed to proportions
        let observed_norm: Vec<f64> = observed.iter().map(|&c| c as f64 / total as f64).collect();

        // Normalize expected weights
        let total_weight: f64 = DAY_WEIGHTS.iter().sum();
        let expected_norm: Vec<f64> = DAY_WEIGHTS.iter().map(|&w| w / total_weight).collect();

        // Pearson correlation
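        // r = sum((o_i - o_mean) * (e_i - e_mean))
        //     / sqrt(sum((o_i - o_mean)^2) * sum((e_i - e_mean)^2))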
        let mean_obs = observed_norm.iter().sum::<f64>() / 7.0;
        let mean_exp = expected_norm.iter().sum::<f64>() / 7.0;

        let numerator: f64 = (0..7)
            .map(|i| (observed_norm[i] - mean_obs) * (expected_norm[i] - mean_exp))
            .sum();

        let var_obs: f64 = observed_norm.iter().map(|o| (o - mean_obs).powi(2)).sum();
        let var_exp: f64 = expected_norm.iter().map(|e| (e - mean_exp).powi(2)).sum();

        let denominator = (var_obs * var_exp).sqrt();

        if denominator > 0.0 {
            numerator / denominator
        } else {
            0.0
        }
    }

    /// Calculate overall pattern correlation.
    fn calculate_pattern_correlation(
        &self,
        daily_counts: &HashMap<NaiveDate, usize>,
        start_date: NaiveDate,
        end_date: NaiveDate,
        avg_daily: f64,
    ) -> f64 {
        // Generate expected pattern for each day
        let mut expected: Vec<f64> = Vec::new();
        let mut observed: Vec<f64> = Vec::new();

        let mut current = start_date;
        while current <= end_date {
            let mut multiplier = 1.0;

            // Weekend effect
            let weekday = current.weekday();
            if weekday == Weekday::Sat || weekday == Weekday::Sun {
                multiplier *= 0.1;
            } else {
                // Day-of-week effect
                let dow_idx = weekday.num_days_from_monday() as usize;
                multiplier *= DAY_WEIGHTS[dow_idx];
            }

            // Month-end effect; the quarter-/year-end factors below stack on
            // top of this so the combined multipliers match the expected
            // spike constants
            if self.is_month_end(current) {
                multiplier *= MONTH_END_SPIKE;
            }

            // Year-end effect (stronger)
            if self.is_year_end(current) {
                multiplier *= YEAR_END_SPIKE / MONTH_END_SPIKE;
            } else if self.is_quarter_end(current) {
                multiplier *= QUARTER_END_SPIKE / MONTH_END_SPIKE;
            }

            expected.push(avg_daily * multiplier);
            observed.push(*daily_counts.get(&current).unwrap_or(&0) as f64);

            current = current.succ_opt().unwrap_or(current);
        }

        // Calculate Pearson correlation
        if expected.is_empty() {
            return 0.0;
        }

        let n = expected.len() as f64;
        let mean_exp = expected.iter().sum::<f64>() / n;
        let mean_obs = observed.iter().sum::<f64>() / n;

        let numerator: f64 = expected
            .iter()
            .zip(observed.iter())
            .map(|(e, o)| (e - mean_exp) * (o - mean_obs))
            .sum();

        let var_exp: f64 = expected.iter().map(|e| (e - mean_exp).powi(2)).sum();
        let var_obs: f64 = observed.iter().map(|o| (o - mean_obs).powi(2)).sum();

        let denominator = (var_exp * var_obs).sqrt();

        if denominator > 0.0 {
            numerator / denominator
        } else {
            0.0
        }
    }
}

impl Default for TemporalAnalyzer {
    fn default() -> Self {
        Self::new()
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    fn create_entries(dates: Vec<NaiveDate>) -> Vec<TemporalEntry> {
        dates
            .into_iter()
            .map(|d| TemporalEntry { posting_date: d })
            .collect()
    }

    #[test]
    fn test_temporal_analysis_basic() {
        let entries = create_entries(vec![
            NaiveDate::from_ymd_opt(2024, 1, 15).unwrap(),
            NaiveDate::from_ymd_opt(2024, 1, 16).unwrap(),
            NaiveDate::from_ymd_opt(2024, 1, 17).unwrap(),
            NaiveDate::from_ymd_opt(2024, 1, 18).unwrap(),
            NaiveDate::from_ymd_opt(2024, 1, 19).unwrap(),
            NaiveDate::from_ymd_opt(2024, 1, 22).unwrap(),
            NaiveDate::from_ymd_opt(2024, 1, 23).unwrap(),
            NaiveDate::from_ymd_opt(2024, 1, 24).unwrap(),
            NaiveDate::from_ymd_opt(2024, 1, 25).unwrap(),
            NaiveDate::from_ymd_opt(2024, 1, 26).unwrap(),
        ]);

        let analyzer = TemporalAnalyzer::new();
        let result = analyzer.analyze(&entries).unwrap();

        assert_eq!(result.sample_size, 10);
        assert!(!result.day_of_week_distribution.is_empty());
    }

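    // Illustrative check (not part of the original suite): entries concentrated
    // in the last days of January should push `month_end_spike` above 1.0,
    // since `calculate_month_end_spike` compares month-end daily volume to the
    // overall daily average.
    #[test]
    fn test_month_end_spike_detected() {
        let mut dates = Vec::new();
        // Light mid-month activity: 2 entries per day on Jan 10-12, 2024.
        for day in 10..=12 {
            for _ in 0..2 {
                dates.push(NaiveDate::from_ymd_opt(2024, 1, day).unwrap());
            }
        }
        // Heavy month-end activity: 4 entries per day on Jan 29-31, 2024.
        for day in 29..=31 {
            for _ in 0..4 {
                dates.push(NaiveDate::from_ymd_opt(2024, 1, day).unwrap());
            }
        }
        let entries = create_entries(dates);

        let analyzer = TemporalAnalyzer::new();
        let result = analyzer.analyze(&entries).unwrap();

        assert!(result.month_end_spike > 1.0);
    }
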
    #[test]
    fn test_weekend_ratio() {
        let mut entries = Vec::new();
        // 10 consecutive days starting Monday, Jan 1, 2024; Jan 6 and Jan 7
        // fall on Saturday and Sunday, so 2 of the 10 entries are weekend days.
        for i in 1..=10 {
            entries.push(TemporalEntry {
                posting_date: NaiveDate::from_ymd_opt(2024, 1, i).unwrap(),
            });
        }

        let analyzer = TemporalAnalyzer::new();
        let result = analyzer.analyze(&entries).unwrap();

        // 2 weekend entries out of 10 total.
        assert!((result.weekend_ratio - 0.2).abs() < 1e-9);
    }

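    // Illustrative check (not part of the original suite): late-December
    // activity should register as both a quarter-end and a year-end spike,
    // because December month-end days satisfy `is_quarter_end` and `is_year_end`.
    #[test]
    fn test_year_end_spike_detected() {
        let mut dates = Vec::new();
        // Light mid-month activity: 2 entries per day on Dec 10-12, 2024.
        for day in 10..=12 {
            for _ in 0..2 {
                dates.push(NaiveDate::from_ymd_opt(2024, 12, day).unwrap());
            }
        }
        // Heavy year-end activity: 4 entries per day on Dec 29-31, 2024.
        for day in 29..=31 {
            for _ in 0..4 {
                dates.push(NaiveDate::from_ymd_opt(2024, 12, day).unwrap());
            }
        }
        let entries = create_entries(dates);

        let analyzer = TemporalAnalyzer::new();
        let result = analyzer.analyze(&entries).unwrap();

        assert!(result.quarter_end_spike > 1.0);
        assert!(result.year_end_spike > 1.0);
    }
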
    #[test]
    fn test_insufficient_data() {
        let entries = create_entries(vec![NaiveDate::from_ymd_opt(2024, 1, 1).unwrap()]);
        let analyzer = TemporalAnalyzer::new();
        let result = analyzer.analyze(&entries);
        assert!(matches!(result, Err(EvalError::InsufficientData { .. })));
    }
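
    // Illustrative check (not part of the original suite): a weekday-heavy,
    // weekend-free sample should correlate positively with DAY_WEIGHTS.
    #[test]
    fn test_day_of_week_correlation_weekday_heavy() {
        let mut dates = Vec::new();
        // January 2024 starts on a Monday; pick weekdays weighted toward Monday.
        for day in [1u32, 8, 15, 22, 29] {
            dates.push(NaiveDate::from_ymd_opt(2024, 1, day).unwrap()); // Mondays
        }
        for day in [2, 9, 16, 23] {
            dates.push(NaiveDate::from_ymd_opt(2024, 1, day).unwrap()); // Tuesdays
        }
        for day in [3, 10, 17] {
            dates.push(NaiveDate::from_ymd_opt(2024, 1, day).unwrap()); // Wednesdays
        }
        for day in [4, 11, 18] {
            dates.push(NaiveDate::from_ymd_opt(2024, 1, day).unwrap()); // Thursdays
        }
        for day in [5, 12] {
            dates.push(NaiveDate::from_ymd_opt(2024, 1, day).unwrap()); // Fridays
        }
        let entries = create_entries(dates);

        let analyzer = TemporalAnalyzer::new();
        let result = analyzer.analyze(&entries).unwrap();

        assert_eq!(result.weekend_ratio, 0.0);
        assert!(result.day_of_week_correlation > 0.5);
    }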
469}