datasynth_core/distributions/
temporal.rs

//! Temporal distribution samplers for realistic posting patterns.
//!
//! Implements seasonality, working-hour patterns, and period-end spikes
//! commonly observed in enterprise accounting systems.
5
6use chrono::{Datelike, Duration, NaiveDate, NaiveTime, Weekday};
7use rand::prelude::*;
8use rand_chacha::ChaCha8Rng;
9use serde::{Deserialize, Serialize};
10
11use super::holidays::HolidayCalendar;
12use super::seasonality::IndustrySeasonality;
13
14/// Configuration for seasonality patterns.
15#[derive(Debug, Clone, Serialize, Deserialize)]
16pub struct SeasonalityConfig {
17    /// Enable month-end volume spikes
18    pub month_end_spike: bool,
19    /// Month-end spike multiplier (e.g., 2.5 = 2.5x normal volume)
20    pub month_end_multiplier: f64,
21    /// Days before month-end to start spike
22    pub month_end_lead_days: u32,
23
24    /// Enable quarter-end spikes
25    pub quarter_end_spike: bool,
26    /// Quarter-end spike multiplier
27    pub quarter_end_multiplier: f64,
28
29    /// Enable year-end spikes
30    pub year_end_spike: bool,
31    /// Year-end spike multiplier
32    pub year_end_multiplier: f64,
33
34    /// Activity level on weekends (0.0 = no activity, 1.0 = normal)
35    pub weekend_activity: f64,
36    /// Activity level on holidays
37    pub holiday_activity: f64,
38
39    /// Enable day-of-week patterns (Monday catch-up, Friday slowdown)
40    pub day_of_week_patterns: bool,
41    /// Monday activity multiplier (catch-up from weekend)
42    pub monday_multiplier: f64,
43    /// Tuesday activity multiplier
44    pub tuesday_multiplier: f64,
45    /// Wednesday activity multiplier
46    pub wednesday_multiplier: f64,
47    /// Thursday activity multiplier
48    pub thursday_multiplier: f64,
49    /// Friday activity multiplier (early departures)
50    pub friday_multiplier: f64,
51}
52
53impl Default for SeasonalityConfig {
54    fn default() -> Self {
55        Self {
56            month_end_spike: true,
57            month_end_multiplier: 2.5,
58            month_end_lead_days: 5,
59            quarter_end_spike: true,
60            quarter_end_multiplier: 4.0,
61            year_end_spike: true,
62            year_end_multiplier: 6.0,
63            weekend_activity: 0.1,
64            holiday_activity: 0.05,
65            // Day-of-week patterns: humans work differently across the week
66            day_of_week_patterns: true,
67            monday_multiplier: 1.3,    // Catch-up from weekend backlog
68            tuesday_multiplier: 1.1,   // Still catching up
69            wednesday_multiplier: 1.0, // Midweek normal
70            thursday_multiplier: 1.0,  // Midweek normal
71            friday_multiplier: 0.85,   // Early departures, winding down
72        }
73    }
74}
75
76/// Configuration for working hours pattern.
77#[derive(Debug, Clone, Serialize, Deserialize)]
78pub struct WorkingHoursConfig {
79    /// Start of working day (hour, 0-23)
80    pub day_start: u8,
81    /// End of working day (hour, 0-23)
82    pub day_end: u8,
83    /// Peak hours during the day
84    pub peak_hours: Vec<u8>,
85    /// Weight for peak hours (multiplier)
86    pub peak_weight: f64,
87    /// Probability of after-hours posting
88    pub after_hours_probability: f64,
89}
90
91impl Default for WorkingHoursConfig {
92    fn default() -> Self {
93        Self {
94            day_start: 8,
95            day_end: 18,
96            peak_hours: vec![9, 10, 11, 14, 15, 16],
97            peak_weight: 1.5,
98            after_hours_probability: 0.05,
99        }
100    }
101}
102
103/// Sampler for temporal patterns in transaction generation.
104pub struct TemporalSampler {
105    rng: ChaCha8Rng,
106    seasonality_config: SeasonalityConfig,
107    working_hours_config: WorkingHoursConfig,
108    /// List of holiday dates (legacy)
109    holidays: Vec<NaiveDate>,
110    /// Industry-specific seasonality patterns (optional).
111    industry_seasonality: Option<IndustrySeasonality>,
112    /// Regional holiday calendar (optional).
113    holiday_calendar: Option<HolidayCalendar>,
114}
115
116impl TemporalSampler {
117    /// Create a new temporal sampler.
118    pub fn new(seed: u64) -> Self {
119        Self::with_config(
120            seed,
121            SeasonalityConfig::default(),
122            WorkingHoursConfig::default(),
123            Vec::new(),
124        )
125    }
126
127    /// Create a temporal sampler with custom configuration.
128    pub fn with_config(
129        seed: u64,
130        seasonality_config: SeasonalityConfig,
131        working_hours_config: WorkingHoursConfig,
132        holidays: Vec<NaiveDate>,
133    ) -> Self {
134        Self {
135            rng: ChaCha8Rng::seed_from_u64(seed),
136            seasonality_config,
137            working_hours_config,
138            holidays,
139            industry_seasonality: None,
140            holiday_calendar: None,
141        }
142    }
143
144    /// Create a temporal sampler with full enhanced configuration.
145    #[allow(clippy::too_many_arguments)]
146    pub fn with_full_config(
147        seed: u64,
148        seasonality_config: SeasonalityConfig,
149        working_hours_config: WorkingHoursConfig,
150        holidays: Vec<NaiveDate>,
151        industry_seasonality: Option<IndustrySeasonality>,
152        holiday_calendar: Option<HolidayCalendar>,
153    ) -> Self {
154        Self {
155            rng: ChaCha8Rng::seed_from_u64(seed),
156            seasonality_config,
157            working_hours_config,
158            holidays,
159            industry_seasonality,
160            holiday_calendar,
161        }
162    }
163
164    /// Set industry-specific seasonality.
165    pub fn with_industry_seasonality(mut self, seasonality: IndustrySeasonality) -> Self {
166        self.industry_seasonality = Some(seasonality);
167        self
168    }
169
170    /// Set regional holiday calendar.
171    pub fn with_holiday_calendar(mut self, calendar: HolidayCalendar) -> Self {
172        self.holiday_calendar = Some(calendar);
173        self
174    }
175
176    /// Set industry seasonality (mutable reference version).
177    pub fn set_industry_seasonality(&mut self, seasonality: IndustrySeasonality) {
178        self.industry_seasonality = Some(seasonality);
179    }
180
181    /// Set holiday calendar (mutable reference version).
182    pub fn set_holiday_calendar(&mut self, calendar: HolidayCalendar) {
183        self.holiday_calendar = Some(calendar);
184    }
185
186    /// Get the industry seasonality if set.
187    pub fn industry_seasonality(&self) -> Option<&IndustrySeasonality> {
188        self.industry_seasonality.as_ref()
189    }
190
191    /// Get the holiday calendar if set.
192    pub fn holiday_calendar(&self) -> Option<&HolidayCalendar> {
193        self.holiday_calendar.as_ref()
194    }
195
196    /// Generate US federal holidays for a given year.
197    pub fn generate_us_holidays(year: i32) -> Vec<NaiveDate> {
198        let mut holidays = Vec::new();
199
200        // New Year's Day
201        holidays.push(NaiveDate::from_ymd_opt(year, 1, 1).unwrap());
202        // Independence Day
203        holidays.push(NaiveDate::from_ymd_opt(year, 7, 4).unwrap());
204        // Christmas
205        holidays.push(NaiveDate::from_ymd_opt(year, 12, 25).unwrap());
206        // Thanksgiving (4th Thursday of November)
207        let first_thursday = (1..=7)
208            .map(|d| NaiveDate::from_ymd_opt(year, 11, d).unwrap())
209            .find(|d| d.weekday() == Weekday::Thu)
210            .unwrap();
211        let thanksgiving = first_thursday + Duration::weeks(3);
212        holidays.push(thanksgiving);
213
214        holidays
215    }
216
217    /// Check if a date is a weekend.
218    pub fn is_weekend(&self, date: NaiveDate) -> bool {
219        matches!(date.weekday(), Weekday::Sat | Weekday::Sun)
220    }
221
222    /// Get the day-of-week activity multiplier.
223    ///
224    /// Returns a multiplier based on the day of the week:
225    /// - Monday: Higher activity (catch-up from weekend)
226    /// - Tuesday: Slightly elevated
227    /// - Wednesday/Thursday: Normal
228    /// - Friday: Reduced (early departures, winding down)
229    /// - Saturday/Sunday: Uses weekend_activity setting
230    pub fn get_day_of_week_multiplier(&self, date: NaiveDate) -> f64 {
231        if !self.seasonality_config.day_of_week_patterns {
232            return 1.0;
233        }
234
235        match date.weekday() {
236            Weekday::Mon => self.seasonality_config.monday_multiplier,
237            Weekday::Tue => self.seasonality_config.tuesday_multiplier,
238            Weekday::Wed => self.seasonality_config.wednesday_multiplier,
239            Weekday::Thu => self.seasonality_config.thursday_multiplier,
240            Weekday::Fri => self.seasonality_config.friday_multiplier,
241            Weekday::Sat | Weekday::Sun => 1.0, // Weekend activity handled separately
242        }
243    }
244
245    /// Check if a date is a holiday.
246    pub fn is_holiday(&self, date: NaiveDate) -> bool {
247        // Check legacy holidays list
248        if self.holidays.contains(&date) {
249            return true;
250        }
251
252        // Check holiday calendar if available
253        if let Some(ref calendar) = self.holiday_calendar {
254            if calendar.is_holiday(date) {
255                return true;
256            }
257        }
258
259        false
260    }
261
262    /// Get the holiday activity multiplier for a date.
263    fn get_holiday_multiplier(&self, date: NaiveDate) -> f64 {
264        // Check holiday calendar first (more accurate)
265        if let Some(ref calendar) = self.holiday_calendar {
266            let mult = calendar.get_multiplier(date);
267            if mult < 1.0 {
268                return mult;
269            }
270        }
271
272        // Fall back to legacy holidays with default multiplier
273        if self.holidays.contains(&date) {
274            return self.seasonality_config.holiday_activity;
275        }
276
277        1.0
278    }
279
280    /// Check if a date is month-end (last N days of month).
281    pub fn is_month_end(&self, date: NaiveDate) -> bool {
282        let last_day = Self::last_day_of_month(date);
283        let days_until_end = (last_day - date).num_days();
284        days_until_end >= 0 && days_until_end < self.seasonality_config.month_end_lead_days as i64
285    }
286
287    /// Check if a date is quarter-end.
288    pub fn is_quarter_end(&self, date: NaiveDate) -> bool {
289        let month = date.month();
290        let is_quarter_end_month = matches!(month, 3 | 6 | 9 | 12);
291        is_quarter_end_month && self.is_month_end(date)
292    }
293
294    /// Check if a date is year-end.
295    pub fn is_year_end(&self, date: NaiveDate) -> bool {
296        date.month() == 12 && self.is_month_end(date)
297    }
298
299    /// Get the last day of the month for a given date.
300    pub fn last_day_of_month(date: NaiveDate) -> NaiveDate {
301        let year = date.year();
302        let month = date.month();
303
304        if month == 12 {
305            NaiveDate::from_ymd_opt(year + 1, 1, 1).unwrap() - Duration::days(1)
306        } else {
307            NaiveDate::from_ymd_opt(year, month + 1, 1).unwrap() - Duration::days(1)
308        }
309    }
310
311    /// Get the activity multiplier for a specific date.
312    ///
313    /// Combines:
314    /// - Base seasonality (month-end, quarter-end, year-end spikes)
315    /// - Day-of-week patterns (Monday catch-up, Friday slowdown)
316    /// - Weekend activity reduction
317    /// - Holiday activity reduction (from calendar or legacy list)
318    /// - Industry-specific seasonality (if configured)
319    pub fn get_date_multiplier(&self, date: NaiveDate) -> f64 {
320        let mut multiplier = 1.0;
321
322        // Weekend reduction
323        if self.is_weekend(date) {
324            multiplier *= self.seasonality_config.weekend_activity;
325        } else {
326            // Day-of-week patterns (only for weekdays)
327            multiplier *= self.get_day_of_week_multiplier(date);
328        }
329
330        // Holiday reduction (using enhanced calendar if available)
331        let holiday_mult = self.get_holiday_multiplier(date);
332        if holiday_mult < 1.0 {
333            multiplier *= holiday_mult;
334        }
335
336        // Period-end spikes (take the highest applicable)
337        if self.seasonality_config.year_end_spike && self.is_year_end(date) {
338            multiplier *= self.seasonality_config.year_end_multiplier;
339        } else if self.seasonality_config.quarter_end_spike && self.is_quarter_end(date) {
340            multiplier *= self.seasonality_config.quarter_end_multiplier;
341        } else if self.seasonality_config.month_end_spike && self.is_month_end(date) {
342            multiplier *= self.seasonality_config.month_end_multiplier;
343        }
344
345        // Industry-specific seasonality
346        if let Some(ref industry) = self.industry_seasonality {
347            let industry_mult = industry.get_multiplier(date);
348            // Industry multipliers are additive to base (they represent deviations from normal)
349            // A multiplier > 1.0 increases activity, < 1.0 decreases it
350            multiplier *= industry_mult;
351        }
352
353        multiplier
354    }
355
356    /// Get the base multiplier without industry seasonality.
357    pub fn get_base_date_multiplier(&self, date: NaiveDate) -> f64 {
358        let mut multiplier = 1.0;
359
360        if self.is_weekend(date) {
361            multiplier *= self.seasonality_config.weekend_activity;
362        } else {
363            // Day-of-week patterns (only for weekdays)
364            multiplier *= self.get_day_of_week_multiplier(date);
365        }
366
367        let holiday_mult = self.get_holiday_multiplier(date);
368        if holiday_mult < 1.0 {
369            multiplier *= holiday_mult;
370        }
371
372        if self.seasonality_config.year_end_spike && self.is_year_end(date) {
373            multiplier *= self.seasonality_config.year_end_multiplier;
374        } else if self.seasonality_config.quarter_end_spike && self.is_quarter_end(date) {
375            multiplier *= self.seasonality_config.quarter_end_multiplier;
376        } else if self.seasonality_config.month_end_spike && self.is_month_end(date) {
377            multiplier *= self.seasonality_config.month_end_multiplier;
378        }
379
380        multiplier
381    }
382
383    /// Get only the industry seasonality multiplier for a date.
384    pub fn get_industry_multiplier(&self, date: NaiveDate) -> f64 {
385        self.industry_seasonality
386            .as_ref()
387            .map(|s| s.get_multiplier(date))
388            .unwrap_or(1.0)
389    }
390
391    /// Sample a posting date within a range based on seasonality.
392    pub fn sample_date(&mut self, start: NaiveDate, end: NaiveDate) -> NaiveDate {
393        let days = (end - start).num_days() as usize;
394        if days == 0 {
395            return start;
396        }
397
398        // Build weighted distribution based on activity levels
399        let mut weights: Vec<f64> = (0..=days)
400            .map(|d| {
401                let date = start + Duration::days(d as i64);
402                self.get_date_multiplier(date)
403            })
404            .collect();
405
406        // Normalize weights
407        let total: f64 = weights.iter().sum();
408        weights.iter_mut().for_each(|w| *w /= total);
409
410        // Sample using weights
411        let p: f64 = self.rng.gen();
412        let mut cumulative = 0.0;
413        for (i, weight) in weights.iter().enumerate() {
414            cumulative += weight;
415            if p < cumulative {
416                return start + Duration::days(i as i64);
417            }
418        }
419
420        end
421    }
422
423    /// Sample a posting time based on working hours.
424    pub fn sample_time(&mut self, is_human: bool) -> NaiveTime {
425        if !is_human {
426            // Automated systems can post any time, but prefer off-hours
427            let hour = if self.rng.gen::<f64>() < 0.7 {
428                // 70% off-peak hours (night batch processing)
429                self.rng.gen_range(22..=23).clamp(0, 23)
430                    + if self.rng.gen_bool(0.5) {
431                        0
432                    } else {
433                        self.rng.gen_range(0..=5)
434                    }
435            } else {
436                self.rng.gen_range(0..24)
437            };
438            let minute = self.rng.gen_range(0..60);
439            let second = self.rng.gen_range(0..60);
440            return NaiveTime::from_hms_opt(hour.clamp(0, 23) as u32, minute, second).unwrap();
441        }
442
443        // Human users follow working hours
444        let hour = if self.rng.gen::<f64>() < self.working_hours_config.after_hours_probability {
445            // After hours
446            if self.rng.gen_bool(0.5) {
447                self.rng.gen_range(6..self.working_hours_config.day_start)
448            } else {
449                self.rng.gen_range(self.working_hours_config.day_end..22)
450            }
451        } else {
452            // Normal working hours with peak weighting
453            let is_peak = self.rng.gen::<f64>() < 0.6; // 60% during peak
454            if is_peak && !self.working_hours_config.peak_hours.is_empty() {
455                *self
456                    .working_hours_config
457                    .peak_hours
458                    .choose(&mut self.rng)
459                    .unwrap()
460            } else {
461                self.rng.gen_range(
462                    self.working_hours_config.day_start..self.working_hours_config.day_end,
463                )
464            }
465        };
466
467        let minute = self.rng.gen_range(0..60);
468        let second = self.rng.gen_range(0..60);
469
470        NaiveTime::from_hms_opt(hour as u32, minute, second).unwrap()
471    }
472
473    /// Calculate expected transaction count for a date given daily average.
474    pub fn expected_count_for_date(&self, date: NaiveDate, daily_average: f64) -> u64 {
475        let multiplier = self.get_date_multiplier(date);
476        (daily_average * multiplier).round() as u64
477    }
478
479    /// Reset the sampler with a new seed.
480    pub fn reset(&mut self, seed: u64) {
481        self.rng = ChaCha8Rng::seed_from_u64(seed);
482    }
483}
484
485/// Time period specification for generation.
486#[derive(Debug, Clone)]
487pub struct TimePeriod {
488    /// Start date (inclusive)
489    pub start_date: NaiveDate,
490    /// End date (inclusive)
491    pub end_date: NaiveDate,
492    /// Fiscal year
493    pub fiscal_year: u16,
494    /// Fiscal periods covered
495    pub fiscal_periods: Vec<u8>,
496}
497
498impl TimePeriod {
499    /// Create a time period for a full fiscal year.
500    pub fn fiscal_year(year: u16) -> Self {
501        Self {
502            start_date: NaiveDate::from_ymd_opt(year as i32, 1, 1).unwrap(),
503            end_date: NaiveDate::from_ymd_opt(year as i32, 12, 31).unwrap(),
504            fiscal_year: year,
505            fiscal_periods: (1..=12).collect(),
506        }
507    }
508
509    /// Create a time period for specific months.
510    pub fn months(year: u16, start_month: u8, num_months: u8) -> Self {
511        let start_date = NaiveDate::from_ymd_opt(year as i32, start_month as u32, 1).unwrap();
512        let end_month = ((start_month - 1 + num_months - 1) % 12) + 1;
513        let end_year = year + (start_month as u16 - 1 + num_months as u16 - 1) / 12;
514        let end_date = TemporalSampler::last_day_of_month(
515            NaiveDate::from_ymd_opt(end_year as i32, end_month as u32, 1).unwrap(),
516        );
517
518        Self {
519            start_date,
520            end_date,
521            fiscal_year: year,
522            fiscal_periods: (start_month..start_month + num_months).collect(),
523        }
524    }
525
526    /// Get total days in the period.
527    pub fn total_days(&self) -> i64 {
528        (self.end_date - self.start_date).num_days() + 1
529    }
530}
531
#[cfg(test)]
mod tests {
    use super::*;
    use chrono::Timelike;

    /// Shorthand for building a date that is known to be valid.
    fn ymd(year: i32, month: u32, day: u32) -> NaiveDate {
        NaiveDate::from_ymd_opt(year, month, day).expect("test dates are valid")
    }

    /// Assert that two multipliers agree to within 0.01.
    fn assert_close(actual: f64, expected: f64) {
        assert!(
            (actual - expected).abs() < 0.01,
            "expected {expected}, got {actual}"
        );
    }

    #[test]
    fn test_is_weekend() {
        let sampler = TemporalSampler::new(42);

        assert!(sampler.is_weekend(ymd(2024, 6, 15))); // Saturday
        assert!(sampler.is_weekend(ymd(2024, 6, 16))); // Sunday
        assert!(!sampler.is_weekend(ymd(2024, 6, 17))); // Monday
    }

    #[test]
    fn test_is_month_end() {
        let sampler = TemporalSampler::new(42);

        assert!(sampler.is_month_end(ymd(2024, 6, 28)));
        assert!(!sampler.is_month_end(ymd(2024, 6, 1)));
    }

    #[test]
    fn test_date_multiplier() {
        let sampler = TemporalSampler::new(42);

        // Ordinary Wednesday: baseline.
        assert_close(sampler.get_date_multiplier(ymd(2024, 6, 12)), 1.0);

        // Saturday: strongly reduced.
        assert!(sampler.get_date_multiplier(ymd(2024, 6, 15)) < 0.2);

        // Inside the month-end window: spiked.
        assert!(sampler.get_date_multiplier(ymd(2024, 6, 28)) > 2.0);
    }

    #[test]
    fn test_day_of_week_patterns() {
        let sampler = TemporalSampler::new(42);

        // June 2024: 10=Mon, 11=Tue, 12=Wed, 13=Thu, 14=Fri
        let monday = ymd(2024, 6, 10);
        let friday = ymd(2024, 6, 14);

        let expectations = [
            (monday, 1.3),           // catch-up
            (ymd(2024, 6, 11), 1.1), // slightly elevated
            (ymd(2024, 6, 12), 1.0), // normal
            (ymd(2024, 6, 13), 1.0), // normal
            (friday, 0.85),          // winding down
        ];
        for (date, expected) in expectations {
            assert_close(sampler.get_day_of_week_multiplier(date), expected);
        }

        // The pattern must carry through to the combined multiplier
        // (neither date is affected by a period-end spike).
        assert!(sampler.get_date_multiplier(monday) > sampler.get_date_multiplier(friday));
    }

    #[test]
    fn test_sample_time_human() {
        let mut sampler = TemporalSampler::new(42);

        // With the default configuration a human posts between 06:00 and
        // 21:59: after-hours covers 06-07 and 18-21, working hours 08-17.
        for _ in 0..100 {
            let hour = sampler.sample_time(true).hour();
            assert!((6..22).contains(&hour), "unexpected human hour {hour}");
        }
    }

    #[test]
    fn test_sample_date_stays_in_range() {
        let mut sampler = TemporalSampler::new(42);
        let start = ymd(2024, 6, 1);
        let end = ymd(2024, 6, 30);

        for _ in 0..50 {
            let picked = sampler.sample_date(start, end);
            assert!(picked >= start && picked <= end);
        }

        // A single-day range can only yield that day.
        assert_eq!(sampler.sample_date(start, start), start);
    }

    #[test]
    fn test_same_seed_reproduces_sequence() {
        let mut first = TemporalSampler::new(7);
        let mut second = TemporalSampler::new(7);

        for _ in 0..20 {
            assert_eq!(first.sample_time(true), second.sample_time(true));
            assert_eq!(first.sample_time(false), second.sample_time(false));
        }
    }

    #[test]
    fn test_time_period() {
        let period = TimePeriod::fiscal_year(2024);
        assert_eq!(period.total_days(), 366); // 2024 is leap year

        let partial = TimePeriod::months(2024, 1, 6);
        assert!(partial.total_days() > 180);
        assert!(partial.total_days() < 185);
    }
}