Skip to main content

datasynth_core/distributions/
temporal.rs

1//! Temporal distribution samplers for realistic posting patterns.
2//!
3//! Implements seasonality, working hour patterns, and period-end spikes
4//! commonly observed in enterprise accounting systems.
5
6use chrono::{Datelike, Duration, NaiveDate, NaiveTime, Weekday};
7use rand::prelude::*;
8use rand_chacha::ChaCha8Rng;
9use serde::{Deserialize, Serialize};
10
11use super::holidays::HolidayCalendar;
12use super::period_end::PeriodEndDynamics;
13use super::seasonality::IndustrySeasonality;
14
15/// Configuration for seasonality patterns.
16#[derive(Debug, Clone, Serialize, Deserialize)]
17pub struct SeasonalityConfig {
18    /// Enable month-end volume spikes
19    pub month_end_spike: bool,
20    /// Month-end spike multiplier (e.g., 2.5 = 2.5x normal volume)
21    pub month_end_multiplier: f64,
22    /// Days before month-end to start spike
23    pub month_end_lead_days: u32,
24
25    /// Enable quarter-end spikes
26    pub quarter_end_spike: bool,
27    /// Quarter-end spike multiplier
28    pub quarter_end_multiplier: f64,
29
30    /// Enable year-end spikes
31    pub year_end_spike: bool,
32    /// Year-end spike multiplier
33    pub year_end_multiplier: f64,
34
35    /// Activity level on weekends (0.0 = no activity, 1.0 = normal)
36    pub weekend_activity: f64,
37    /// Activity level on holidays
38    pub holiday_activity: f64,
39
40    /// Enable day-of-week patterns (Monday catch-up, Friday slowdown)
41    pub day_of_week_patterns: bool,
42    /// Monday activity multiplier (catch-up from weekend)
43    pub monday_multiplier: f64,
44    /// Tuesday activity multiplier
45    pub tuesday_multiplier: f64,
46    /// Wednesday activity multiplier
47    pub wednesday_multiplier: f64,
48    /// Thursday activity multiplier
49    pub thursday_multiplier: f64,
50    /// Friday activity multiplier (early departures)
51    pub friday_multiplier: f64,
52}
53
54impl Default for SeasonalityConfig {
55    fn default() -> Self {
56        Self {
57            month_end_spike: true,
58            month_end_multiplier: 2.5,
59            month_end_lead_days: 5,
60            quarter_end_spike: true,
61            quarter_end_multiplier: 4.0,
62            year_end_spike: true,
63            year_end_multiplier: 6.0,
64            weekend_activity: 0.1,
65            holiday_activity: 0.05,
66            // Day-of-week patterns: humans work differently across the week
67            day_of_week_patterns: true,
68            monday_multiplier: 1.3,    // Catch-up from weekend backlog
69            tuesday_multiplier: 1.1,   // Still catching up
70            wednesday_multiplier: 1.0, // Midweek normal
71            thursday_multiplier: 1.0,  // Midweek normal
72            friday_multiplier: 0.85,   // Early departures, winding down
73        }
74    }
75}
76
77/// Configuration for working hours pattern.
78#[derive(Debug, Clone, Serialize, Deserialize)]
79pub struct WorkingHoursConfig {
80    /// Start of working day (hour, 0-23)
81    pub day_start: u8,
82    /// End of working day (hour, 0-23)
83    pub day_end: u8,
84    /// Peak hours during the day
85    pub peak_hours: Vec<u8>,
86    /// Weight for peak hours (multiplier)
87    pub peak_weight: f64,
88    /// Probability of after-hours posting
89    pub after_hours_probability: f64,
90}
91
92impl Default for WorkingHoursConfig {
93    fn default() -> Self {
94        Self {
95            day_start: 8,
96            day_end: 18,
97            peak_hours: vec![9, 10, 11, 14, 15, 16],
98            peak_weight: 1.5,
99            after_hours_probability: 0.05,
100        }
101    }
102}
103
104/// Configuration for intra-day posting patterns.
105///
106/// Defines segments of the business day with different activity multipliers,
107/// allowing for realistic modeling of morning spikes, lunch dips, and end-of-day rushes.
108#[derive(Debug, Clone, Serialize, Deserialize)]
109pub struct IntraDayPatterns {
110    /// Whether intra-day patterns are enabled.
111    pub enabled: bool,
112    /// Time segments with activity multipliers.
113    pub segments: Vec<IntraDaySegment>,
114}
115
116impl Default for IntraDayPatterns {
117    fn default() -> Self {
118        Self {
119            enabled: true,
120            segments: vec![
121                IntraDaySegment {
122                    name: "morning_spike".to_string(),
123                    start: NaiveTime::from_hms_opt(8, 30, 0).expect("valid date/time components"),
124                    end: NaiveTime::from_hms_opt(10, 0, 0).expect("valid date/time components"),
125                    multiplier: 1.8,
126                    posting_type: PostingType::Both,
127                },
128                IntraDaySegment {
129                    name: "mid_morning".to_string(),
130                    start: NaiveTime::from_hms_opt(10, 0, 0).expect("valid date/time components"),
131                    end: NaiveTime::from_hms_opt(12, 0, 0).expect("valid date/time components"),
132                    multiplier: 1.2,
133                    posting_type: PostingType::Both,
134                },
135                IntraDaySegment {
136                    name: "lunch_dip".to_string(),
137                    start: NaiveTime::from_hms_opt(12, 0, 0).expect("valid date/time components"),
138                    end: NaiveTime::from_hms_opt(13, 30, 0).expect("valid date/time components"),
139                    multiplier: 0.4,
140                    posting_type: PostingType::Human,
141                },
142                IntraDaySegment {
143                    name: "afternoon".to_string(),
144                    start: NaiveTime::from_hms_opt(13, 30, 0).expect("valid date/time components"),
145                    end: NaiveTime::from_hms_opt(16, 0, 0).expect("valid date/time components"),
146                    multiplier: 1.1,
147                    posting_type: PostingType::Both,
148                },
149                IntraDaySegment {
150                    name: "eod_rush".to_string(),
151                    start: NaiveTime::from_hms_opt(16, 0, 0).expect("valid date/time components"),
152                    end: NaiveTime::from_hms_opt(17, 30, 0).expect("valid date/time components"),
153                    multiplier: 1.5,
154                    posting_type: PostingType::Both,
155                },
156            ],
157        }
158    }
159}
160
161impl IntraDayPatterns {
162    /// Creates intra-day patterns with no segments (disabled).
163    pub fn disabled() -> Self {
164        Self {
165            enabled: false,
166            segments: Vec::new(),
167        }
168    }
169
170    /// Creates patterns with custom segments.
171    pub fn with_segments(segments: Vec<IntraDaySegment>) -> Self {
172        Self {
173            enabled: true,
174            segments,
175        }
176    }
177
178    /// Gets the multiplier for a given time based on posting type.
179    pub fn get_multiplier(&self, time: NaiveTime, is_human: bool) -> f64 {
180        if !self.enabled {
181            return 1.0;
182        }
183
184        for segment in &self.segments {
185            if time >= segment.start && time < segment.end {
186                // Check if this segment applies to the posting type
187                let applies = match segment.posting_type {
188                    PostingType::Human => is_human,
189                    PostingType::System => !is_human,
190                    PostingType::Both => true,
191                };
192                if applies {
193                    return segment.multiplier;
194                }
195            }
196        }
197
198        1.0 // Default multiplier if no segment matches
199    }
200}
201
202/// A segment of the business day with specific activity patterns.
203#[derive(Debug, Clone, Serialize, Deserialize)]
204pub struct IntraDaySegment {
205    /// Name of the segment (e.g., "morning_spike", "lunch_dip").
206    pub name: String,
207    /// Start time of the segment.
208    pub start: NaiveTime,
209    /// End time of the segment.
210    pub end: NaiveTime,
211    /// Activity multiplier for this segment (1.0 = normal).
212    pub multiplier: f64,
213    /// Type of postings this segment applies to.
214    pub posting_type: PostingType,
215}
216
217/// Type of posting for intra-day pattern matching.
218#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
219#[serde(rename_all = "snake_case")]
220pub enum PostingType {
221    /// Human/manual postings only.
222    Human,
223    /// System/automated postings only.
224    System,
225    /// Both human and system postings.
226    Both,
227}
228
229/// Sampler for temporal patterns in transaction generation.
230pub struct TemporalSampler {
231    rng: ChaCha8Rng,
232    seasonality_config: SeasonalityConfig,
233    working_hours_config: WorkingHoursConfig,
234    /// List of holiday dates (legacy)
235    holidays: Vec<NaiveDate>,
236    /// Industry-specific seasonality patterns (optional).
237    industry_seasonality: Option<IndustrySeasonality>,
238    /// Regional holiday calendar (optional).
239    holiday_calendar: Option<HolidayCalendar>,
240    /// Period-end dynamics for decay curves (optional).
241    period_end_dynamics: Option<PeriodEndDynamics>,
242    /// Whether to use period-end dynamics instead of legacy flat multipliers.
243    use_period_end_dynamics: bool,
244    /// Intra-day patterns for time-of-day activity variation.
245    intra_day_patterns: Option<IntraDayPatterns>,
246    /// Cached cumulative distribution for date sampling.
247    /// Pre-computed on first `sample_date` call with a given date range.
248    /// Avoids recomputing 365+ weights per call.
249    cached_date_cdf: Option<CachedDateCdf>,
250}
251
252/// Pre-computed CDF for date sampling, avoiding per-call allocation.
253struct CachedDateCdf {
254    start: NaiveDate,
255    end: NaiveDate,
256    /// Cumulative distribution function (pre-normalized)
257    cdf: Vec<f64>,
258}
259
260impl TemporalSampler {
261    /// Create a new temporal sampler.
262    pub fn new(seed: u64) -> Self {
263        Self::with_config(
264            seed,
265            SeasonalityConfig::default(),
266            WorkingHoursConfig::default(),
267            Vec::new(),
268        )
269    }
270
271    /// Create a temporal sampler with custom configuration.
272    pub fn with_config(
273        seed: u64,
274        seasonality_config: SeasonalityConfig,
275        working_hours_config: WorkingHoursConfig,
276        holidays: Vec<NaiveDate>,
277    ) -> Self {
278        Self {
279            rng: ChaCha8Rng::seed_from_u64(seed),
280            seasonality_config,
281            working_hours_config,
282            holidays,
283            industry_seasonality: None,
284            holiday_calendar: None,
285            period_end_dynamics: None,
286            use_period_end_dynamics: false,
287            intra_day_patterns: None,
288            cached_date_cdf: None,
289        }
290    }
291
292    /// Create a temporal sampler with full enhanced configuration.
293    #[allow(clippy::too_many_arguments)]
294    pub fn with_full_config(
295        seed: u64,
296        seasonality_config: SeasonalityConfig,
297        working_hours_config: WorkingHoursConfig,
298        holidays: Vec<NaiveDate>,
299        industry_seasonality: Option<IndustrySeasonality>,
300        holiday_calendar: Option<HolidayCalendar>,
301    ) -> Self {
302        Self {
303            rng: ChaCha8Rng::seed_from_u64(seed),
304            seasonality_config,
305            working_hours_config,
306            holidays,
307            industry_seasonality,
308            holiday_calendar,
309            period_end_dynamics: None,
310            use_period_end_dynamics: false,
311            intra_day_patterns: None,
312            cached_date_cdf: None,
313        }
314    }
315
316    /// Create a temporal sampler with period-end dynamics.
317    #[allow(clippy::too_many_arguments)]
318    pub fn with_period_end_dynamics(
319        seed: u64,
320        seasonality_config: SeasonalityConfig,
321        working_hours_config: WorkingHoursConfig,
322        holidays: Vec<NaiveDate>,
323        industry_seasonality: Option<IndustrySeasonality>,
324        holiday_calendar: Option<HolidayCalendar>,
325        period_end_dynamics: PeriodEndDynamics,
326    ) -> Self {
327        Self {
328            rng: ChaCha8Rng::seed_from_u64(seed),
329            seasonality_config,
330            working_hours_config,
331            holidays,
332            industry_seasonality,
333            holiday_calendar,
334            period_end_dynamics: Some(period_end_dynamics),
335            use_period_end_dynamics: true,
336            intra_day_patterns: None,
337            cached_date_cdf: None,
338        }
339    }
340
341    /// Sets the intra-day patterns for time-of-day activity variation.
342    pub fn set_intra_day_patterns(&mut self, patterns: IntraDayPatterns) {
343        self.intra_day_patterns = Some(patterns);
344    }
345
346    /// Gets the intra-day multiplier for a given time.
347    pub fn get_intra_day_multiplier(&self, time: NaiveTime, is_human: bool) -> f64 {
348        self.intra_day_patterns
349            .as_ref()
350            .map(|p| p.get_multiplier(time, is_human))
351            .unwrap_or(1.0)
352    }
353
354    /// Set industry-specific seasonality.
355    pub fn with_industry_seasonality(mut self, seasonality: IndustrySeasonality) -> Self {
356        self.industry_seasonality = Some(seasonality);
357        self
358    }
359
360    /// Set regional holiday calendar.
361    pub fn with_holiday_calendar(mut self, calendar: HolidayCalendar) -> Self {
362        self.holiday_calendar = Some(calendar);
363        self
364    }
365
366    /// Set industry seasonality (mutable reference version).
367    pub fn set_industry_seasonality(&mut self, seasonality: IndustrySeasonality) {
368        self.industry_seasonality = Some(seasonality);
369    }
370
371    /// Set holiday calendar (mutable reference version).
372    pub fn set_holiday_calendar(&mut self, calendar: HolidayCalendar) {
373        self.holiday_calendar = Some(calendar);
374    }
375
376    /// Set period-end dynamics.
377    pub fn with_period_end(mut self, dynamics: PeriodEndDynamics) -> Self {
378        self.period_end_dynamics = Some(dynamics);
379        self.use_period_end_dynamics = true;
380        self
381    }
382
383    /// Set period-end dynamics (mutable reference version).
384    pub fn set_period_end_dynamics(&mut self, dynamics: PeriodEndDynamics) {
385        self.period_end_dynamics = Some(dynamics);
386        self.use_period_end_dynamics = true;
387    }
388
389    /// Get the period-end dynamics if set.
390    pub fn period_end_dynamics(&self) -> Option<&PeriodEndDynamics> {
391        self.period_end_dynamics.as_ref()
392    }
393
394    /// Enable or disable period-end dynamics usage.
395    pub fn set_use_period_end_dynamics(&mut self, enabled: bool) {
396        self.use_period_end_dynamics = enabled;
397    }
398
399    /// Get the industry seasonality if set.
400    pub fn industry_seasonality(&self) -> Option<&IndustrySeasonality> {
401        self.industry_seasonality.as_ref()
402    }
403
404    /// Get the holiday calendar if set.
405    pub fn holiday_calendar(&self) -> Option<&HolidayCalendar> {
406        self.holiday_calendar.as_ref()
407    }
408
409    /// Generate US federal holidays for a given year.
410    pub fn generate_us_holidays(year: i32) -> Vec<NaiveDate> {
411        let mut holidays = Vec::new();
412
413        // New Year's Day
414        holidays.push(NaiveDate::from_ymd_opt(year, 1, 1).expect("valid date/time components"));
415        // Independence Day
416        holidays.push(NaiveDate::from_ymd_opt(year, 7, 4).expect("valid date/time components"));
417        // Christmas
418        holidays.push(NaiveDate::from_ymd_opt(year, 12, 25).expect("valid date/time components"));
419        // Thanksgiving (4th Thursday of November)
420        let first_thursday = (1..=7)
421            .map(|d| NaiveDate::from_ymd_opt(year, 11, d).expect("valid date/time components"))
422            .find(|d| d.weekday() == Weekday::Thu)
423            .expect("valid date/time components");
424        let thanksgiving = first_thursday + Duration::weeks(3);
425        holidays.push(thanksgiving);
426
427        holidays
428    }
429
430    /// Check if a date is a weekend.
431    pub fn is_weekend(&self, date: NaiveDate) -> bool {
432        matches!(date.weekday(), Weekday::Sat | Weekday::Sun)
433    }
434
435    /// Get the day-of-week activity multiplier.
436    ///
437    /// Returns a multiplier based on the day of the week:
438    /// - Monday: Higher activity (catch-up from weekend)
439    /// - Tuesday: Slightly elevated
440    /// - Wednesday/Thursday: Normal
441    /// - Friday: Reduced (early departures, winding down)
442    /// - Saturday/Sunday: Uses weekend_activity setting
443    pub fn get_day_of_week_multiplier(&self, date: NaiveDate) -> f64 {
444        if !self.seasonality_config.day_of_week_patterns {
445            return 1.0;
446        }
447
448        match date.weekday() {
449            Weekday::Mon => self.seasonality_config.monday_multiplier,
450            Weekday::Tue => self.seasonality_config.tuesday_multiplier,
451            Weekday::Wed => self.seasonality_config.wednesday_multiplier,
452            Weekday::Thu => self.seasonality_config.thursday_multiplier,
453            Weekday::Fri => self.seasonality_config.friday_multiplier,
454            Weekday::Sat | Weekday::Sun => 1.0, // Weekend activity handled separately
455        }
456    }
457
458    /// Check if a date is a holiday.
459    pub fn is_holiday(&self, date: NaiveDate) -> bool {
460        // Check legacy holidays list
461        if self.holidays.contains(&date) {
462            return true;
463        }
464
465        // Check holiday calendar if available
466        if let Some(ref calendar) = self.holiday_calendar {
467            if calendar.is_holiday(date) {
468                return true;
469            }
470        }
471
472        false
473    }
474
475    /// Get the holiday activity multiplier for a date.
476    fn get_holiday_multiplier(&self, date: NaiveDate) -> f64 {
477        // Check holiday calendar first (more accurate)
478        if let Some(ref calendar) = self.holiday_calendar {
479            let mult = calendar.get_multiplier(date);
480            if mult < 1.0 {
481                return mult;
482            }
483        }
484
485        // Fall back to legacy holidays with default multiplier
486        if self.holidays.contains(&date) {
487            return self.seasonality_config.holiday_activity;
488        }
489
490        1.0
491    }
492
493    /// Check if a date is month-end (last N days of month).
494    pub fn is_month_end(&self, date: NaiveDate) -> bool {
495        let last_day = Self::last_day_of_month(date);
496        let days_until_end = (last_day - date).num_days();
497        days_until_end >= 0 && days_until_end < self.seasonality_config.month_end_lead_days as i64
498    }
499
500    /// Check if a date is quarter-end.
501    pub fn is_quarter_end(&self, date: NaiveDate) -> bool {
502        let month = date.month();
503        let is_quarter_end_month = matches!(month, 3 | 6 | 9 | 12);
504        is_quarter_end_month && self.is_month_end(date)
505    }
506
507    /// Check if a date is year-end.
508    pub fn is_year_end(&self, date: NaiveDate) -> bool {
509        date.month() == 12 && self.is_month_end(date)
510    }
511
512    /// Get the last day of the month for a given date.
513    pub fn last_day_of_month(date: NaiveDate) -> NaiveDate {
514        let year = date.year();
515        let month = date.month();
516
517        if month == 12 {
518            NaiveDate::from_ymd_opt(year + 1, 1, 1).expect("valid date/time components")
519                - Duration::days(1)
520        } else {
521            NaiveDate::from_ymd_opt(year, month + 1, 1).expect("valid date/time components")
522                - Duration::days(1)
523        }
524    }
525
526    /// Get the activity multiplier for a specific date.
527    ///
528    /// Combines:
529    /// - Base seasonality (month-end, quarter-end, year-end spikes)
530    /// - Day-of-week patterns (Monday catch-up, Friday slowdown)
531    /// - Weekend activity reduction
532    /// - Holiday activity reduction (from calendar or legacy list)
533    /// - Industry-specific seasonality (if configured)
534    /// - Period-end dynamics (if configured, replaces legacy flat multipliers)
535    pub fn get_date_multiplier(&self, date: NaiveDate) -> f64 {
536        let mut multiplier = 1.0;
537
538        // Weekend reduction
539        if self.is_weekend(date) {
540            multiplier *= self.seasonality_config.weekend_activity;
541        } else {
542            // Day-of-week patterns (only for weekdays)
543            multiplier *= self.get_day_of_week_multiplier(date);
544        }
545
546        // Holiday reduction (using enhanced calendar if available)
547        let holiday_mult = self.get_holiday_multiplier(date);
548        if holiday_mult < 1.0 {
549            multiplier *= holiday_mult;
550        }
551
552        // Period-end spikes - use dynamics if available, otherwise legacy flat multipliers
553        if self.use_period_end_dynamics {
554            if let Some(ref dynamics) = self.period_end_dynamics {
555                let period_mult = dynamics.get_multiplier_for_date(date);
556                multiplier *= period_mult;
557            }
558        } else {
559            // Legacy flat multipliers (take the highest applicable)
560            if self.seasonality_config.year_end_spike && self.is_year_end(date) {
561                multiplier *= self.seasonality_config.year_end_multiplier;
562            } else if self.seasonality_config.quarter_end_spike && self.is_quarter_end(date) {
563                multiplier *= self.seasonality_config.quarter_end_multiplier;
564            } else if self.seasonality_config.month_end_spike && self.is_month_end(date) {
565                multiplier *= self.seasonality_config.month_end_multiplier;
566            }
567        }
568
569        // Industry-specific seasonality
570        if let Some(ref industry) = self.industry_seasonality {
571            let industry_mult = industry.get_multiplier(date);
572            // Industry multipliers are additive to base (they represent deviations from normal)
573            // A multiplier > 1.0 increases activity, < 1.0 decreases it
574            multiplier *= industry_mult;
575        }
576
577        multiplier
578    }
579
580    /// Get the period-end multiplier for a date.
581    ///
582    /// Returns the period-end component of the date multiplier,
583    /// using dynamics if available, otherwise legacy flat multipliers.
584    pub fn get_period_end_multiplier(&self, date: NaiveDate) -> f64 {
585        if self.use_period_end_dynamics {
586            if let Some(ref dynamics) = self.period_end_dynamics {
587                return dynamics.get_multiplier_for_date(date);
588            }
589        }
590
591        // Legacy flat multipliers
592        if self.seasonality_config.year_end_spike && self.is_year_end(date) {
593            self.seasonality_config.year_end_multiplier
594        } else if self.seasonality_config.quarter_end_spike && self.is_quarter_end(date) {
595            self.seasonality_config.quarter_end_multiplier
596        } else if self.seasonality_config.month_end_spike && self.is_month_end(date) {
597            self.seasonality_config.month_end_multiplier
598        } else {
599            1.0
600        }
601    }
602
603    /// Get the base multiplier without industry seasonality.
604    pub fn get_base_date_multiplier(&self, date: NaiveDate) -> f64 {
605        let mut multiplier = 1.0;
606
607        if self.is_weekend(date) {
608            multiplier *= self.seasonality_config.weekend_activity;
609        } else {
610            // Day-of-week patterns (only for weekdays)
611            multiplier *= self.get_day_of_week_multiplier(date);
612        }
613
614        let holiday_mult = self.get_holiday_multiplier(date);
615        if holiday_mult < 1.0 {
616            multiplier *= holiday_mult;
617        }
618
619        // Period-end spikes - use dynamics if available
620        if self.use_period_end_dynamics {
621            if let Some(ref dynamics) = self.period_end_dynamics {
622                let period_mult = dynamics.get_multiplier_for_date(date);
623                multiplier *= period_mult;
624            }
625        } else {
626            // Legacy flat multipliers
627            if self.seasonality_config.year_end_spike && self.is_year_end(date) {
628                multiplier *= self.seasonality_config.year_end_multiplier;
629            } else if self.seasonality_config.quarter_end_spike && self.is_quarter_end(date) {
630                multiplier *= self.seasonality_config.quarter_end_multiplier;
631            } else if self.seasonality_config.month_end_spike && self.is_month_end(date) {
632                multiplier *= self.seasonality_config.month_end_multiplier;
633            }
634        }
635
636        multiplier
637    }
638
639    /// Get only the industry seasonality multiplier for a date.
640    pub fn get_industry_multiplier(&self, date: NaiveDate) -> f64 {
641        self.industry_seasonality
642            .as_ref()
643            .map(|s| s.get_multiplier(date))
644            .unwrap_or(1.0)
645    }
646
647    /// Sample a posting date within a range based on seasonality.
648    ///
649    /// Uses a cached cumulative distribution function (CDF) to avoid
650    /// recomputing date weights on every call. The CDF is computed once
651    /// for a given (start, end) range and reused for subsequent calls.
652    #[inline]
653    pub fn sample_date(&mut self, start: NaiveDate, end: NaiveDate) -> NaiveDate {
654        let days = (end - start).num_days() as usize;
655        if days == 0 {
656            return start;
657        }
658
659        // Check if we have a cached CDF for this range
660        let need_rebuild = match &self.cached_date_cdf {
661            Some(cached) => cached.start != start || cached.end != end,
662            None => true,
663        };
664
665        if need_rebuild {
666            // Build weighted CDF based on activity levels
667            let mut cdf = Vec::with_capacity(days + 1);
668            let mut cumulative = 0.0;
669            for d in 0..=days {
670                let date = start + Duration::days(d as i64);
671                cumulative += self.get_date_multiplier(date);
672                cdf.push(cumulative);
673            }
674
675            // Normalize to [0, 1]
676            let total = cumulative;
677            if total > 0.0 {
678                cdf.iter_mut().for_each(|w| *w /= total);
679            }
680            // Ensure last entry is exactly 1.0
681            if let Some(last) = cdf.last_mut() {
682                *last = 1.0;
683            }
684
685            self.cached_date_cdf = Some(CachedDateCdf { start, end, cdf });
686        }
687
688        // Sample using binary search over the cached CDF
689        let p: f64 = self.rng.random();
690        // SAFETY: cached_date_cdf is guaranteed to be Some — we just set it above
691        let cdf = &self
692            .cached_date_cdf
693            .as_ref()
694            .expect("CDF was just computed")
695            .cdf;
696        let idx = cdf.partition_point(|&w| w < p);
697        let idx = idx.min(days);
698
699        start + Duration::days(idx as i64)
700    }
701
702    /// Sample a posting time based on working hours.
703    #[inline]
704    pub fn sample_time(&mut self, is_human: bool) -> NaiveTime {
705        if !is_human {
706            // Automated systems can post any time, but prefer off-hours
707            let hour = if self.rng.random::<f64>() < 0.7 {
708                // 70% off-peak hours (night batch processing)
709                self.rng.random_range(22..=23).clamp(0, 23)
710                    + if self.rng.random_bool(0.5) {
711                        0
712                    } else {
713                        self.rng.random_range(0..=5)
714                    }
715            } else {
716                self.rng.random_range(0..24)
717            };
718            let minute = self.rng.random_range(0..60);
719            let second = self.rng.random_range(0..60);
720            return NaiveTime::from_hms_opt(hour.clamp(0, 23) as u32, minute, second)
721                .expect("valid date/time components");
722        }
723
724        // Human users follow working hours
725        let hour = if self.rng.random::<f64>() < self.working_hours_config.after_hours_probability {
726            // After hours
727            if self.rng.random_bool(0.5) {
728                self.rng
729                    .random_range(6..self.working_hours_config.day_start)
730            } else {
731                self.rng.random_range(self.working_hours_config.day_end..22)
732            }
733        } else {
734            // Normal working hours with peak weighting
735            let is_peak = self.rng.random::<f64>() < 0.6; // 60% during peak
736            if is_peak && !self.working_hours_config.peak_hours.is_empty() {
737                *self
738                    .working_hours_config
739                    .peak_hours
740                    .choose(&mut self.rng)
741                    .expect("valid date/time components")
742            } else {
743                self.rng.random_range(
744                    self.working_hours_config.day_start..self.working_hours_config.day_end,
745                )
746            }
747        };
748
749        let minute = self.rng.random_range(0..60);
750        let second = self.rng.random_range(0..60);
751
752        NaiveTime::from_hms_opt(hour as u32, minute, second).expect("valid date/time components")
753    }
754
755    /// Calculate expected transaction count for a date given daily average.
756    pub fn expected_count_for_date(&self, date: NaiveDate, daily_average: f64) -> u64 {
757        let multiplier = self.get_date_multiplier(date);
758        (daily_average * multiplier).round() as u64
759    }
760
761    /// Reset the sampler with a new seed.
762    pub fn reset(&mut self, seed: u64) {
763        self.rng = ChaCha8Rng::seed_from_u64(seed);
764        self.cached_date_cdf = None;
765    }
766}
767
768/// Time period specification for generation.
769#[derive(Debug, Clone)]
770pub struct TimePeriod {
771    /// Start date (inclusive)
772    pub start_date: NaiveDate,
773    /// End date (inclusive)
774    pub end_date: NaiveDate,
775    /// Fiscal year
776    pub fiscal_year: u16,
777    /// Fiscal periods covered
778    pub fiscal_periods: Vec<u8>,
779}
780
781impl TimePeriod {
782    /// Create a time period for a full fiscal year.
783    pub fn fiscal_year(year: u16) -> Self {
784        Self {
785            start_date: NaiveDate::from_ymd_opt(year as i32, 1, 1)
786                .expect("valid date/time components"),
787            end_date: NaiveDate::from_ymd_opt(year as i32, 12, 31)
788                .expect("valid date/time components"),
789            fiscal_year: year,
790            fiscal_periods: (1..=12).collect(),
791        }
792    }
793
794    /// Create a time period for specific months.
795    pub fn months(year: u16, start_month: u8, num_months: u8) -> Self {
796        let start_date = NaiveDate::from_ymd_opt(year as i32, start_month as u32, 1)
797            .expect("valid date/time components");
798        let end_month = ((start_month - 1 + num_months - 1) % 12) + 1;
799        let end_year = year + (start_month as u16 - 1 + num_months as u16 - 1) / 12;
800        let end_date = TemporalSampler::last_day_of_month(
801            NaiveDate::from_ymd_opt(end_year as i32, end_month as u32, 1)
802                .expect("valid date/time components"),
803        );
804
805        Self {
806            start_date,
807            end_date,
808            fiscal_year: year,
809            fiscal_periods: (start_month..start_month + num_months).collect(),
810        }
811    }
812
813    /// Get total days in the period.
814    pub fn total_days(&self) -> i64 {
815        (self.end_date - self.start_date).num_days() + 1
816    }
817}
818
#[cfg(test)]
// Tests unwrap freely: a failed unwrap simply fails the test.
#[allow(clippy::unwrap_used)]
mod tests {
    use super::*;
    use chrono::Timelike;

    /// Shorthand for building a known-valid calendar date.
    fn ymd(year: i32, month: u32, day: u32) -> NaiveDate {
        NaiveDate::from_ymd_opt(year, month, day).unwrap()
    }

    #[test]
    fn test_is_weekend() {
        let sampler = TemporalSampler::new(42);

        // 2024-06-15 is a Saturday, the 16th a Sunday, the 17th a Monday.
        let cases = [
            (ymd(2024, 6, 15), true),
            (ymd(2024, 6, 16), true),
            (ymd(2024, 6, 17), false),
        ];
        for (date, expected) in cases {
            assert_eq!(sampler.is_weekend(date), expected);
        }
    }

    #[test]
    fn test_is_month_end() {
        let sampler = TemporalSampler::new(42);

        assert!(sampler.is_month_end(ymd(2024, 6, 28)));
        assert!(!sampler.is_month_end(ymd(2024, 6, 1)));
    }

    #[test]
    fn test_date_multiplier() {
        let sampler = TemporalSampler::new(42);

        // An ordinary Wednesday sits at the 1.0 baseline.
        let baseline = sampler.get_date_multiplier(ymd(2024, 6, 12));
        assert!((baseline - 1.0).abs() < 0.01);

        // Saturday activity is heavily reduced.
        let saturday = sampler.get_date_multiplier(ymd(2024, 6, 15));
        assert!(saturday < 0.2);

        // Dates close to month end are boosted.
        let near_month_end = sampler.get_date_multiplier(ymd(2024, 6, 28));
        assert!(near_month_end > 2.0);
    }

    #[test]
    fn test_day_of_week_patterns() {
        let sampler = TemporalSampler::new(42);

        // The week of 10-14 June 2024 runs Monday through Friday.
        let monday = ymd(2024, 6, 10);
        let friday = ymd(2024, 6, 14);

        // Expected multipliers: Monday catch-up, slightly elevated Tuesday,
        // normal Wednesday/Thursday, reduced Friday.
        let expectations = [
            (monday, 1.3),
            (ymd(2024, 6, 11), 1.1),
            (ymd(2024, 6, 12), 1.0),
            (ymd(2024, 6, 13), 1.0),
            (friday, 0.85),
        ];
        for (date, expected) in expectations {
            let multiplier = sampler.get_day_of_week_multiplier(date);
            assert!((multiplier - expected).abs() < 0.01);
        }

        // The weekday pattern must also show up in the combined date
        // multiplier (neither date is affected by period-end effects).
        assert!(sampler.get_date_multiplier(monday) > sampler.get_date_multiplier(friday));
    }

    #[test]
    fn test_sample_time_human() {
        let mut sampler = TemporalSampler::new(42);

        for _ in 0..100 {
            // Only checks that a valid time of day comes back; the bias
            // towards working hours is not asserted here.
            assert!(sampler.sample_time(true).hour() < 24);
        }
    }

    #[test]
    fn test_time_period() {
        // 2024 is a leap year.
        let full_year = TimePeriod::fiscal_year(2024);
        assert_eq!(full_year.total_days(), 366);

        // January through June: more than 180 and fewer than 185 days.
        let first_half = TimePeriod::months(2024, 1, 6);
        assert!((181..185).contains(&first_half.total_days()));
    }
}