Skip to main content

datasynth_core/distributions/
processing_lag.rs

//! Processing lag modeling for event-to-posting time delays.
//!
//! Models the realistic time delays between business events and their
//! recording in the accounting system, including cross-day posting logic.
5
6use chrono::{Duration, NaiveDate, NaiveDateTime, NaiveTime, Timelike};
7use rand::prelude::*;
8use rand_chacha::ChaCha8Rng;
9use serde::{Deserialize, Serialize};
10use std::collections::HashMap;
11
12/// Type of business event that triggers a posting.
13#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
14#[serde(rename_all = "snake_case")]
15pub enum EventType {
16    /// Sales order creation
17    SalesOrder,
18    /// Purchase order creation
19    PurchaseOrder,
20    /// Goods receipt
21    GoodsReceipt,
22    /// Invoice receipt (vendor)
23    InvoiceReceipt,
24    /// Invoice issue (customer)
25    InvoiceIssue,
26    /// Payment (incoming or outgoing)
27    Payment,
28    /// Manual journal entry
29    JournalEntry,
30    /// Accrual entry
31    Accrual,
32    /// Depreciation posting
33    Depreciation,
34    /// Intercompany transaction
35    Intercompany,
36    /// Period close adjustment
37    PeriodClose,
38}
39
40/// Distribution type for lag calculations.
41#[derive(Debug, Clone, Serialize, Deserialize)]
42#[serde(tag = "type", rename_all = "snake_case")]
43pub enum LagDistributionType {
44    /// Fixed lag (deterministic)
45    Fixed {
46        /// Lag in hours
47        hours: f64,
48    },
49    /// Normal distribution
50    Normal {
51        /// Mean lag in hours
52        mu: f64,
53        /// Standard deviation in hours
54        sigma: f64,
55    },
56    /// Log-normal distribution (common for processing delays)
57    LogNormal {
58        /// Log-scale mean
59        mu: f64,
60        /// Log-scale standard deviation
61        sigma: f64,
62    },
63    /// Exponential distribution
64    Exponential {
65        /// Rate parameter (1/mean)
66        lambda: f64,
67    },
68}
69
70impl Default for LagDistributionType {
71    fn default() -> Self {
72        Self::LogNormal {
73            mu: 0.5, // ~1.6 hours median
74            sigma: 0.8,
75        }
76    }
77}
78
79/// Configuration for a specific lag distribution.
80#[derive(Debug, Clone, Serialize, Deserialize)]
81pub struct LagDistribution {
82    /// Distribution type for sampling
83    #[serde(default)]
84    pub distribution: LagDistributionType,
85    /// Minimum lag in hours (floor)
86    #[serde(default)]
87    pub min_lag_hours: f64,
88    /// Maximum lag in hours (ceiling)
89    #[serde(default = "default_max_lag")]
90    pub max_lag_hours: f64,
91}
92
/// Serde fallback for `max_lag_hours`: three days, expressed in hours.
fn default_max_lag() -> f64 {
    const HOURS_PER_DAY: f64 = 24.0;
    3.0 * HOURS_PER_DAY
}
96
97impl Default for LagDistribution {
98    fn default() -> Self {
99        Self {
100            distribution: LagDistributionType::default(),
101            min_lag_hours: 0.0,
102            max_lag_hours: 72.0,
103        }
104    }
105}
106
107impl LagDistribution {
108    /// Create a fixed lag distribution.
109    pub fn fixed(hours: f64) -> Self {
110        Self {
111            distribution: LagDistributionType::Fixed { hours },
112            min_lag_hours: hours,
113            max_lag_hours: hours,
114        }
115    }
116
117    /// Create a log-normal distribution with typical accounting delays.
118    pub fn log_normal(mu: f64, sigma: f64) -> Self {
119        Self {
120            distribution: LagDistributionType::LogNormal { mu, sigma },
121            min_lag_hours: 0.0,
122            max_lag_hours: 72.0,
123        }
124    }
125
126    /// Create a normal distribution.
127    pub fn normal(mu: f64, sigma: f64) -> Self {
128        Self {
129            distribution: LagDistributionType::Normal { mu, sigma },
130            min_lag_hours: 0.0,
131            max_lag_hours: 72.0,
132        }
133    }
134
135    /// Sample a lag value in hours.
136    pub fn sample(&self, rng: &mut ChaCha8Rng) -> f64 {
137        let raw = match &self.distribution {
138            LagDistributionType::Fixed { hours } => *hours,
139            LagDistributionType::Normal { mu, sigma } => {
140                // Box-Muller transform for normal distribution
141                let u1: f64 = rng.gen();
142                let u2: f64 = rng.gen();
143                let z = (-2.0 * u1.ln()).sqrt() * (2.0 * std::f64::consts::PI * u2).cos();
144                mu + sigma * z
145            }
146            LagDistributionType::LogNormal { mu, sigma } => {
147                // Sample from log-normal
148                let u1: f64 = rng.gen();
149                let u2: f64 = rng.gen();
150                let z = (-2.0 * u1.ln()).sqrt() * (2.0 * std::f64::consts::PI * u2).cos();
151                (mu + sigma * z).exp()
152            }
153            LagDistributionType::Exponential { lambda } => {
154                let u: f64 = rng.gen();
155                -u.ln() / lambda
156            }
157        };
158
159        raw.clamp(self.min_lag_hours, self.max_lag_hours)
160    }
161}
162
163/// Configuration for cross-day posting behavior.
164#[derive(Debug, Clone, Serialize, Deserialize)]
165pub struct CrossDayConfig {
166    /// Enable cross-day posting logic
167    #[serde(default = "default_true")]
168    pub enabled: bool,
169    /// Probability of cross-day posting by hour
170    /// Keys are hours (0-23), values are probabilities (0.0-1.0)
171    #[serde(default)]
172    pub probability_by_hour: HashMap<u8, f64>,
173    /// Working day start hour (events before this may post same day)
174    #[serde(default = "default_work_start")]
175    pub work_start_hour: u8,
176    /// Working day end hour (events after this likely post next day)
177    #[serde(default = "default_work_end")]
178    pub work_end_hour: u8,
179    /// Cutoff hour for same-day posting
180    #[serde(default = "default_cutoff")]
181    pub same_day_cutoff_hour: u8,
182}
183
/// Serde fallback for boolean switches that are on unless explicitly disabled.
fn default_true() -> bool {
    true
}
187
/// Serde fallback for `work_start_hour`: 08:00.
fn default_work_start() -> u8 {
    const WORK_START_HOUR: u8 = 8;
    WORK_START_HOUR
}
191
/// Serde fallback for `work_end_hour`: 18:00.
fn default_work_end() -> u8 {
    const WORK_END_HOUR: u8 = 18;
    WORK_END_HOUR
}
195
/// Serde fallback for `same_day_cutoff_hour`: 16:00.
fn default_cutoff() -> u8 {
    const SAME_DAY_CUTOFF_HOUR: u8 = 16;
    SAME_DAY_CUTOFF_HOUR
}
199
200impl Default for CrossDayConfig {
201    fn default() -> Self {
202        let mut probability_by_hour = HashMap::new();
203        // After 5pm, increasing probability of next-day posting
204        probability_by_hour.insert(17, 0.3);
205        probability_by_hour.insert(18, 0.6);
206        probability_by_hour.insert(19, 0.8);
207        probability_by_hour.insert(20, 0.9);
208        probability_by_hour.insert(21, 0.95);
209        probability_by_hour.insert(22, 0.99);
210        probability_by_hour.insert(23, 0.99);
211
212        Self {
213            enabled: true,
214            probability_by_hour,
215            work_start_hour: 8,
216            work_end_hour: 18,
217            same_day_cutoff_hour: 16,
218        }
219    }
220}
221
222impl CrossDayConfig {
223    /// Get the probability of next-day posting for a given hour.
224    pub fn next_day_probability(&self, hour: u8) -> f64 {
225        if !self.enabled {
226            return 0.0;
227        }
228
229        if let Some(&prob) = self.probability_by_hour.get(&hour) {
230            return prob;
231        }
232
233        // Default behavior based on work hours
234        if hour < self.same_day_cutoff_hour {
235            0.0 // Before cutoff, same-day
236        } else if hour < self.work_end_hour {
237            0.2 // Late afternoon, some spillover
238        } else {
239            0.8 // After hours, likely next day
240        }
241    }
242}
243
244/// Full configuration for processing lags.
245#[derive(Debug, Clone, Serialize, Deserialize)]
246pub struct ProcessingLagConfig {
247    /// Enable processing lag calculations
248    #[serde(default = "default_true")]
249    pub enabled: bool,
250
251    /// Default lag distribution (used when event type not specified)
252    #[serde(default)]
253    pub default_lag: LagDistribution,
254
255    /// Event-specific lag distributions
256    #[serde(default)]
257    pub event_lags: HashMap<EventType, LagDistribution>,
258
259    /// Cross-day posting configuration
260    #[serde(default)]
261    pub cross_day: CrossDayConfig,
262}
263
264impl Default for ProcessingLagConfig {
265    fn default() -> Self {
266        let mut event_lags = HashMap::new();
267
268        // Typical lags for different event types
269        event_lags.insert(
270            EventType::SalesOrder,
271            LagDistribution::log_normal(0.5, 0.8), // Quick, ~1.6 hours median
272        );
273        event_lags.insert(
274            EventType::PurchaseOrder,
275            LagDistribution::log_normal(0.7, 0.6), // Slightly longer
276        );
277        event_lags.insert(
278            EventType::GoodsReceipt,
279            LagDistribution::log_normal(1.0, 0.5), // ~2.7 hours median
280        );
281        event_lags.insert(
282            EventType::InvoiceReceipt,
283            LagDistribution::log_normal(1.5, 0.6), // ~4.5 hours median
284        );
285        event_lags.insert(
286            EventType::InvoiceIssue,
287            LagDistribution::log_normal(0.3, 0.5), // Fast, ~1.3 hours
288        );
289        event_lags.insert(
290            EventType::Payment,
291            LagDistribution::log_normal(0.8, 0.7), // ~2.2 hours median
292        );
293        event_lags.insert(
294            EventType::JournalEntry,
295            LagDistribution::log_normal(0.0, 0.3), // Near instant, ~1 hour
296        );
297        event_lags.insert(
298            EventType::Accrual,
299            LagDistribution::fixed(0.0), // Immediate (batch)
300        );
301        event_lags.insert(
302            EventType::Depreciation,
303            LagDistribution::fixed(0.0), // Immediate (batch)
304        );
305        event_lags.insert(
306            EventType::Intercompany,
307            LagDistribution::log_normal(2.0, 0.8), // Longer due to coordination
308        );
309        event_lags.insert(
310            EventType::PeriodClose,
311            LagDistribution::fixed(0.0), // Immediate (batch)
312        );
313
314        Self {
315            enabled: true,
316            default_lag: LagDistribution::log_normal(0.5, 0.8),
317            event_lags,
318            cross_day: CrossDayConfig::default(),
319        }
320    }
321}
322
323impl ProcessingLagConfig {
324    /// Get the lag distribution for an event type.
325    pub fn get_lag_distribution(&self, event_type: EventType) -> &LagDistribution {
326        self.event_lags
327            .get(&event_type)
328            .unwrap_or(&self.default_lag)
329    }
330}
331
332/// Calculator for processing lags.
333pub struct ProcessingLagCalculator {
334    config: ProcessingLagConfig,
335    rng: ChaCha8Rng,
336}
337
338impl ProcessingLagCalculator {
339    /// Create a new calculator with default configuration.
340    pub fn new(seed: u64) -> Self {
341        Self {
342            config: ProcessingLagConfig::default(),
343            rng: ChaCha8Rng::seed_from_u64(seed),
344        }
345    }
346
347    /// Create with custom configuration.
348    pub fn with_config(seed: u64, config: ProcessingLagConfig) -> Self {
349        Self {
350            config,
351            rng: ChaCha8Rng::seed_from_u64(seed),
352        }
353    }
354
355    /// Calculate the posting time for an event.
356    ///
357    /// Takes the event datetime and returns the datetime when it would be posted
358    /// to the accounting system.
359    pub fn calculate_posting_time(
360        &mut self,
361        event_type: EventType,
362        event_datetime: NaiveDateTime,
363    ) -> NaiveDateTime {
364        if !self.config.enabled {
365            return event_datetime;
366        }
367
368        // Get lag distribution for this event type
369        let lag_dist = self.config.get_lag_distribution(event_type);
370        let lag_hours = lag_dist.sample(&mut self.rng);
371
372        // Calculate raw posting time
373        let lag_seconds = (lag_hours * 3600.0) as i64;
374        let mut posting_time = event_datetime + Duration::seconds(lag_seconds);
375
376        // Check for cross-day posting
377        if self.should_post_next_day(event_datetime.hour() as u8) {
378            // Move to next business day morning
379            let next_day = event_datetime.date() + Duration::days(1);
380            let morning_hour = self.config.cross_day.work_start_hour as u32;
381            let morning_minute: u32 = self.rng.gen_range(0..60);
382            posting_time = NaiveDateTime::new(
383                next_day,
384                NaiveTime::from_hms_opt(morning_hour, morning_minute, 0).unwrap(),
385            );
386        }
387
388        // Ensure posting is not before event
389        if posting_time < event_datetime {
390            posting_time = event_datetime;
391        }
392
393        posting_time
394    }
395
396    /// Determine if an event should be posted the next day based on its hour.
397    pub fn should_post_next_day(&mut self, hour: u8) -> bool {
398        let prob = self.config.cross_day.next_day_probability(hour);
399        self.rng.gen::<f64>() < prob
400    }
401
402    /// Calculate posting date (ignoring time).
403    pub fn calculate_posting_date(
404        &mut self,
405        event_type: EventType,
406        event_date: NaiveDate,
407        event_hour: u8,
408    ) -> NaiveDate {
409        let event_time = NaiveTime::from_hms_opt(event_hour as u32, 0, 0).unwrap();
410        let event_datetime = NaiveDateTime::new(event_date, event_time);
411        self.calculate_posting_time(event_type, event_datetime)
412            .date()
413    }
414
415    /// Get the configuration.
416    pub fn config(&self) -> &ProcessingLagConfig {
417        &self.config
418    }
419
420    /// Reset with a new seed.
421    pub fn reset(&mut self, seed: u64) {
422        self.rng = ChaCha8Rng::seed_from_u64(seed);
423    }
424}
425
426/// Schema configuration for YAML/JSON deserialization.
427#[derive(Debug, Clone, Default, Serialize, Deserialize)]
428pub struct ProcessingLagSchemaConfig {
429    /// Enable processing lag calculations
430    #[serde(default = "default_true")]
431    pub enabled: bool,
432
433    /// Sales order lag (log-normal mu, sigma)
434    #[serde(default)]
435    pub sales_order_lag: Option<LagSchemaConfig>,
436
437    /// Purchase order lag
438    #[serde(default)]
439    pub purchase_order_lag: Option<LagSchemaConfig>,
440
441    /// Goods receipt lag
442    #[serde(default)]
443    pub goods_receipt_lag: Option<LagSchemaConfig>,
444
445    /// Invoice receipt lag
446    #[serde(default)]
447    pub invoice_receipt_lag: Option<LagSchemaConfig>,
448
449    /// Invoice issue lag
450    #[serde(default)]
451    pub invoice_issue_lag: Option<LagSchemaConfig>,
452
453    /// Payment lag
454    #[serde(default)]
455    pub payment_lag: Option<LagSchemaConfig>,
456
457    /// Journal entry lag
458    #[serde(default)]
459    pub journal_entry_lag: Option<LagSchemaConfig>,
460
461    /// Cross-day posting configuration
462    #[serde(default)]
463    pub cross_day_posting: Option<CrossDaySchemaConfig>,
464}
465
466/// Schema config for a lag distribution.
467#[derive(Debug, Clone, Serialize, Deserialize)]
468pub struct LagSchemaConfig {
469    /// Log-scale mean (for log-normal)
470    pub mu: f64,
471    /// Log-scale standard deviation (for log-normal)
472    pub sigma: f64,
473    /// Minimum lag in hours
474    #[serde(default)]
475    pub min_hours: Option<f64>,
476    /// Maximum lag in hours
477    #[serde(default)]
478    pub max_hours: Option<f64>,
479}
480
481impl LagSchemaConfig {
482    /// Convert to LagDistribution.
483    pub fn to_distribution(&self) -> LagDistribution {
484        LagDistribution {
485            distribution: LagDistributionType::LogNormal {
486                mu: self.mu,
487                sigma: self.sigma,
488            },
489            min_lag_hours: self.min_hours.unwrap_or(0.0),
490            max_lag_hours: self.max_hours.unwrap_or(72.0),
491        }
492    }
493}
494
495/// Schema config for cross-day posting.
496#[derive(Debug, Clone, Default, Serialize, Deserialize)]
497pub struct CrossDaySchemaConfig {
498    /// Enable cross-day posting
499    #[serde(default = "default_true")]
500    pub enabled: bool,
501
502    /// Probability by hour (map of hour -> probability)
503    #[serde(default)]
504    pub probability_by_hour: HashMap<u8, f64>,
505}
506
507impl ProcessingLagSchemaConfig {
508    /// Convert to ProcessingLagConfig.
509    pub fn to_config(&self) -> ProcessingLagConfig {
510        let mut config = ProcessingLagConfig {
511            enabled: self.enabled,
512            ..Default::default()
513        };
514
515        // Apply event-specific lags
516        if let Some(lag) = &self.sales_order_lag {
517            config
518                .event_lags
519                .insert(EventType::SalesOrder, lag.to_distribution());
520        }
521        if let Some(lag) = &self.purchase_order_lag {
522            config
523                .event_lags
524                .insert(EventType::PurchaseOrder, lag.to_distribution());
525        }
526        if let Some(lag) = &self.goods_receipt_lag {
527            config
528                .event_lags
529                .insert(EventType::GoodsReceipt, lag.to_distribution());
530        }
531        if let Some(lag) = &self.invoice_receipt_lag {
532            config
533                .event_lags
534                .insert(EventType::InvoiceReceipt, lag.to_distribution());
535        }
536        if let Some(lag) = &self.invoice_issue_lag {
537            config
538                .event_lags
539                .insert(EventType::InvoiceIssue, lag.to_distribution());
540        }
541        if let Some(lag) = &self.payment_lag {
542            config
543                .event_lags
544                .insert(EventType::Payment, lag.to_distribution());
545        }
546        if let Some(lag) = &self.journal_entry_lag {
547            config
548                .event_lags
549                .insert(EventType::JournalEntry, lag.to_distribution());
550        }
551
552        // Apply cross-day config
553        if let Some(cross_day) = &self.cross_day_posting {
554            config.cross_day.enabled = cross_day.enabled;
555            if !cross_day.probability_by_hour.is_empty() {
556                config.cross_day.probability_by_hour = cross_day.probability_by_hour.clone();
557            }
558        }
559
560        config
561    }
562}
563
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a timestamp on 2024-06-15 at the given full hour.
    fn june_15_at(hour: u32) -> NaiveDateTime {
        NaiveDateTime::new(
            NaiveDate::from_ymd_opt(2024, 6, 15).unwrap(),
            NaiveTime::from_hms_opt(hour, 0, 0).unwrap(),
        )
    }

    #[test]
    fn test_fixed_lag() {
        let lag = LagDistribution::fixed(2.0);
        let mut rng = ChaCha8Rng::seed_from_u64(42);

        // Should always return 2.0
        for _ in 0..10 {
            assert!((lag.sample(&mut rng) - 2.0).abs() < 0.01);
        }
    }

    #[test]
    fn test_log_normal_lag() {
        let lag = LagDistribution::log_normal(0.5, 0.5);
        let mut rng = ChaCha8Rng::seed_from_u64(42);

        let mut samples: Vec<f64> = (0..1000).map(|_| lag.sample(&mut rng)).collect();
        samples.sort_by(|a, b| a.partial_cmp(b).expect("samples must not be NaN"));

        // Median should be around e^0.5 ≈ 1.65
        let median = samples[500];
        assert!(median > 1.0 && median < 3.0);

        // All values should be within bounds
        assert!(samples.iter().all(|&x| (0.0..=72.0).contains(&x)));
    }

    #[test]
    fn test_cross_day_probability() {
        let config = CrossDayConfig::default();

        // Early morning - no cross-day
        assert!(config.next_day_probability(8) < 0.1);

        // Mid-day - no cross-day
        assert!(config.next_day_probability(14) < 0.1);

        // After 5pm - increasing probability
        assert!(config.next_day_probability(17) > 0.2);
        assert!(config.next_day_probability(19) > 0.7);
        assert!(config.next_day_probability(22) > 0.9);
    }

    #[test]
    fn test_processing_lag_calculator() {
        let mut calc = ProcessingLagCalculator::new(42);
        let event_time = june_15_at(10);

        let posting_time = calc.calculate_posting_time(EventType::SalesOrder, event_time);

        // Posting should be after event
        assert!(posting_time >= event_time);

        // For sales orders with mid-morning event, posting should be same day or close
        let hours_diff = (posting_time - event_time).num_hours();
        assert!(hours_diff < 24);
    }

    #[test]
    fn test_late_event_cross_day() {
        // Test with high probability of cross-day posting
        let mut config = ProcessingLagConfig::default();
        config.cross_day.probability_by_hour.insert(22, 1.0); // Force next-day for 10pm

        let mut calc = ProcessingLagCalculator::with_config(42, config);
        let event_time = june_15_at(22);

        let posting_time = calc.calculate_posting_time(EventType::SalesOrder, event_time);

        // Should post next day
        assert!(posting_time.date() > event_time.date());
    }

    #[test]
    fn test_event_specific_lags() {
        let config = ProcessingLagConfig::default();

        // Accrual and depreciation should have fixed 0 lag
        for event in [EventType::Accrual, EventType::Depreciation] {
            match &config.get_lag_distribution(event).distribution {
                LagDistributionType::Fixed { hours } => assert!(hours.abs() < 0.01),
                other => panic!("{:?} should have fixed lag, got {:?}", event, other),
            }
        }

        // Invoice receipt should have longer lag than sales order. A variant
        // mismatch is a failure, not a silent pass.
        let invoice_lag = config.get_lag_distribution(EventType::InvoiceReceipt);
        let sales_lag = config.get_lag_distribution(EventType::SalesOrder);

        match (&invoice_lag.distribution, &sales_lag.distribution) {
            (
                LagDistributionType::LogNormal { mu: inv_mu, .. },
                LagDistributionType::LogNormal { mu: sales_mu, .. },
            ) => assert!(inv_mu > sales_mu),
            other => panic!("expected log-normal lags for both events, got {:?}", other),
        }
    }

    #[test]
    fn test_schema_config_conversion() {
        let schema = ProcessingLagSchemaConfig {
            enabled: true,
            sales_order_lag: Some(LagSchemaConfig {
                mu: 1.0,
                sigma: 0.5,
                min_hours: Some(0.5),
                max_hours: Some(24.0),
            }),
            cross_day_posting: Some(CrossDaySchemaConfig {
                enabled: true,
                probability_by_hour: {
                    let mut m = HashMap::new();
                    m.insert(18, 0.5);
                    m
                },
            }),
            ..Default::default()
        };

        let config = schema.to_config();

        // Check sales order lag was customized
        let sales_lag = config.get_lag_distribution(EventType::SalesOrder);
        assert!((sales_lag.min_lag_hours - 0.5).abs() < 0.01);
        assert!((sales_lag.max_lag_hours - 24.0).abs() < 0.01);

        // Check cross-day config
        assert_eq!(config.cross_day.probability_by_hour.get(&18), Some(&0.5));
    }

    #[test]
    fn test_calculate_posting_date() {
        let mut calc = ProcessingLagCalculator::new(42);

        let event_date = NaiveDate::from_ymd_opt(2024, 6, 15).unwrap();
        let posting_date = calc.calculate_posting_date(EventType::JournalEntry, event_date, 10);

        // Journal entries are quick, should be same day or close
        let days_diff = (posting_date - event_date).num_days();
        assert!(days_diff <= 1);
    }
}
713        // Journal entries are quick, should be same day or close
714        let days_diff = (posting_date - event_date).num_days();
715        assert!(days_diff <= 1);
716    }
717}