Skip to main content

datasynth_generators/
je_generator.rs

1//! Journal Entry generator with statistical distributions.
2
3use chrono::{Datelike, NaiveDate};
4use datasynth_core::utils::seeded_rng;
5use rand::prelude::*;
6use rand_chacha::ChaCha8Rng;
7use rust_decimal::prelude::*;
8use rust_decimal::Decimal;
9use std::sync::Arc;
10
11use tracing::debug;
12
13use datasynth_config::schema::{
14    FraudConfig, GeneratorConfig, TemplateConfig, TemporalPatternsConfig, TransactionConfig,
15};
16use datasynth_core::distributions::{
17    BusinessDayCalculator, CrossDayConfig, DriftAdjustments, DriftConfig, DriftController,
18    EventType, LagDistribution, PeriodEndConfig, PeriodEndDynamics, PeriodEndModel,
19    ProcessingLagCalculator, ProcessingLagConfig, *,
20};
21use datasynth_core::models::*;
22use datasynth_core::templates::{
23    descriptions::DescriptionContext, DescriptionGenerator, ReferenceGenerator, ReferenceType,
24};
25use datasynth_core::traits::Generator;
26use datasynth_core::uuid_factory::{DeterministicUuidFactory, GeneratorType};
27use datasynth_core::CountryPack;
28
29use crate::company_selector::WeightedCompanySelector;
30use crate::user_generator::{UserGenerator, UserGeneratorConfig};
31
32/// Generator for realistic journal entries.
33pub struct JournalEntryGenerator {
34    rng: ChaCha8Rng,
35    seed: u64,
36    config: TransactionConfig,
37    coa: Arc<ChartOfAccounts>,
38    companies: Vec<String>,
39    company_selector: WeightedCompanySelector,
40    line_sampler: LineItemSampler,
41    amount_sampler: AmountSampler,
42    temporal_sampler: TemporalSampler,
43    start_date: NaiveDate,
44    end_date: NaiveDate,
45    count: u64,
46    uuid_factory: DeterministicUuidFactory,
47    // Enhanced features
48    user_pool: Option<UserPool>,
49    description_generator: DescriptionGenerator,
50    reference_generator: ReferenceGenerator,
51    template_config: TemplateConfig,
52    vendor_pool: VendorPool,
53    customer_pool: CustomerPool,
54    // Material pool for realistic material references
55    material_pool: Option<MaterialPool>,
56    // Flag indicating whether we're using real master data vs defaults
57    using_real_master_data: bool,
58    // Fraud generation
59    fraud_config: FraudConfig,
60    // Persona-based error injection
61    persona_errors_enabled: bool,
62    // Approval threshold enforcement
63    approval_enabled: bool,
64    approval_threshold: rust_decimal::Decimal,
65    // Batching behavior - humans often process similar items together
66    batch_state: Option<BatchState>,
67    // Temporal drift controller for simulating distribution changes over time
68    drift_controller: Option<DriftController>,
69    // Temporal patterns components
70    business_day_calculator: Option<BusinessDayCalculator>,
71    processing_lag_calculator: Option<ProcessingLagCalculator>,
72    temporal_patterns_config: Option<TemporalPatternsConfig>,
73}
74
75/// State for tracking batch processing behavior.
76///
77/// When humans process transactions, they often batch similar items together
78/// (e.g., processing all invoices from one vendor, entering similar expenses).
79#[derive(Clone)]
80struct BatchState {
81    /// The base entry template to vary
82    base_account_number: String,
83    base_amount: rust_decimal::Decimal,
84    base_business_process: Option<BusinessProcess>,
85    base_posting_date: NaiveDate,
86    /// Remaining entries in this batch
87    remaining: u8,
88}
89
90impl JournalEntryGenerator {
91    /// Create a new journal entry generator.
92    pub fn new_with_params(
93        config: TransactionConfig,
94        coa: Arc<ChartOfAccounts>,
95        companies: Vec<String>,
96        start_date: NaiveDate,
97        end_date: NaiveDate,
98        seed: u64,
99    ) -> Self {
100        Self::new_with_full_config(
101            config,
102            coa,
103            companies,
104            start_date,
105            end_date,
106            seed,
107            TemplateConfig::default(),
108            None,
109        )
110    }
111
112    /// Create a new journal entry generator with full configuration.
113    #[allow(clippy::too_many_arguments)]
114    pub fn new_with_full_config(
115        config: TransactionConfig,
116        coa: Arc<ChartOfAccounts>,
117        companies: Vec<String>,
118        start_date: NaiveDate,
119        end_date: NaiveDate,
120        seed: u64,
121        template_config: TemplateConfig,
122        user_pool: Option<UserPool>,
123    ) -> Self {
124        // Initialize user pool if not provided
125        let user_pool = user_pool.or_else(|| {
126            if template_config.names.generate_realistic_names {
127                let user_gen_config = UserGeneratorConfig {
128                    culture_distribution: vec![
129                        (
130                            datasynth_core::templates::NameCulture::WesternUs,
131                            template_config.names.culture_distribution.western_us,
132                        ),
133                        (
134                            datasynth_core::templates::NameCulture::Hispanic,
135                            template_config.names.culture_distribution.hispanic,
136                        ),
137                        (
138                            datasynth_core::templates::NameCulture::German,
139                            template_config.names.culture_distribution.german,
140                        ),
141                        (
142                            datasynth_core::templates::NameCulture::French,
143                            template_config.names.culture_distribution.french,
144                        ),
145                        (
146                            datasynth_core::templates::NameCulture::Chinese,
147                            template_config.names.culture_distribution.chinese,
148                        ),
149                        (
150                            datasynth_core::templates::NameCulture::Japanese,
151                            template_config.names.culture_distribution.japanese,
152                        ),
153                        (
154                            datasynth_core::templates::NameCulture::Indian,
155                            template_config.names.culture_distribution.indian,
156                        ),
157                    ],
158                    email_domain: template_config.names.email_domain.clone(),
159                    generate_realistic_names: true,
160                };
161                let mut user_gen = UserGenerator::with_config(seed + 100, user_gen_config);
162                Some(user_gen.generate_standard(&companies))
163            } else {
164                None
165            }
166        });
167
168        // Initialize reference generator
169        let mut ref_gen = ReferenceGenerator::new(
170            start_date.year(),
171            companies.first().map(|s| s.as_str()).unwrap_or("1000"),
172        );
173        ref_gen.set_prefix(
174            ReferenceType::Invoice,
175            &template_config.references.invoice_prefix,
176        );
177        ref_gen.set_prefix(
178            ReferenceType::PurchaseOrder,
179            &template_config.references.po_prefix,
180        );
181        ref_gen.set_prefix(
182            ReferenceType::SalesOrder,
183            &template_config.references.so_prefix,
184        );
185
186        // Create weighted company selector (uniform weights for this constructor)
187        let company_selector = WeightedCompanySelector::uniform(companies.clone());
188
189        Self {
190            rng: seeded_rng(seed, 0),
191            seed,
192            config: config.clone(),
193            coa,
194            companies,
195            company_selector,
196            line_sampler: LineItemSampler::with_config(
197                seed + 1,
198                config.line_item_distribution.clone(),
199                config.even_odd_distribution.clone(),
200                config.debit_credit_distribution.clone(),
201            ),
202            amount_sampler: AmountSampler::with_config(seed + 2, config.amounts.clone()),
203            temporal_sampler: TemporalSampler::with_config(
204                seed + 3,
205                config.seasonality.clone(),
206                WorkingHoursConfig::default(),
207                Vec::new(),
208            ),
209            start_date,
210            end_date,
211            count: 0,
212            uuid_factory: DeterministicUuidFactory::new(seed, GeneratorType::JournalEntry),
213            user_pool,
214            description_generator: DescriptionGenerator::new(),
215            reference_generator: ref_gen,
216            template_config,
217            vendor_pool: VendorPool::standard(),
218            customer_pool: CustomerPool::standard(),
219            material_pool: None,
220            using_real_master_data: false,
221            fraud_config: FraudConfig::default(),
222            persona_errors_enabled: true, // Enable by default for realism
223            approval_enabled: true,       // Enable by default for realism
224            approval_threshold: rust_decimal::Decimal::new(10000, 0), // $10,000 default threshold
225            batch_state: None,
226            drift_controller: None,
227            business_day_calculator: None,
228            processing_lag_calculator: None,
229            temporal_patterns_config: None,
230        }
231    }
232
233    /// Create from a full GeneratorConfig.
234    ///
235    /// This constructor uses the volume_weight from company configs
236    /// for weighted company selection, and fraud config from GeneratorConfig.
237    pub fn from_generator_config(
238        full_config: &GeneratorConfig,
239        coa: Arc<ChartOfAccounts>,
240        start_date: NaiveDate,
241        end_date: NaiveDate,
242        seed: u64,
243    ) -> Self {
244        let companies: Vec<String> = full_config
245            .companies
246            .iter()
247            .map(|c| c.code.clone())
248            .collect();
249
250        // Create weighted selector using volume_weight from company configs
251        let company_selector = WeightedCompanySelector::from_configs(&full_config.companies);
252
253        let mut generator = Self::new_with_full_config(
254            full_config.transactions.clone(),
255            coa,
256            companies,
257            start_date,
258            end_date,
259            seed,
260            full_config.templates.clone(),
261            None,
262        );
263
264        // Override the uniform selector with weighted selector
265        generator.company_selector = company_selector;
266
267        // Set fraud config
268        generator.fraud_config = full_config.fraud.clone();
269
270        // Configure temporal patterns if enabled
271        let temporal_config = &full_config.temporal_patterns;
272        if temporal_config.enabled {
273            generator = generator.with_temporal_patterns(temporal_config.clone(), seed);
274        }
275
276        generator
277    }
278
279    /// Configure temporal patterns including business day calculations and processing lags.
280    ///
281    /// This enables realistic temporal behavior including:
282    /// - Business day awareness (no postings on weekends/holidays)
283    /// - Processing lag modeling (event-to-posting delays)
284    /// - Period-end dynamics (volume spikes at month/quarter/year end)
285    pub fn with_temporal_patterns(mut self, config: TemporalPatternsConfig, seed: u64) -> Self {
286        // Create business day calculator if enabled
287        if config.business_days.enabled {
288            let region = config
289                .calendars
290                .regions
291                .first()
292                .map(|r| Self::parse_region(r))
293                .unwrap_or(Region::US);
294
295            let calendar = HolidayCalendar::new(region, self.start_date.year());
296            self.business_day_calculator = Some(BusinessDayCalculator::new(calendar));
297        }
298
299        // Create processing lag calculator if enabled
300        if config.processing_lags.enabled {
301            let lag_config = Self::convert_processing_lag_config(&config.processing_lags);
302            self.processing_lag_calculator =
303                Some(ProcessingLagCalculator::with_config(seed, lag_config));
304        }
305
306        // Create period-end dynamics if configured
307        let model = config.period_end.model.as_deref().unwrap_or("flat");
308        if model != "flat"
309            || config
310                .period_end
311                .month_end
312                .as_ref()
313                .is_some_and(|m| m.peak_multiplier.unwrap_or(1.0) != 1.0)
314        {
315            let dynamics = Self::convert_period_end_config(&config.period_end);
316            self.temporal_sampler.set_period_end_dynamics(dynamics);
317        }
318
319        self.temporal_patterns_config = Some(config);
320        self
321    }
322
323    /// Configure temporal patterns using a [`CountryPack`] for the holiday calendar.
324    ///
325    /// This is an alternative to [`with_temporal_patterns`] that derives the
326    /// holiday calendar from a country-pack definition rather than the built-in
327    /// region-based calendars.  All other temporal behaviour (business-day
328    /// adjustment, processing lags, period-end dynamics) is configured
329    /// identically.
330    pub fn with_country_pack_temporal(
331        mut self,
332        config: TemporalPatternsConfig,
333        seed: u64,
334        pack: &CountryPack,
335    ) -> Self {
336        // Create business day calculator using the country pack calendar
337        if config.business_days.enabled {
338            let calendar = HolidayCalendar::from_country_pack(pack, self.start_date.year());
339            self.business_day_calculator = Some(BusinessDayCalculator::new(calendar));
340        }
341
342        // Create processing lag calculator if enabled
343        if config.processing_lags.enabled {
344            let lag_config = Self::convert_processing_lag_config(&config.processing_lags);
345            self.processing_lag_calculator =
346                Some(ProcessingLagCalculator::with_config(seed, lag_config));
347        }
348
349        // Create period-end dynamics if configured
350        let model = config.period_end.model.as_deref().unwrap_or("flat");
351        if model != "flat"
352            || config
353                .period_end
354                .month_end
355                .as_ref()
356                .is_some_and(|m| m.peak_multiplier.unwrap_or(1.0) != 1.0)
357        {
358            let dynamics = Self::convert_period_end_config(&config.period_end);
359            self.temporal_sampler.set_period_end_dynamics(dynamics);
360        }
361
362        self.temporal_patterns_config = Some(config);
363        self
364    }
365
366    /// Convert schema processing lag config to core config.
367    fn convert_processing_lag_config(
368        schema: &datasynth_config::schema::ProcessingLagSchemaConfig,
369    ) -> ProcessingLagConfig {
370        let mut config = ProcessingLagConfig {
371            enabled: schema.enabled,
372            ..Default::default()
373        };
374
375        // Helper to convert lag schema to distribution
376        let convert_lag = |lag: &datasynth_config::schema::LagDistributionSchemaConfig| {
377            let mut dist = LagDistribution::log_normal(lag.mu, lag.sigma);
378            if let Some(min) = lag.min_hours {
379                dist.min_lag_hours = min;
380            }
381            if let Some(max) = lag.max_hours {
382                dist.max_lag_hours = max;
383            }
384            dist
385        };
386
387        // Apply event-specific lags
388        if let Some(ref lag) = schema.sales_order_lag {
389            config
390                .event_lags
391                .insert(EventType::SalesOrder, convert_lag(lag));
392        }
393        if let Some(ref lag) = schema.purchase_order_lag {
394            config
395                .event_lags
396                .insert(EventType::PurchaseOrder, convert_lag(lag));
397        }
398        if let Some(ref lag) = schema.goods_receipt_lag {
399            config
400                .event_lags
401                .insert(EventType::GoodsReceipt, convert_lag(lag));
402        }
403        if let Some(ref lag) = schema.invoice_receipt_lag {
404            config
405                .event_lags
406                .insert(EventType::InvoiceReceipt, convert_lag(lag));
407        }
408        if let Some(ref lag) = schema.invoice_issue_lag {
409            config
410                .event_lags
411                .insert(EventType::InvoiceIssue, convert_lag(lag));
412        }
413        if let Some(ref lag) = schema.payment_lag {
414            config
415                .event_lags
416                .insert(EventType::Payment, convert_lag(lag));
417        }
418        if let Some(ref lag) = schema.journal_entry_lag {
419            config
420                .event_lags
421                .insert(EventType::JournalEntry, convert_lag(lag));
422        }
423
424        // Apply cross-day posting config
425        if let Some(ref cross_day) = schema.cross_day_posting {
426            config.cross_day = CrossDayConfig {
427                enabled: cross_day.enabled,
428                probability_by_hour: cross_day.probability_by_hour.clone(),
429                ..Default::default()
430            };
431        }
432
433        config
434    }
435
436    /// Convert schema period-end config to core PeriodEndDynamics.
437    fn convert_period_end_config(
438        schema: &datasynth_config::schema::PeriodEndSchemaConfig,
439    ) -> PeriodEndDynamics {
440        let model_type = schema.model.as_deref().unwrap_or("exponential");
441
442        // Helper to convert period config
443        let convert_period =
444            |period: Option<&datasynth_config::schema::PeriodEndModelSchemaConfig>,
445             default_peak: f64|
446             -> PeriodEndConfig {
447                if let Some(p) = period {
448                    let model = match model_type {
449                        "flat" => PeriodEndModel::FlatMultiplier {
450                            multiplier: p.peak_multiplier.unwrap_or(default_peak),
451                        },
452                        "extended_crunch" => PeriodEndModel::ExtendedCrunch {
453                            start_day: p.start_day.unwrap_or(-10),
454                            sustained_high_days: p.sustained_high_days.unwrap_or(3),
455                            peak_multiplier: p.peak_multiplier.unwrap_or(default_peak),
456                            ramp_up_days: 3, // Default ramp-up period
457                        },
458                        _ => PeriodEndModel::ExponentialAcceleration {
459                            start_day: p.start_day.unwrap_or(-10),
460                            base_multiplier: p.base_multiplier.unwrap_or(1.0),
461                            peak_multiplier: p.peak_multiplier.unwrap_or(default_peak),
462                            decay_rate: p.decay_rate.unwrap_or(0.3),
463                        },
464                    };
465                    PeriodEndConfig {
466                        enabled: true,
467                        model,
468                        additional_multiplier: p.additional_multiplier.unwrap_or(1.0),
469                    }
470                } else {
471                    PeriodEndConfig {
472                        enabled: true,
473                        model: PeriodEndModel::ExponentialAcceleration {
474                            start_day: -10,
475                            base_multiplier: 1.0,
476                            peak_multiplier: default_peak,
477                            decay_rate: 0.3,
478                        },
479                        additional_multiplier: 1.0,
480                    }
481                }
482            };
483
484        PeriodEndDynamics::new(
485            convert_period(schema.month_end.as_ref(), 2.0),
486            convert_period(schema.quarter_end.as_ref(), 3.5),
487            convert_period(schema.year_end.as_ref(), 5.0),
488        )
489    }
490
491    /// Parse a region string into a Region enum.
492    fn parse_region(region_str: &str) -> Region {
493        match region_str.to_uppercase().as_str() {
494            "US" => Region::US,
495            "DE" => Region::DE,
496            "GB" => Region::GB,
497            "CN" => Region::CN,
498            "JP" => Region::JP,
499            "IN" => Region::IN,
500            "BR" => Region::BR,
501            "MX" => Region::MX,
502            "AU" => Region::AU,
503            "SG" => Region::SG,
504            "KR" => Region::KR,
505            _ => Region::US,
506        }
507    }
508
509    /// Set a custom company selector.
510    pub fn set_company_selector(&mut self, selector: WeightedCompanySelector) {
511        self.company_selector = selector;
512    }
513
514    /// Get the current company selector.
515    pub fn company_selector(&self) -> &WeightedCompanySelector {
516        &self.company_selector
517    }
518
519    /// Set fraud configuration.
520    pub fn set_fraud_config(&mut self, config: FraudConfig) {
521        self.fraud_config = config;
522    }
523
524    /// Set vendors from generated master data.
525    ///
526    /// This replaces the default vendor pool with actual generated vendors,
527    /// ensuring JEs reference real master data entities.
528    pub fn with_vendors(mut self, vendors: &[Vendor]) -> Self {
529        if !vendors.is_empty() {
530            self.vendor_pool = VendorPool::from_vendors(vendors.to_vec());
531            self.using_real_master_data = true;
532        }
533        self
534    }
535
536    /// Set customers from generated master data.
537    ///
538    /// This replaces the default customer pool with actual generated customers,
539    /// ensuring JEs reference real master data entities.
540    pub fn with_customers(mut self, customers: &[Customer]) -> Self {
541        if !customers.is_empty() {
542            self.customer_pool = CustomerPool::from_customers(customers.to_vec());
543            self.using_real_master_data = true;
544        }
545        self
546    }
547
548    /// Set materials from generated master data.
549    ///
550    /// This provides material references for JEs that involve inventory movements.
551    pub fn with_materials(mut self, materials: &[Material]) -> Self {
552        if !materials.is_empty() {
553            self.material_pool = Some(MaterialPool::from_materials(materials.to_vec()));
554            self.using_real_master_data = true;
555        }
556        self
557    }
558
559    /// Set all master data at once for convenience.
560    ///
561    /// This is the recommended way to configure the JE generator with
562    /// generated master data to ensure data coherence.
563    pub fn with_master_data(
564        self,
565        vendors: &[Vendor],
566        customers: &[Customer],
567        materials: &[Material],
568    ) -> Self {
569        self.with_vendors(vendors)
570            .with_customers(customers)
571            .with_materials(materials)
572    }
573
574    /// Replace the user pool with one generated from a [`CountryPack`].
575    ///
576    /// This is an alternative to the default name-culture distribution that
577    /// derives name pools and weights from the country-pack's `names` section.
578    /// The existing user pool (if any) is discarded and regenerated using
579    /// [`MultiCultureNameGenerator::from_country_pack`].
580    pub fn with_country_pack_names(mut self, pack: &CountryPack) -> Self {
581        let name_gen =
582            datasynth_core::templates::MultiCultureNameGenerator::from_country_pack(pack);
583        let config = UserGeneratorConfig {
584            // The culture distribution is embedded in the name generator
585            // itself, so we use an empty list here.
586            culture_distribution: Vec::new(),
587            email_domain: name_gen.email_domain().to_string(),
588            generate_realistic_names: true,
589        };
590        let mut user_gen = UserGenerator::with_name_generator(self.seed + 100, config, name_gen);
591        self.user_pool = Some(user_gen.generate_standard(&self.companies));
592        self
593    }
594
595    /// Check if the generator is using real master data.
596    pub fn is_using_real_master_data(&self) -> bool {
597        self.using_real_master_data
598    }
599
600    /// Determine if this transaction should be fraudulent.
601    fn determine_fraud(&mut self) -> Option<FraudType> {
602        if !self.fraud_config.enabled {
603            return None;
604        }
605
606        // Roll for fraud based on fraud rate
607        if self.rng.gen::<f64>() >= self.fraud_config.fraud_rate {
608            return None;
609        }
610
611        // Select fraud type based on distribution
612        Some(self.select_fraud_type())
613    }
614
615    /// Select a fraud type based on the configured distribution.
616    fn select_fraud_type(&mut self) -> FraudType {
617        let dist = &self.fraud_config.fraud_type_distribution;
618        let roll: f64 = self.rng.gen();
619
620        let mut cumulative = 0.0;
621
622        cumulative += dist.suspense_account_abuse;
623        if roll < cumulative {
624            return FraudType::SuspenseAccountAbuse;
625        }
626
627        cumulative += dist.fictitious_transaction;
628        if roll < cumulative {
629            return FraudType::FictitiousTransaction;
630        }
631
632        cumulative += dist.revenue_manipulation;
633        if roll < cumulative {
634            return FraudType::RevenueManipulation;
635        }
636
637        cumulative += dist.expense_capitalization;
638        if roll < cumulative {
639            return FraudType::ExpenseCapitalization;
640        }
641
642        cumulative += dist.split_transaction;
643        if roll < cumulative {
644            return FraudType::SplitTransaction;
645        }
646
647        cumulative += dist.timing_anomaly;
648        if roll < cumulative {
649            return FraudType::TimingAnomaly;
650        }
651
652        cumulative += dist.unauthorized_access;
653        if roll < cumulative {
654            return FraudType::UnauthorizedAccess;
655        }
656
657        // Default fallback
658        FraudType::DuplicatePayment
659    }
660
661    /// Map a fraud type to an amount pattern for suspicious amounts.
662    fn fraud_type_to_amount_pattern(&self, fraud_type: FraudType) -> FraudAmountPattern {
663        match fraud_type {
664            FraudType::SplitTransaction | FraudType::JustBelowThreshold => {
665                FraudAmountPattern::ThresholdAdjacent
666            }
667            FraudType::FictitiousTransaction
668            | FraudType::FictitiousEntry
669            | FraudType::SuspenseAccountAbuse
670            | FraudType::RoundDollarManipulation => FraudAmountPattern::ObviousRoundNumbers,
671            FraudType::RevenueManipulation
672            | FraudType::ExpenseCapitalization
673            | FraudType::ImproperCapitalization
674            | FraudType::ReserveManipulation
675            | FraudType::UnauthorizedAccess
676            | FraudType::PrematureRevenue
677            | FraudType::UnderstatedLiabilities
678            | FraudType::OverstatedAssets
679            | FraudType::ChannelStuffing => FraudAmountPattern::StatisticallyImprobable,
680            FraudType::DuplicatePayment
681            | FraudType::TimingAnomaly
682            | FraudType::SelfApproval
683            | FraudType::ExceededApprovalLimit
684            | FraudType::SegregationOfDutiesViolation
685            | FraudType::UnauthorizedApproval
686            | FraudType::CollusiveApproval
687            | FraudType::FictitiousVendor
688            | FraudType::ShellCompanyPayment
689            | FraudType::Kickback
690            | FraudType::KickbackScheme
691            | FraudType::InvoiceManipulation
692            | FraudType::AssetMisappropriation
693            | FraudType::InventoryTheft
694            | FraudType::GhostEmployee => FraudAmountPattern::Normal,
695            // Accounting Standards Fraud Types (ASC 606/IFRS 15 - Revenue)
696            FraudType::ImproperRevenueRecognition
697            | FraudType::ImproperPoAllocation
698            | FraudType::VariableConsiderationManipulation
699            | FraudType::ContractModificationMisstatement => {
700                FraudAmountPattern::StatisticallyImprobable
701            }
702            // Accounting Standards Fraud Types (ASC 842/IFRS 16 - Leases)
703            FraudType::LeaseClassificationManipulation
704            | FraudType::OffBalanceSheetLease
705            | FraudType::LeaseLiabilityUnderstatement
706            | FraudType::RouAssetMisstatement => FraudAmountPattern::StatisticallyImprobable,
707            // Accounting Standards Fraud Types (ASC 820/IFRS 13 - Fair Value)
708            FraudType::FairValueHierarchyManipulation
709            | FraudType::Level3InputManipulation
710            | FraudType::ValuationTechniqueManipulation => {
711                FraudAmountPattern::StatisticallyImprobable
712            }
713            // Accounting Standards Fraud Types (ASC 360/IAS 36 - Impairment)
714            FraudType::DelayedImpairment
715            | FraudType::ImpairmentTestAvoidance
716            | FraudType::CashFlowProjectionManipulation
717            | FraudType::ImproperImpairmentReversal => FraudAmountPattern::StatisticallyImprobable,
718            // Sourcing/Procurement Fraud
719            FraudType::BidRigging
720            | FraudType::PhantomVendorContract
721            | FraudType::ConflictOfInterestSourcing => FraudAmountPattern::Normal,
722            FraudType::SplitContractThreshold => FraudAmountPattern::ThresholdAdjacent,
723            // HR/Payroll Fraud
724            FraudType::GhostEmployeePayroll
725            | FraudType::PayrollInflation
726            | FraudType::DuplicateExpenseReport
727            | FraudType::FictitiousExpense => FraudAmountPattern::Normal,
728            FraudType::SplitExpenseToAvoidApproval => FraudAmountPattern::ThresholdAdjacent,
729            // O2C Fraud
730            FraudType::RevenueTimingManipulation => FraudAmountPattern::StatisticallyImprobable,
731            FraudType::QuotePriceOverride => FraudAmountPattern::Normal,
732        }
733    }
734
735    /// Generate a deterministic UUID using the factory.
736    fn generate_deterministic_uuid(&self) -> uuid::Uuid {
737        self.uuid_factory.next()
738    }
739
740    /// Generate a single journal entry.
741    pub fn generate(&mut self) -> JournalEntry {
742        debug!(
743            count = self.count,
744            companies = self.companies.len(),
745            start_date = %self.start_date,
746            end_date = %self.end_date,
747            "Generating journal entry"
748        );
749
750        // Check if we're in a batch - if so, generate a batched entry
751        if let Some(ref state) = self.batch_state {
752            if state.remaining > 0 {
753                return self.generate_batched_entry();
754            }
755        }
756
757        self.count += 1;
758
759        // Generate deterministic document ID
760        let document_id = self.generate_deterministic_uuid();
761
762        // Sample posting date
763        let mut posting_date = self
764            .temporal_sampler
765            .sample_date(self.start_date, self.end_date);
766
767        // Adjust posting date to be a business day if business day calculator is configured
768        if let Some(ref calc) = self.business_day_calculator {
769            if !calc.is_business_day(posting_date) {
770                // Move to next business day
771                posting_date = calc.next_business_day(posting_date, false);
772                // Ensure we don't exceed end_date
773                if posting_date > self.end_date {
774                    posting_date = calc.prev_business_day(self.end_date, true);
775                }
776            }
777        }
778
779        // Select company using weighted selector
780        let company_code = self.company_selector.select(&mut self.rng).to_string();
781
782        // Sample line item specification
783        let line_spec = self.line_sampler.sample();
784
785        // Determine source type using full 4-way distribution
786        let source = self.select_source();
787        let is_automated = matches!(
788            source,
789            TransactionSource::Automated | TransactionSource::Recurring
790        );
791
792        // Select business process
793        let business_process = self.select_business_process();
794
795        // Determine if this is a fraudulent transaction
796        let fraud_type = self.determine_fraud();
797        let is_fraud = fraud_type.is_some();
798
799        // Sample time based on source
800        let time = self.temporal_sampler.sample_time(!is_automated);
801        let created_at = posting_date.and_time(time).and_utc();
802
803        // Select user from pool or generate generic
804        let (created_by, user_persona) = self.select_user(is_automated);
805
806        // Create header with deterministic UUID
807        let mut header =
808            JournalEntryHeader::with_deterministic_id(company_code, posting_date, document_id);
809        header.created_at = created_at;
810        header.source = source;
811        header.created_by = created_by;
812        header.user_persona = user_persona;
813        header.business_process = Some(business_process);
814        header.is_fraud = is_fraud;
815        header.fraud_type = fraud_type;
816
817        // Generate description context
818        let mut context =
819            DescriptionContext::with_period(posting_date.month(), posting_date.year());
820
821        // Add vendor/customer context based on business process
822        match business_process {
823            BusinessProcess::P2P => {
824                if let Some(vendor) = self.vendor_pool.random_vendor(&mut self.rng) {
825                    context.vendor_name = Some(vendor.name.clone());
826                }
827            }
828            BusinessProcess::O2C => {
829                if let Some(customer) = self.customer_pool.random_customer(&mut self.rng) {
830                    context.customer_name = Some(customer.name.clone());
831                }
832            }
833            _ => {}
834        }
835
836        // Generate header text if enabled
837        if self.template_config.descriptions.generate_header_text {
838            header.header_text = Some(self.description_generator.generate_header_text(
839                business_process,
840                &context,
841                &mut self.rng,
842            ));
843        }
844
845        // Generate reference if enabled
846        if self.template_config.references.generate_references {
847            header.reference = Some(
848                self.reference_generator
849                    .generate_for_process_year(business_process, posting_date.year()),
850            );
851        }
852
853        // Generate line items
854        let mut entry = JournalEntry::new(header);
855
856        // Generate amount - use fraud pattern if this is a fraudulent transaction
857        let base_amount = if let Some(ft) = fraud_type {
858            let pattern = self.fraud_type_to_amount_pattern(ft);
859            self.amount_sampler.sample_fraud(pattern)
860        } else {
861            self.amount_sampler.sample()
862        };
863
864        // Apply temporal drift if configured
865        let drift_adjusted_amount = {
866            let drift = self.get_drift_adjustments(posting_date);
867            if drift.amount_mean_multiplier != 1.0 {
868                // Apply drift multiplier (includes seasonal factor if enabled)
869                let multiplier = drift.amount_mean_multiplier * drift.seasonal_factor;
870                let adjusted = base_amount.to_f64().unwrap_or(1.0) * multiplier;
871                Decimal::from_f64_retain(adjusted).unwrap_or(base_amount)
872            } else {
873                base_amount
874            }
875        };
876
877        // Apply human variation to amounts for non-automated transactions
878        let total_amount = if is_automated {
879            drift_adjusted_amount // Automated systems use exact amounts
880        } else {
881            self.apply_human_variation(drift_adjusted_amount)
882        };
883
884        // Generate debit lines
885        let debit_amounts = self
886            .amount_sampler
887            .sample_summing_to(line_spec.debit_count, total_amount);
888        for (i, amount) in debit_amounts.into_iter().enumerate() {
889            let account_number = self.select_debit_account().account_number.clone();
890            let mut line = JournalEntryLine::debit(
891                entry.header.document_id,
892                (i + 1) as u32,
893                account_number.clone(),
894                amount,
895            );
896
897            // Generate line text if enabled
898            if self.template_config.descriptions.generate_line_text {
899                line.line_text = Some(self.description_generator.generate_line_text(
900                    &account_number,
901                    &context,
902                    &mut self.rng,
903                ));
904            }
905
906            entry.add_line(line);
907        }
908
909        // Generate credit lines - use the SAME amounts to ensure balance
910        let credit_amounts = self
911            .amount_sampler
912            .sample_summing_to(line_spec.credit_count, total_amount);
913        for (i, amount) in credit_amounts.into_iter().enumerate() {
914            let account_number = self.select_credit_account().account_number.clone();
915            let mut line = JournalEntryLine::credit(
916                entry.header.document_id,
917                (line_spec.debit_count + i + 1) as u32,
918                account_number.clone(),
919                amount,
920            );
921
922            // Generate line text if enabled
923            if self.template_config.descriptions.generate_line_text {
924                line.line_text = Some(self.description_generator.generate_line_text(
925                    &account_number,
926                    &context,
927                    &mut self.rng,
928                ));
929            }
930
931            entry.add_line(line);
932        }
933
934        // Apply persona-based errors if enabled and it's a human user
935        if self.persona_errors_enabled && !is_automated {
936            self.maybe_inject_persona_error(&mut entry);
937        }
938
939        // Apply approval workflow if enabled and amount exceeds threshold
940        if self.approval_enabled {
941            self.maybe_apply_approval_workflow(&mut entry, posting_date);
942        }
943
944        // Maybe start a batch of similar entries for realism
945        self.maybe_start_batch(&entry);
946
947        entry
948    }
949
950    /// Enable or disable persona-based error injection.
951    ///
952    /// When enabled, entries created by human personas have a chance
953    /// to contain realistic human errors based on their experience level.
954    pub fn with_persona_errors(mut self, enabled: bool) -> Self {
955        self.persona_errors_enabled = enabled;
956        self
957    }
958
959    /// Set fraud configuration for fraud injection.
960    ///
961    /// When fraud is enabled in the config, transactions have a chance
962    /// to be marked as fraudulent based on the configured fraud rate.
963    pub fn with_fraud_config(mut self, config: FraudConfig) -> Self {
964        self.fraud_config = config;
965        self
966    }
967
968    /// Check if persona errors are enabled.
969    pub fn persona_errors_enabled(&self) -> bool {
970        self.persona_errors_enabled
971    }
972
973    /// Enable or disable batch processing behavior.
974    ///
975    /// When enabled (default), the generator will occasionally produce batches
976    /// of similar entries, simulating how humans batch similar work together.
977    pub fn with_batching(mut self, enabled: bool) -> Self {
978        if !enabled {
979            self.batch_state = None;
980        }
981        self
982    }
983
984    /// Check if batch processing is enabled.
985    pub fn batching_enabled(&self) -> bool {
986        // Batching is implicitly enabled when not explicitly disabled
987        true
988    }
989
990    /// Maybe start a batch based on the current entry.
991    ///
992    /// Humans often batch similar work: processing invoices from one vendor,
993    /// entering expense reports for a trip, reconciling similar items.
994    fn maybe_start_batch(&mut self, entry: &JournalEntry) {
995        // Only start batch for non-automated, non-fraud entries
996        if entry.header.source == TransactionSource::Automated || entry.header.is_fraud {
997            return;
998        }
999
1000        // 15% chance to start a batch (most work is not batched)
1001        if self.rng.gen::<f64>() > 0.15 {
1002            return;
1003        }
1004
1005        // Extract key attributes for batching
1006        let base_account = entry
1007            .lines
1008            .first()
1009            .map(|l| l.gl_account.clone())
1010            .unwrap_or_default();
1011
1012        let base_amount = entry.total_debit();
1013
1014        self.batch_state = Some(BatchState {
1015            base_account_number: base_account,
1016            base_amount,
1017            base_business_process: entry.header.business_process,
1018            base_posting_date: entry.header.posting_date,
1019            remaining: self.rng.gen_range(2..7), // 2-6 more similar entries
1020        });
1021    }
1022
1023    /// Generate an entry that's part of the current batch.
1024    ///
1025    /// Batched entries have:
1026    /// - Same or very similar business process
1027    /// - Same posting date (batched work done together)
1028    /// - Similar amounts (within ±15%)
1029    /// - Same debit account (processing similar items)
1030    fn generate_batched_entry(&mut self) -> JournalEntry {
1031        use rust_decimal::Decimal;
1032
1033        // Decrement batch counter
1034        if let Some(ref mut state) = self.batch_state {
1035            state.remaining = state.remaining.saturating_sub(1);
1036        }
1037
1038        let batch = self
1039            .batch_state
1040            .clone()
1041            .expect("batch_state set before calling generate_batched_entry");
1042
1043        // Use the batch's posting date (work done on same day)
1044        let posting_date = batch.base_posting_date;
1045
1046        self.count += 1;
1047        let document_id = self.generate_deterministic_uuid();
1048
1049        // Select same company (batched work is usually same company)
1050        let company_code = self.company_selector.select(&mut self.rng).to_string();
1051
1052        // Use simplified line spec for batched entries (usually 2-line)
1053        let _line_spec = LineItemSpec {
1054            total_count: 2,
1055            debit_count: 1,
1056            credit_count: 1,
1057            split_type: DebitCreditSplit::Equal,
1058        };
1059
1060        // Batched entries are always manual
1061        let source = TransactionSource::Manual;
1062
1063        // Use the batch's business process
1064        let business_process = batch.base_business_process.unwrap_or(BusinessProcess::R2R);
1065
1066        // Sample time
1067        let time = self.temporal_sampler.sample_time(true);
1068        let created_at = posting_date.and_time(time).and_utc();
1069
1070        // Same user for batched work
1071        let (created_by, user_persona) = self.select_user(false);
1072
1073        // Create header
1074        let mut header =
1075            JournalEntryHeader::with_deterministic_id(company_code, posting_date, document_id);
1076        header.created_at = created_at;
1077        header.source = source;
1078        header.created_by = created_by;
1079        header.user_persona = user_persona;
1080        header.business_process = Some(business_process);
1081
1082        // Generate similar amount (within ±15% of base)
1083        let variation = self.rng.gen_range(-0.15..0.15);
1084        let varied_amount =
1085            batch.base_amount * (Decimal::ONE + Decimal::try_from(variation).unwrap_or_default());
1086        let total_amount = varied_amount.round_dp(2).max(Decimal::from(1));
1087
1088        // Create the entry
1089        let mut entry = JournalEntry::new(header);
1090
1091        // Use same debit account as batch base
1092        let debit_line = JournalEntryLine::debit(
1093            entry.header.document_id,
1094            1,
1095            batch.base_account_number.clone(),
1096            total_amount,
1097        );
1098        entry.add_line(debit_line);
1099
1100        // Select a credit account
1101        let credit_account = self.select_credit_account().account_number.clone();
1102        let credit_line =
1103            JournalEntryLine::credit(entry.header.document_id, 2, credit_account, total_amount);
1104        entry.add_line(credit_line);
1105
1106        // Apply persona-based errors if enabled
1107        if self.persona_errors_enabled {
1108            self.maybe_inject_persona_error(&mut entry);
1109        }
1110
1111        // Apply approval workflow if enabled
1112        if self.approval_enabled {
1113            self.maybe_apply_approval_workflow(&mut entry, posting_date);
1114        }
1115
1116        // Clear batch state if no more entries remaining
1117        if batch.remaining <= 1 {
1118            self.batch_state = None;
1119        }
1120
1121        entry
1122    }
1123
1124    /// Maybe inject a persona-appropriate error based on the persona's error rate.
1125    fn maybe_inject_persona_error(&mut self, entry: &mut JournalEntry) {
1126        // Parse persona from the entry header
1127        let persona_str = &entry.header.user_persona;
1128        let persona = match persona_str.to_lowercase().as_str() {
1129            s if s.contains("junior") => UserPersona::JuniorAccountant,
1130            s if s.contains("senior") => UserPersona::SeniorAccountant,
1131            s if s.contains("controller") => UserPersona::Controller,
1132            s if s.contains("manager") => UserPersona::Manager,
1133            s if s.contains("executive") => UserPersona::Executive,
1134            _ => return, // Don't inject errors for unknown personas
1135        };
1136
1137        // Get base error rate from persona
1138        let base_error_rate = persona.error_rate();
1139
1140        // Apply stress factors based on posting date
1141        let adjusted_rate = self.apply_stress_factors(base_error_rate, entry.header.posting_date);
1142
1143        // Check if error should occur based on adjusted rate
1144        if self.rng.gen::<f64>() >= adjusted_rate {
1145            return; // No error this time
1146        }
1147
1148        // Select and inject persona-appropriate error
1149        self.inject_human_error(entry, persona);
1150    }
1151
1152    /// Apply contextual stress factors to the base error rate.
1153    ///
1154    /// Stress factors increase error likelihood during:
1155    /// - Month-end (day >= 28): 1.5x more errors due to deadline pressure
1156    /// - Quarter-end (Mar, Jun, Sep, Dec): additional 25% boost
1157    /// - Year-end (December 28-31): 2.0x more errors due to audit pressure
1158    /// - Monday morning (catch-up work): 20% more errors
1159    /// - Friday afternoon (rushing to leave): 30% more errors
1160    fn apply_stress_factors(&self, base_rate: f64, posting_date: chrono::NaiveDate) -> f64 {
1161        use chrono::Datelike;
1162
1163        let mut rate = base_rate;
1164        let day = posting_date.day();
1165        let month = posting_date.month();
1166
1167        // Year-end stress (December 28-31): double the error rate
1168        if month == 12 && day >= 28 {
1169            rate *= 2.0;
1170            return rate.min(0.5); // Cap at 50% to keep it realistic
1171        }
1172
1173        // Quarter-end stress (last days of Mar, Jun, Sep, Dec)
1174        if matches!(month, 3 | 6 | 9 | 12) && day >= 28 {
1175            rate *= 1.75; // 75% more errors at quarter end
1176            return rate.min(0.4);
1177        }
1178
1179        // Month-end stress (last 3 days of month)
1180        if day >= 28 {
1181            rate *= 1.5; // 50% more errors at month end
1182        }
1183
1184        // Day-of-week stress effects
1185        let weekday = posting_date.weekday();
1186        match weekday {
1187            chrono::Weekday::Mon => {
1188                // Monday: catching up, often rushed
1189                rate *= 1.2;
1190            }
1191            chrono::Weekday::Fri => {
1192                // Friday: rushing to finish before weekend
1193                rate *= 1.3;
1194            }
1195            _ => {}
1196        }
1197
1198        // Cap at 40% to keep it realistic
1199        rate.min(0.4)
1200    }
1201
1202    /// Apply human-like variation to an amount.
1203    ///
1204    /// Humans don't enter perfectly calculated amounts - they:
1205    /// - Round amounts differently
1206    /// - Estimate instead of calculating exactly
1207    /// - Make small input variations
1208    ///
1209    /// This applies small variations (typically ±2%) to make amounts more realistic.
1210    fn apply_human_variation(&mut self, amount: rust_decimal::Decimal) -> rust_decimal::Decimal {
1211        use rust_decimal::Decimal;
1212
1213        // Automated transactions or very small amounts don't get variation
1214        if amount < Decimal::from(10) {
1215            return amount;
1216        }
1217
1218        // 70% chance of human variation being applied
1219        if self.rng.gen::<f64>() > 0.70 {
1220            return amount;
1221        }
1222
1223        // Decide which type of human variation to apply
1224        let variation_type: u8 = self.rng.gen_range(0..4);
1225
1226        match variation_type {
1227            0 => {
1228                // ±2% variation (common for estimated amounts)
1229                let variation_pct = self.rng.gen_range(-0.02..0.02);
1230                let variation = amount * Decimal::try_from(variation_pct).unwrap_or_default();
1231                (amount + variation).round_dp(2)
1232            }
1233            1 => {
1234                // Round to nearest $10
1235                let ten = Decimal::from(10);
1236                (amount / ten).round() * ten
1237            }
1238            2 => {
1239                // Round to nearest $100 (for larger amounts)
1240                if amount >= Decimal::from(500) {
1241                    let hundred = Decimal::from(100);
1242                    (amount / hundred).round() * hundred
1243                } else {
1244                    amount
1245                }
1246            }
1247            3 => {
1248                // Slight under/over payment (±$0.01 to ±$1.00)
1249                let cents = Decimal::new(self.rng.gen_range(-100..100), 2);
1250                (amount + cents).max(Decimal::ZERO).round_dp(2)
1251            }
1252            _ => amount,
1253        }
1254    }
1255
1256    /// Rebalance an entry after a one-sided amount modification.
1257    ///
1258    /// When an error modifies one line's amount, this finds a line on the opposite
1259    /// side (credit if modified was debit, or vice versa) and adjusts it by the
1260    /// same impact to maintain balance.
1261    fn rebalance_entry(entry: &mut JournalEntry, modified_was_debit: bool, impact: Decimal) {
1262        // Find a line on the opposite side to adjust
1263        let balancing_idx = entry.lines.iter().position(|l| {
1264            if modified_was_debit {
1265                l.credit_amount > Decimal::ZERO
1266            } else {
1267                l.debit_amount > Decimal::ZERO
1268            }
1269        });
1270
1271        if let Some(idx) = balancing_idx {
1272            if modified_was_debit {
1273                entry.lines[idx].credit_amount += impact;
1274            } else {
1275                entry.lines[idx].debit_amount += impact;
1276            }
1277        }
1278    }
1279
1280    /// Inject a human-like error based on the persona.
1281    ///
1282    /// All error types maintain balance - amount modifications are applied to both sides.
1283    /// Entries are marked with [HUMAN_ERROR:*] tags in header_text for ML detection.
1284    fn inject_human_error(&mut self, entry: &mut JournalEntry, persona: UserPersona) {
1285        use rust_decimal::Decimal;
1286
1287        // Different personas make different types of errors
1288        let error_type: u8 = match persona {
1289            UserPersona::JuniorAccountant => {
1290                // Junior accountants make more varied errors
1291                self.rng.gen_range(0..5)
1292            }
1293            UserPersona::SeniorAccountant => {
1294                // Senior accountants mainly make transposition errors
1295                self.rng.gen_range(0..3)
1296            }
1297            UserPersona::Controller | UserPersona::Manager => {
1298                // Controllers/managers mainly make rounding or cutoff errors
1299                self.rng.gen_range(3..5)
1300            }
1301            _ => return,
1302        };
1303
1304        match error_type {
1305            0 => {
1306                // Transposed digits in an amount
1307                if let Some(line) = entry.lines.get_mut(0) {
1308                    let is_debit = line.debit_amount > Decimal::ZERO;
1309                    let original_amount = if is_debit {
1310                        line.debit_amount
1311                    } else {
1312                        line.credit_amount
1313                    };
1314
1315                    // Simple digit swap in the string representation
1316                    let s = original_amount.to_string();
1317                    if s.len() >= 2 {
1318                        let chars: Vec<char> = s.chars().collect();
1319                        let pos = self.rng.gen_range(0..chars.len().saturating_sub(1));
1320                        if chars[pos].is_ascii_digit()
1321                            && chars.get(pos + 1).is_some_and(|c| c.is_ascii_digit())
1322                        {
1323                            let mut new_chars = chars;
1324                            new_chars.swap(pos, pos + 1);
1325                            if let Ok(new_amount) =
1326                                new_chars.into_iter().collect::<String>().parse::<Decimal>()
1327                            {
1328                                let impact = new_amount - original_amount;
1329
1330                                // Apply to the modified line
1331                                if is_debit {
1332                                    entry.lines[0].debit_amount = new_amount;
1333                                } else {
1334                                    entry.lines[0].credit_amount = new_amount;
1335                                }
1336
1337                                // Rebalance the entry
1338                                Self::rebalance_entry(entry, is_debit, impact);
1339
1340                                entry.header.header_text = Some(
1341                                    entry.header.header_text.clone().unwrap_or_default()
1342                                        + " [HUMAN_ERROR:TRANSPOSITION]",
1343                                );
1344                            }
1345                        }
1346                    }
1347                }
1348            }
1349            1 => {
1350                // Wrong decimal place (off by factor of 10)
1351                if let Some(line) = entry.lines.get_mut(0) {
1352                    let is_debit = line.debit_amount > Decimal::ZERO;
1353                    let original_amount = if is_debit {
1354                        line.debit_amount
1355                    } else {
1356                        line.credit_amount
1357                    };
1358
1359                    let new_amount = original_amount * Decimal::new(10, 0);
1360                    let impact = new_amount - original_amount;
1361
1362                    // Apply to the modified line
1363                    if is_debit {
1364                        entry.lines[0].debit_amount = new_amount;
1365                    } else {
1366                        entry.lines[0].credit_amount = new_amount;
1367                    }
1368
1369                    // Rebalance the entry
1370                    Self::rebalance_entry(entry, is_debit, impact);
1371
1372                    entry.header.header_text = Some(
1373                        entry.header.header_text.clone().unwrap_or_default()
1374                            + " [HUMAN_ERROR:DECIMAL_SHIFT]",
1375                    );
1376                }
1377            }
1378            2 => {
1379                // Typo in description (doesn't affect balance)
1380                if let Some(ref mut text) = entry.header.header_text {
1381                    let typos = ["teh", "adn", "wiht", "taht", "recieve"];
1382                    let correct = ["the", "and", "with", "that", "receive"];
1383                    let idx = self.rng.gen_range(0..typos.len());
1384                    if text.to_lowercase().contains(correct[idx]) {
1385                        *text = text.replace(correct[idx], typos[idx]);
1386                        *text = format!("{} [HUMAN_ERROR:TYPO]", text);
1387                    }
1388                }
1389            }
1390            3 => {
1391                // Rounding to round number
1392                if let Some(line) = entry.lines.get_mut(0) {
1393                    let is_debit = line.debit_amount > Decimal::ZERO;
1394                    let original_amount = if is_debit {
1395                        line.debit_amount
1396                    } else {
1397                        line.credit_amount
1398                    };
1399
1400                    let new_amount =
1401                        (original_amount / Decimal::new(100, 0)).round() * Decimal::new(100, 0);
1402                    let impact = new_amount - original_amount;
1403
1404                    // Apply to the modified line
1405                    if is_debit {
1406                        entry.lines[0].debit_amount = new_amount;
1407                    } else {
1408                        entry.lines[0].credit_amount = new_amount;
1409                    }
1410
1411                    // Rebalance the entry
1412                    Self::rebalance_entry(entry, is_debit, impact);
1413
1414                    entry.header.header_text = Some(
1415                        entry.header.header_text.clone().unwrap_or_default()
1416                            + " [HUMAN_ERROR:ROUNDED]",
1417                    );
1418                }
1419            }
1420            4 => {
1421                // Late posting marker (document date much earlier than posting date)
1422                // This doesn't create an imbalance
1423                if entry.header.document_date == entry.header.posting_date {
1424                    let days_late = self.rng.gen_range(5..15);
1425                    entry.header.document_date =
1426                        entry.header.posting_date - chrono::Duration::days(days_late);
1427                    entry.header.header_text = Some(
1428                        entry.header.header_text.clone().unwrap_or_default()
1429                            + " [HUMAN_ERROR:LATE_POSTING]",
1430                    );
1431                }
1432            }
1433            _ => {}
1434        }
1435    }
1436
1437    /// Apply approval workflow for high-value transactions.
1438    ///
1439    /// If the entry amount exceeds the approval threshold, simulate an
1440    /// approval workflow with appropriate approvers based on amount.
1441    fn maybe_apply_approval_workflow(
1442        &mut self,
1443        entry: &mut JournalEntry,
1444        _posting_date: NaiveDate,
1445    ) {
1446        use rust_decimal::Decimal;
1447
1448        let amount = entry.total_debit();
1449
1450        // Skip if amount is below threshold
1451        if amount <= self.approval_threshold {
1452            // Auto-approved below threshold
1453            let workflow = ApprovalWorkflow::auto_approved(
1454                entry.header.created_by.clone(),
1455                entry.header.user_persona.clone(),
1456                amount,
1457                entry.header.created_at,
1458            );
1459            entry.header.approval_workflow = Some(workflow);
1460            return;
1461        }
1462
1463        // Mark as SOX relevant for high-value transactions
1464        entry.header.sox_relevant = true;
1465
1466        // Determine required approval levels based on amount
1467        let required_levels = if amount > Decimal::new(100000, 0) {
1468            3 // Executive approval required
1469        } else if amount > Decimal::new(50000, 0) {
1470            2 // Senior management approval
1471        } else {
1472            1 // Manager approval
1473        };
1474
1475        // Create the approval workflow
1476        let mut workflow = ApprovalWorkflow::new(
1477            entry.header.created_by.clone(),
1478            entry.header.user_persona.clone(),
1479            amount,
1480        );
1481        workflow.required_levels = required_levels;
1482
1483        // Simulate submission
1484        let submit_time = entry.header.created_at;
1485        let submit_action = ApprovalAction::new(
1486            entry.header.created_by.clone(),
1487            entry.header.user_persona.clone(),
1488            self.parse_persona(&entry.header.user_persona),
1489            ApprovalActionType::Submit,
1490            0,
1491        )
1492        .with_timestamp(submit_time);
1493
1494        workflow.actions.push(submit_action);
1495        workflow.status = ApprovalStatus::Pending;
1496        workflow.submitted_at = Some(submit_time);
1497
1498        // Simulate approvals with realistic delays
1499        let mut current_time = submit_time;
1500        for level in 1..=required_levels {
1501            // Add delay for approval (1-3 business hours per level)
1502            let delay_hours = self.rng.gen_range(1..4);
1503            current_time += chrono::Duration::hours(delay_hours);
1504
1505            // Skip weekends
1506            while current_time.weekday() == chrono::Weekday::Sat
1507                || current_time.weekday() == chrono::Weekday::Sun
1508            {
1509                current_time += chrono::Duration::days(1);
1510            }
1511
1512            // Generate approver based on level
1513            let (approver_id, approver_role) = self.select_approver(level);
1514
1515            let approve_action = ApprovalAction::new(
1516                approver_id.clone(),
1517                format!("{:?}", approver_role),
1518                approver_role,
1519                ApprovalActionType::Approve,
1520                level,
1521            )
1522            .with_timestamp(current_time);
1523
1524            workflow.actions.push(approve_action);
1525            workflow.current_level = level;
1526        }
1527
1528        // Mark as approved
1529        workflow.status = ApprovalStatus::Approved;
1530        workflow.approved_at = Some(current_time);
1531
1532        entry.header.approval_workflow = Some(workflow);
1533    }
1534
1535    /// Select an approver based on the required level.
1536    fn select_approver(&mut self, level: u8) -> (String, UserPersona) {
1537        let persona = match level {
1538            1 => UserPersona::Manager,
1539            2 => UserPersona::Controller,
1540            _ => UserPersona::Executive,
1541        };
1542
1543        // Try to get from user pool first
1544        if let Some(ref pool) = self.user_pool {
1545            if let Some(user) = pool.get_random_user(persona, &mut self.rng) {
1546                return (user.user_id.clone(), persona);
1547            }
1548        }
1549
1550        // Fallback to generated approver
1551        let approver_id = match persona {
1552            UserPersona::Manager => format!("MGR{:04}", self.rng.gen_range(1..100)),
1553            UserPersona::Controller => format!("CTRL{:04}", self.rng.gen_range(1..20)),
1554            UserPersona::Executive => format!("EXEC{:04}", self.rng.gen_range(1..10)),
1555            _ => format!("USR{:04}", self.rng.gen_range(1..1000)),
1556        };
1557
1558        (approver_id, persona)
1559    }
1560
1561    /// Parse user persona from string.
1562    fn parse_persona(&self, persona_str: &str) -> UserPersona {
1563        match persona_str.to_lowercase().as_str() {
1564            s if s.contains("junior") => UserPersona::JuniorAccountant,
1565            s if s.contains("senior") => UserPersona::SeniorAccountant,
1566            s if s.contains("controller") => UserPersona::Controller,
1567            s if s.contains("manager") => UserPersona::Manager,
1568            s if s.contains("executive") => UserPersona::Executive,
1569            s if s.contains("automated") || s.contains("system") => UserPersona::AutomatedSystem,
1570            _ => UserPersona::JuniorAccountant, // Default
1571        }
1572    }
1573
1574    /// Enable or disable approval workflow.
1575    pub fn with_approval(mut self, enabled: bool) -> Self {
1576        self.approval_enabled = enabled;
1577        self
1578    }
1579
1580    /// Set the approval threshold amount.
1581    pub fn with_approval_threshold(mut self, threshold: rust_decimal::Decimal) -> Self {
1582        self.approval_threshold = threshold;
1583        self
1584    }
1585
1586    /// Set the temporal drift controller for simulating distribution changes over time.
1587    ///
1588    /// When drift is enabled, amounts and other distributions will shift based on
1589    /// the period (month) to simulate realistic temporal evolution like inflation
1590    /// or increasing fraud rates.
1591    pub fn with_drift_controller(mut self, controller: DriftController) -> Self {
1592        self.drift_controller = Some(controller);
1593        self
1594    }
1595
1596    /// Set drift configuration directly.
1597    ///
1598    /// Creates a drift controller from the config. Total periods is calculated
1599    /// from the date range.
1600    pub fn with_drift_config(mut self, config: DriftConfig, seed: u64) -> Self {
1601        if config.enabled {
1602            let total_periods = self.calculate_total_periods();
1603            self.drift_controller = Some(DriftController::new(config, seed, total_periods));
1604        }
1605        self
1606    }
1607
1608    /// Calculate total periods (months) in the date range.
1609    fn calculate_total_periods(&self) -> u32 {
1610        let start_year = self.start_date.year();
1611        let start_month = self.start_date.month();
1612        let end_year = self.end_date.year();
1613        let end_month = self.end_date.month();
1614
1615        ((end_year - start_year) * 12 + (end_month as i32 - start_month as i32) + 1).max(1) as u32
1616    }
1617
1618    /// Calculate the period number (0-indexed) for a given date.
1619    fn date_to_period(&self, date: NaiveDate) -> u32 {
1620        let start_year = self.start_date.year();
1621        let start_month = self.start_date.month() as i32;
1622        let date_year = date.year();
1623        let date_month = date.month() as i32;
1624
1625        ((date_year - start_year) * 12 + (date_month - start_month)).max(0) as u32
1626    }
1627
1628    /// Get drift adjustments for a given date.
1629    fn get_drift_adjustments(&self, date: NaiveDate) -> DriftAdjustments {
1630        if let Some(ref controller) = self.drift_controller {
1631            let period = self.date_to_period(date);
1632            controller.compute_adjustments(period)
1633        } else {
1634            DriftAdjustments::none()
1635        }
1636    }
1637
1638    /// Select a user from the pool or generate a generic user ID.
1639    fn select_user(&mut self, is_automated: bool) -> (String, String) {
1640        if let Some(ref pool) = self.user_pool {
1641            let persona = if is_automated {
1642                UserPersona::AutomatedSystem
1643            } else {
1644                // Random distribution among human personas
1645                let roll: f64 = self.rng.gen();
1646                if roll < 0.4 {
1647                    UserPersona::JuniorAccountant
1648                } else if roll < 0.7 {
1649                    UserPersona::SeniorAccountant
1650                } else if roll < 0.85 {
1651                    UserPersona::Controller
1652                } else {
1653                    UserPersona::Manager
1654                }
1655            };
1656
1657            if let Some(user) = pool.get_random_user(persona, &mut self.rng) {
1658                return (
1659                    user.user_id.clone(),
1660                    format!("{:?}", user.persona).to_lowercase(),
1661                );
1662            }
1663        }
1664
1665        // Fallback to generic format
1666        if is_automated {
1667            (
1668                format!("BATCH{:04}", self.rng.gen_range(1..=20)),
1669                "automated_system".to_string(),
1670            )
1671        } else {
1672            (
1673                format!("USER{:04}", self.rng.gen_range(1..=40)),
1674                "senior_accountant".to_string(),
1675            )
1676        }
1677    }
1678
1679    /// Select transaction source based on configuration weights.
1680    fn select_source(&mut self) -> TransactionSource {
1681        let roll: f64 = self.rng.gen();
1682        let dist = &self.config.source_distribution;
1683
1684        if roll < dist.manual {
1685            TransactionSource::Manual
1686        } else if roll < dist.manual + dist.automated {
1687            TransactionSource::Automated
1688        } else if roll < dist.manual + dist.automated + dist.recurring {
1689            TransactionSource::Recurring
1690        } else {
1691            TransactionSource::Adjustment
1692        }
1693    }
1694
1695    /// Select a business process based on configuration weights.
1696    fn select_business_process(&mut self) -> BusinessProcess {
1697        let roll: f64 = self.rng.gen();
1698
1699        // Default weights: O2C=35%, P2P=30%, R2R=20%, H2R=10%, A2R=5%
1700        if roll < 0.35 {
1701            BusinessProcess::O2C
1702        } else if roll < 0.65 {
1703            BusinessProcess::P2P
1704        } else if roll < 0.85 {
1705            BusinessProcess::R2R
1706        } else if roll < 0.95 {
1707            BusinessProcess::H2R
1708        } else {
1709            BusinessProcess::A2R
1710        }
1711    }
1712
1713    fn select_debit_account(&mut self) -> &GLAccount {
1714        let accounts = self.coa.get_accounts_by_type(AccountType::Asset);
1715        let expense_accounts = self.coa.get_accounts_by_type(AccountType::Expense);
1716
1717        // 60% asset, 40% expense for debits
1718        let all: Vec<_> = if self.rng.gen::<f64>() < 0.6 {
1719            accounts
1720        } else {
1721            expense_accounts
1722        };
1723
1724        all.choose(&mut self.rng)
1725            .copied()
1726            .unwrap_or_else(|| &self.coa.accounts[0])
1727    }
1728
1729    fn select_credit_account(&mut self) -> &GLAccount {
1730        let liability_accounts = self.coa.get_accounts_by_type(AccountType::Liability);
1731        let revenue_accounts = self.coa.get_accounts_by_type(AccountType::Revenue);
1732
1733        // 60% liability, 40% revenue for credits
1734        let all: Vec<_> = if self.rng.gen::<f64>() < 0.6 {
1735            liability_accounts
1736        } else {
1737            revenue_accounts
1738        };
1739
1740        all.choose(&mut self.rng)
1741            .copied()
1742            .unwrap_or_else(|| &self.coa.accounts[0])
1743    }
1744}
1745
1746impl Generator for JournalEntryGenerator {
1747    type Item = JournalEntry;
1748    type Config = (
1749        TransactionConfig,
1750        Arc<ChartOfAccounts>,
1751        Vec<String>,
1752        NaiveDate,
1753        NaiveDate,
1754    );
1755
1756    fn new(config: Self::Config, seed: u64) -> Self {
1757        Self::new_with_params(config.0, config.1, config.2, config.3, config.4, seed)
1758    }
1759
1760    fn generate_one(&mut self) -> Self::Item {
1761        self.generate()
1762    }
1763
1764    fn reset(&mut self) {
1765        self.rng = seeded_rng(self.seed, 0);
1766        self.line_sampler.reset(self.seed + 1);
1767        self.amount_sampler.reset(self.seed + 2);
1768        self.temporal_sampler.reset(self.seed + 3);
1769        self.count = 0;
1770        self.uuid_factory.reset();
1771
1772        // Reset reference generator by recreating it
1773        let mut ref_gen = ReferenceGenerator::new(
1774            self.start_date.year(),
1775            self.companies.first().map(|s| s.as_str()).unwrap_or("1000"),
1776        );
1777        ref_gen.set_prefix(
1778            ReferenceType::Invoice,
1779            &self.template_config.references.invoice_prefix,
1780        );
1781        ref_gen.set_prefix(
1782            ReferenceType::PurchaseOrder,
1783            &self.template_config.references.po_prefix,
1784        );
1785        ref_gen.set_prefix(
1786            ReferenceType::SalesOrder,
1787            &self.template_config.references.so_prefix,
1788        );
1789        self.reference_generator = ref_gen;
1790    }
1791
1792    fn count(&self) -> u64 {
1793        self.count
1794    }
1795
1796    fn seed(&self) -> u64 {
1797        self.seed
1798    }
1799}
1800
1801#[cfg(test)]
1802#[allow(clippy::unwrap_used)]
1803mod tests {
1804    use super::*;
1805    use crate::ChartOfAccountsGenerator;
1806
/// Generated entries must balance and carry at least two lines.
///
/// Entries whose header text contains a `[HUMAN_ERROR:` marker are exempt
/// from the balance check, but at least 80 of the 100 entries must pass it.
#[test]
fn test_generate_balanced_entries() {
    let coa = Arc::new(
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42)
            .generate(),
    );
    let period_start = NaiveDate::from_ymd_opt(2024, 1, 1).unwrap();
    let period_end = NaiveDate::from_ymd_opt(2024, 12, 31).unwrap();

    let mut generator = JournalEntryGenerator::new_with_params(
        TransactionConfig::default(),
        coa,
        vec!["1000".to_string()],
        period_start,
        period_end,
        42,
    );

    let mut checked = 0;
    for _ in 0..100 {
        let entry = generator.generate();

        // Injected human errors may unbalance an entry on purpose, so those
        // entries are not held to the balance requirement.
        let flagged_as_human_error = entry
            .header
            .header_text
            .as_ref()
            .map_or(false, |text| text.contains("[HUMAN_ERROR:"));

        if !flagged_as_human_error {
            assert!(entry.is_balanced(), "Entry {:?} is not balanced", entry.header.document_id);
            checked += 1;
        }
        assert!(entry.line_count() >= 2, "Entry has fewer than 2 lines");
    }

    // Human errors are expected to be rare, so most entries must have been checked.
    assert!(checked >= 80, "Expected at least 80 balanced entries, got {}", checked);
}
1852
/// Two generators built from identical inputs and the same seed must emit
/// the same document IDs and debit totals, entry for entry.
#[test]
fn test_deterministic_generation() {
    let coa = Arc::new(
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42)
            .generate(),
    );

    // Both generators come from the same factory so their inputs cannot diverge.
    let make_generator = || {
        JournalEntryGenerator::new_with_params(
            TransactionConfig::default(),
            Arc::clone(&coa),
            vec!["1000".to_string()],
            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
            42,
        )
    };
    let mut first = make_generator();
    let mut second = make_generator();

    for _ in 0..50 {
        let left = first.generate();
        let right = second.generate();
        assert_eq!(left.header.document_id, right.header.document_id);
        assert_eq!(left.total_debit(), right.total_debit());
    }
}
1884
/// With every template feature switched on, each entry must carry header
/// text, a reference, a business process and line text, and still balance.
#[test]
fn test_templates_generate_descriptions() {
    use datasynth_config::schema::{
        CultureDistribution, DescriptionTemplateConfig, NameTemplateConfig,
        ReferenceTemplateConfig,
    };

    let coa = Arc::new(
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42)
            .generate(),
    );

    // Assemble the template configuration piece by piece, all features enabled.
    let names = NameTemplateConfig {
        generate_realistic_names: true,
        email_domain: "test.com".to_string(),
        culture_distribution: CultureDistribution::default(),
    };
    let descriptions = DescriptionTemplateConfig {
        generate_header_text: true,
        generate_line_text: true,
    };
    let references = ReferenceTemplateConfig {
        generate_references: true,
        invoice_prefix: "TEST-INV".to_string(),
        po_prefix: "TEST-PO".to_string(),
        so_prefix: "TEST-SO".to_string(),
    };
    let template_config = TemplateConfig {
        names,
        descriptions,
        references,
    };

    // Persona errors are disabled so they cannot interfere with the template checks.
    let mut generator = JournalEntryGenerator::new_with_full_config(
        TransactionConfig::default(),
        coa,
        vec!["1000".to_string()],
        NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
        NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
        42,
        template_config,
        None,
    )
    .with_persona_errors(false);

    for _ in 0..10 {
        let entry = generator.generate();
        let header = &entry.header;

        assert!(header.header_text.is_some(), "Header text should be populated");
        assert!(header.reference.is_some(), "Reference should be populated");
        assert!(header.business_process.is_some(), "Business process should be set");

        // Every line needs its own text.
        assert!(
            entry.lines.iter().all(|line| line.line_text.is_some()),
            "Line text should be populated"
        );

        // Template generation must not disturb the balance.
        assert!(entry.is_balanced());
    }
}
1952
/// A generator wired to a standard user pool must still stamp every entry
/// with a creator ID.
#[test]
fn test_user_pool_integration() {
    let coa = Arc::new(
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42)
            .generate(),
    );
    let company_codes = vec!["1000".to_string()];

    // Standard pool for the single test company.
    let pool = crate::UserGenerator::new(42).generate_standard(&company_codes);

    let mut generator = JournalEntryGenerator::new_with_full_config(
        TransactionConfig::default(),
        coa,
        company_codes,
        NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
        NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
        42,
        TemplateConfig::default(),
        Some(pool),
    );

    for _ in 0..20 {
        let entry = generator.generate();

        // Only non-emptiness is asserted: the generator may fall back to a
        // generic BATCH/USER ID when the pool has no user for the persona.
        assert!(!entry.header.created_by.is_empty());
    }
}
1985
/// `is_using_real_master_data` must be false on a fresh generator and true
/// once vendors, customers and materials have been attached.
#[test]
fn test_master_data_connection() {
    let coa = Arc::new(
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42)
            .generate(),
    );

    // Small hand-made master data set.
    let vendors = vec![
        Vendor::new("V-TEST-001", "Test Vendor Alpha", VendorType::Supplier),
        Vendor::new("V-TEST-002", "Test Vendor Beta", VendorType::Technology),
    ];
    let customers = vec![
        Customer::new("C-TEST-001", "Test Customer One", CustomerType::Corporate),
        Customer::new("C-TEST-002", "Test Customer Two", CustomerType::SmallBusiness),
    ];
    let materials = vec![Material::new(
        "MAT-TEST-001",
        "Test Material A",
        MaterialType::RawMaterial,
    )];

    let bare = JournalEntryGenerator::new_with_params(
        TransactionConfig::default(),
        coa,
        vec!["1000".to_string()],
        NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
        NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
        42,
    );

    // Nothing attached yet.
    assert!(!bare.is_using_real_master_data());

    // Attach each kind of master data through its own builder.
    let connected = bare
        .with_vendors(&vendors)
        .with_customers(&customers)
        .with_materials(&materials);

    assert!(connected.is_using_real_master_data());
}
2037
/// `with_master_data` attaches vendors, customers and materials in one call
/// and leaves the generator reporting real master data.
#[test]
fn test_with_master_data_convenience_method() {
    let coa = Arc::new(
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42)
            .generate(),
    );

    // One record of each kind is enough for this check.
    let vendors = vec![Vendor::new("V-001", "Vendor One", VendorType::Supplier)];
    let customers = vec![Customer::new("C-001", "Customer One", CustomerType::Corporate)];
    let materials = vec![Material::new("MAT-001", "Material One", MaterialType::RawMaterial)];

    let period_start = NaiveDate::from_ymd_opt(2024, 1, 1).unwrap();
    let period_end = NaiveDate::from_ymd_opt(2024, 12, 31).unwrap();

    let connected = JournalEntryGenerator::new_with_params(
        TransactionConfig::default(),
        coa,
        vec!["1000".to_string()],
        period_start,
        period_end,
        42,
    )
    .with_master_data(&vendors, &customers, &materials);

    assert!(connected.is_using_real_master_data());
}
2068
/// `apply_stress_factors` must leave a baseline day essentially unchanged and
/// raise the rate for month end, year end, a Friday and a Monday.
#[test]
fn test_stress_factors_increase_error_rate() {
    let coa = Arc::new(
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42)
            .generate(),
    );

    let generator = JournalEntryGenerator::new_with_params(
        TransactionConfig::default(),
        coa,
        vec!["1000".to_string()],
        NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
        NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
        42,
    );

    let base_rate = 0.1;
    // Stressed rate for a calendar date, always starting from `base_rate`.
    let rate_on = |year: i32, month: u32, day: u32| {
        let date = NaiveDate::from_ymd_opt(year, month, day).unwrap();
        generator.apply_stress_factors(base_rate, date)
    };

    // Baseline: 15 June 2024, which is a Saturday.
    // NOTE(review): the assertion messages below call this day "mid-week".
    // A true mid-week date (e.g. Wednesday 12 June 2024) may have been
    // intended; confirm against `apply_stress_factors`.
    let regular_rate = rate_on(2024, 6, 15);
    assert!(
        (regular_rate - base_rate).abs() < 0.01,
        "Regular day should have minimal stress factor adjustment"
    );

    // Month end: 29 June 2024 (a Saturday).
    let month_end_rate = rate_on(2024, 6, 29);
    assert!(
        month_end_rate > regular_rate,
        "Month end should have higher error rate than regular day"
    );

    // Year end: 30 December 2024.
    let year_end_rate = rate_on(2024, 12, 30);
    assert!(
        year_end_rate > month_end_rate,
        "Year end should have highest error rate"
    );

    // Friday: 14 June 2024.
    let friday_rate = rate_on(2024, 6, 14);
    assert!(
        friday_rate > regular_rate,
        "Friday should have higher error rate than mid-week"
    );

    // Monday: 17 June 2024.
    let monday_rate = rate_on(2024, 6, 17);
    assert!(
        monday_rate > regular_rate,
        "Monday should have higher error rate than mid-week"
    );
}
2126
/// Over 200 generated entries, every entry must balance and at least one
/// posting date must be shared by more than one entry.
#[test]
fn test_batching_produces_similar_entries() {
    use std::collections::HashMap;

    let coa = Arc::new(
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42)
            .generate(),
    );

    // Seed 123 was picked by the original author as more likely to trigger
    // batching. Persona errors are off so every entry is expected to balance.
    let mut generator = JournalEntryGenerator::new_with_params(
        TransactionConfig::default(),
        coa,
        vec!["1000".to_string()],
        NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
        NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
        123,
    )
    .with_persona_errors(false);

    let mut entries: Vec<JournalEntry> = Vec::with_capacity(200);
    for _ in 0..200 {
        entries.push(generator.generate());
    }

    // Batched or not, each entry has to balance.
    for entry in &entries {
        assert!(
            entry.is_balanced(),
            "All entries including batched should be balanced"
        );
    }

    // Tally entries per posting date; shared dates are the batch indicator.
    let mut per_date: HashMap<NaiveDate, usize> = HashMap::new();
    for entry in &entries {
        *per_date.entry(entry.header.posting_date).or_default() += 1;
    }

    let dates_with_multiple = per_date.values().filter(|&&tally| tally > 1).count();
    assert!(
        dates_with_multiple > 0,
        "With batching, should see some dates with multiple entries"
    );
}
2169
/// With business-day temporal patterns enabled for the US calendar, no
/// generated entry may be posted on a Saturday or Sunday.
#[test]
fn test_temporal_patterns_business_days() {
    use datasynth_config::schema::{
        BusinessDaySchemaConfig, CalendarSchemaConfig, TemporalPatternsConfig,
    };

    let coa = Arc::new(
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42)
            .generate(),
    );

    // Business days on, US region, no custom holidays; everything else default.
    let business_days = BusinessDaySchemaConfig {
        enabled: true,
        ..Default::default()
    };
    let calendars = CalendarSchemaConfig {
        regions: vec!["US".to_string()],
        custom_holidays: Vec::new(),
    };
    let temporal_config = TemporalPatternsConfig {
        enabled: true,
        business_days,
        calendars,
        ..Default::default()
    };

    // Date range covers Q1 2024.
    let mut generator = JournalEntryGenerator::new_with_params(
        TransactionConfig::default(),
        coa,
        vec!["1000".to_string()],
        NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
        NaiveDate::from_ymd_opt(2024, 3, 31).unwrap(),
        42,
    )
    .with_temporal_patterns(temporal_config, 42)
    .with_persona_errors(false);

    let entries: Vec<JournalEntry> = (0..100).map(|_| generator.generate()).collect();

    for entry in &entries {
        let posted_on = entry.header.posting_date;
        let falls_on_weekend =
            matches!(posted_on.weekday(), chrono::Weekday::Sat | chrono::Weekday::Sun);
        assert!(
            !falls_on_weekend,
            "Posting date {:?} should not be a weekend",
            entry.header.posting_date
        );
    }
}
2217}