// Source file: datasynth_generators/je_generator.rs

1//! Journal Entry generator with statistical distributions.
2
3use chrono::{Datelike, NaiveDate};
4use rand::prelude::*;
5use rand_chacha::ChaCha8Rng;
6use rust_decimal::prelude::*;
7use rust_decimal::Decimal;
8use std::sync::Arc;
9
10use datasynth_config::schema::{
11    FraudConfig, GeneratorConfig, TemplateConfig, TemporalPatternsConfig, TransactionConfig,
12};
13use datasynth_core::distributions::{
14    BusinessDayCalculator, CrossDayConfig, DriftAdjustments, DriftConfig, DriftController,
15    EventType, LagDistribution, PeriodEndConfig, PeriodEndDynamics, PeriodEndModel,
16    ProcessingLagCalculator, ProcessingLagConfig, *,
17};
18use datasynth_core::models::*;
19use datasynth_core::templates::{
20    descriptions::DescriptionContext, DescriptionGenerator, ReferenceGenerator, ReferenceType,
21};
22use datasynth_core::traits::Generator;
23use datasynth_core::uuid_factory::{DeterministicUuidFactory, GeneratorType};
24
25use crate::company_selector::WeightedCompanySelector;
26use crate::user_generator::{UserGenerator, UserGeneratorConfig};
27
28/// Generator for realistic journal entries.
29pub struct JournalEntryGenerator {
30    rng: ChaCha8Rng,
31    seed: u64,
32    config: TransactionConfig,
33    coa: Arc<ChartOfAccounts>,
34    companies: Vec<String>,
35    company_selector: WeightedCompanySelector,
36    line_sampler: LineItemSampler,
37    amount_sampler: AmountSampler,
38    temporal_sampler: TemporalSampler,
39    start_date: NaiveDate,
40    end_date: NaiveDate,
41    count: u64,
42    uuid_factory: DeterministicUuidFactory,
43    // Enhanced features
44    user_pool: Option<UserPool>,
45    description_generator: DescriptionGenerator,
46    reference_generator: ReferenceGenerator,
47    template_config: TemplateConfig,
48    vendor_pool: VendorPool,
49    customer_pool: CustomerPool,
50    // Material pool for realistic material references
51    material_pool: Option<MaterialPool>,
52    // Flag indicating whether we're using real master data vs defaults
53    using_real_master_data: bool,
54    // Fraud generation
55    fraud_config: FraudConfig,
56    // Persona-based error injection
57    persona_errors_enabled: bool,
58    // Approval threshold enforcement
59    approval_enabled: bool,
60    approval_threshold: rust_decimal::Decimal,
61    // Batching behavior - humans often process similar items together
62    batch_state: Option<BatchState>,
63    // Temporal drift controller for simulating distribution changes over time
64    drift_controller: Option<DriftController>,
65    // Temporal patterns components
66    business_day_calculator: Option<BusinessDayCalculator>,
67    processing_lag_calculator: Option<ProcessingLagCalculator>,
68    temporal_patterns_config: Option<TemporalPatternsConfig>,
69}
70
71/// State for tracking batch processing behavior.
72///
73/// When humans process transactions, they often batch similar items together
74/// (e.g., processing all invoices from one vendor, entering similar expenses).
75#[derive(Clone)]
76struct BatchState {
77    /// The base entry template to vary
78    base_vendor: Option<String>,
79    base_customer: Option<String>,
80    base_account_number: String,
81    base_amount: rust_decimal::Decimal,
82    base_business_process: Option<BusinessProcess>,
83    base_posting_date: NaiveDate,
84    /// Remaining entries in this batch
85    remaining: u8,
86}
87
88impl JournalEntryGenerator {
89    /// Create a new journal entry generator.
90    pub fn new_with_params(
91        config: TransactionConfig,
92        coa: Arc<ChartOfAccounts>,
93        companies: Vec<String>,
94        start_date: NaiveDate,
95        end_date: NaiveDate,
96        seed: u64,
97    ) -> Self {
98        Self::new_with_full_config(
99            config,
100            coa,
101            companies,
102            start_date,
103            end_date,
104            seed,
105            TemplateConfig::default(),
106            None,
107        )
108    }
109
110    /// Create a new journal entry generator with full configuration.
111    #[allow(clippy::too_many_arguments)]
112    pub fn new_with_full_config(
113        config: TransactionConfig,
114        coa: Arc<ChartOfAccounts>,
115        companies: Vec<String>,
116        start_date: NaiveDate,
117        end_date: NaiveDate,
118        seed: u64,
119        template_config: TemplateConfig,
120        user_pool: Option<UserPool>,
121    ) -> Self {
122        // Initialize user pool if not provided
123        let user_pool = user_pool.or_else(|| {
124            if template_config.names.generate_realistic_names {
125                let user_gen_config = UserGeneratorConfig {
126                    culture_distribution: vec![
127                        (
128                            datasynth_core::templates::NameCulture::WesternUs,
129                            template_config.names.culture_distribution.western_us,
130                        ),
131                        (
132                            datasynth_core::templates::NameCulture::Hispanic,
133                            template_config.names.culture_distribution.hispanic,
134                        ),
135                        (
136                            datasynth_core::templates::NameCulture::German,
137                            template_config.names.culture_distribution.german,
138                        ),
139                        (
140                            datasynth_core::templates::NameCulture::French,
141                            template_config.names.culture_distribution.french,
142                        ),
143                        (
144                            datasynth_core::templates::NameCulture::Chinese,
145                            template_config.names.culture_distribution.chinese,
146                        ),
147                        (
148                            datasynth_core::templates::NameCulture::Japanese,
149                            template_config.names.culture_distribution.japanese,
150                        ),
151                        (
152                            datasynth_core::templates::NameCulture::Indian,
153                            template_config.names.culture_distribution.indian,
154                        ),
155                    ],
156                    email_domain: template_config.names.email_domain.clone(),
157                    generate_realistic_names: true,
158                };
159                let mut user_gen = UserGenerator::with_config(seed + 100, user_gen_config);
160                Some(user_gen.generate_standard(&companies))
161            } else {
162                None
163            }
164        });
165
166        // Initialize reference generator
167        let mut ref_gen = ReferenceGenerator::new(
168            start_date.year(),
169            companies.first().map(|s| s.as_str()).unwrap_or("1000"),
170        );
171        ref_gen.set_prefix(
172            ReferenceType::Invoice,
173            &template_config.references.invoice_prefix,
174        );
175        ref_gen.set_prefix(
176            ReferenceType::PurchaseOrder,
177            &template_config.references.po_prefix,
178        );
179        ref_gen.set_prefix(
180            ReferenceType::SalesOrder,
181            &template_config.references.so_prefix,
182        );
183
184        // Create weighted company selector (uniform weights for this constructor)
185        let company_selector = WeightedCompanySelector::uniform(companies.clone());
186
187        Self {
188            rng: ChaCha8Rng::seed_from_u64(seed),
189            seed,
190            config: config.clone(),
191            coa,
192            companies,
193            company_selector,
194            line_sampler: LineItemSampler::with_config(
195                seed + 1,
196                config.line_item_distribution.clone(),
197                config.even_odd_distribution.clone(),
198                config.debit_credit_distribution.clone(),
199            ),
200            amount_sampler: AmountSampler::with_config(seed + 2, config.amounts.clone()),
201            temporal_sampler: TemporalSampler::with_config(
202                seed + 3,
203                config.seasonality.clone(),
204                WorkingHoursConfig::default(),
205                Vec::new(),
206            ),
207            start_date,
208            end_date,
209            count: 0,
210            uuid_factory: DeterministicUuidFactory::new(seed, GeneratorType::JournalEntry),
211            user_pool,
212            description_generator: DescriptionGenerator::new(),
213            reference_generator: ref_gen,
214            template_config,
215            vendor_pool: VendorPool::standard(),
216            customer_pool: CustomerPool::standard(),
217            material_pool: None,
218            using_real_master_data: false,
219            fraud_config: FraudConfig::default(),
220            persona_errors_enabled: true, // Enable by default for realism
221            approval_enabled: true,       // Enable by default for realism
222            approval_threshold: rust_decimal::Decimal::new(10000, 0), // $10,000 default threshold
223            batch_state: None,
224            drift_controller: None,
225            business_day_calculator: None,
226            processing_lag_calculator: None,
227            temporal_patterns_config: None,
228        }
229    }
230
231    /// Create from a full GeneratorConfig.
232    ///
233    /// This constructor uses the volume_weight from company configs
234    /// for weighted company selection, and fraud config from GeneratorConfig.
235    pub fn from_generator_config(
236        full_config: &GeneratorConfig,
237        coa: Arc<ChartOfAccounts>,
238        start_date: NaiveDate,
239        end_date: NaiveDate,
240        seed: u64,
241    ) -> Self {
242        let companies: Vec<String> = full_config
243            .companies
244            .iter()
245            .map(|c| c.code.clone())
246            .collect();
247
248        // Create weighted selector using volume_weight from company configs
249        let company_selector = WeightedCompanySelector::from_configs(&full_config.companies);
250
251        let mut generator = Self::new_with_full_config(
252            full_config.transactions.clone(),
253            coa,
254            companies,
255            start_date,
256            end_date,
257            seed,
258            full_config.templates.clone(),
259            None,
260        );
261
262        // Override the uniform selector with weighted selector
263        generator.company_selector = company_selector;
264
265        // Set fraud config
266        generator.fraud_config = full_config.fraud.clone();
267
268        // Configure temporal patterns if enabled
269        let temporal_config = &full_config.temporal_patterns;
270        if temporal_config.enabled {
271            generator = generator.with_temporal_patterns(temporal_config.clone(), seed);
272        }
273
274        generator
275    }
276
277    /// Configure temporal patterns including business day calculations and processing lags.
278    ///
279    /// This enables realistic temporal behavior including:
280    /// - Business day awareness (no postings on weekends/holidays)
281    /// - Processing lag modeling (event-to-posting delays)
282    /// - Period-end dynamics (volume spikes at month/quarter/year end)
283    pub fn with_temporal_patterns(mut self, config: TemporalPatternsConfig, seed: u64) -> Self {
284        // Create business day calculator if enabled
285        if config.business_days.enabled {
286            let region = config
287                .calendars
288                .regions
289                .first()
290                .map(|r| Self::parse_region(r))
291                .unwrap_or(Region::US);
292
293            let calendar = HolidayCalendar::new(region, self.start_date.year());
294            self.business_day_calculator = Some(BusinessDayCalculator::new(calendar));
295        }
296
297        // Create processing lag calculator if enabled
298        if config.processing_lags.enabled {
299            let lag_config = Self::convert_processing_lag_config(&config.processing_lags);
300            self.processing_lag_calculator =
301                Some(ProcessingLagCalculator::with_config(seed, lag_config));
302        }
303
304        // Create period-end dynamics if configured
305        let model = config.period_end.model.as_deref().unwrap_or("flat");
306        if model != "flat"
307            || config
308                .period_end
309                .month_end
310                .as_ref()
311                .is_some_and(|m| m.peak_multiplier.unwrap_or(1.0) != 1.0)
312        {
313            let dynamics = Self::convert_period_end_config(&config.period_end);
314            self.temporal_sampler.set_period_end_dynamics(dynamics);
315        }
316
317        self.temporal_patterns_config = Some(config);
318        self
319    }
320
321    /// Convert schema processing lag config to core config.
322    fn convert_processing_lag_config(
323        schema: &datasynth_config::schema::ProcessingLagSchemaConfig,
324    ) -> ProcessingLagConfig {
325        let mut config = ProcessingLagConfig {
326            enabled: schema.enabled,
327            ..Default::default()
328        };
329
330        // Helper to convert lag schema to distribution
331        let convert_lag = |lag: &datasynth_config::schema::LagDistributionSchemaConfig| {
332            let mut dist = LagDistribution::log_normal(lag.mu, lag.sigma);
333            if let Some(min) = lag.min_hours {
334                dist.min_lag_hours = min;
335            }
336            if let Some(max) = lag.max_hours {
337                dist.max_lag_hours = max;
338            }
339            dist
340        };
341
342        // Apply event-specific lags
343        if let Some(ref lag) = schema.sales_order_lag {
344            config
345                .event_lags
346                .insert(EventType::SalesOrder, convert_lag(lag));
347        }
348        if let Some(ref lag) = schema.purchase_order_lag {
349            config
350                .event_lags
351                .insert(EventType::PurchaseOrder, convert_lag(lag));
352        }
353        if let Some(ref lag) = schema.goods_receipt_lag {
354            config
355                .event_lags
356                .insert(EventType::GoodsReceipt, convert_lag(lag));
357        }
358        if let Some(ref lag) = schema.invoice_receipt_lag {
359            config
360                .event_lags
361                .insert(EventType::InvoiceReceipt, convert_lag(lag));
362        }
363        if let Some(ref lag) = schema.invoice_issue_lag {
364            config
365                .event_lags
366                .insert(EventType::InvoiceIssue, convert_lag(lag));
367        }
368        if let Some(ref lag) = schema.payment_lag {
369            config
370                .event_lags
371                .insert(EventType::Payment, convert_lag(lag));
372        }
373        if let Some(ref lag) = schema.journal_entry_lag {
374            config
375                .event_lags
376                .insert(EventType::JournalEntry, convert_lag(lag));
377        }
378
379        // Apply cross-day posting config
380        if let Some(ref cross_day) = schema.cross_day_posting {
381            config.cross_day = CrossDayConfig {
382                enabled: cross_day.enabled,
383                probability_by_hour: cross_day.probability_by_hour.clone(),
384                ..Default::default()
385            };
386        }
387
388        config
389    }
390
391    /// Convert schema period-end config to core PeriodEndDynamics.
392    fn convert_period_end_config(
393        schema: &datasynth_config::schema::PeriodEndSchemaConfig,
394    ) -> PeriodEndDynamics {
395        let model_type = schema.model.as_deref().unwrap_or("exponential");
396
397        // Helper to convert period config
398        let convert_period =
399            |period: Option<&datasynth_config::schema::PeriodEndModelSchemaConfig>,
400             default_peak: f64|
401             -> PeriodEndConfig {
402                if let Some(p) = period {
403                    let model = match model_type {
404                        "flat" => PeriodEndModel::FlatMultiplier {
405                            multiplier: p.peak_multiplier.unwrap_or(default_peak),
406                        },
407                        "extended_crunch" => PeriodEndModel::ExtendedCrunch {
408                            start_day: p.start_day.unwrap_or(-10),
409                            sustained_high_days: p.sustained_high_days.unwrap_or(3),
410                            peak_multiplier: p.peak_multiplier.unwrap_or(default_peak),
411                            ramp_up_days: 3, // Default ramp-up period
412                        },
413                        _ => PeriodEndModel::ExponentialAcceleration {
414                            start_day: p.start_day.unwrap_or(-10),
415                            base_multiplier: p.base_multiplier.unwrap_or(1.0),
416                            peak_multiplier: p.peak_multiplier.unwrap_or(default_peak),
417                            decay_rate: p.decay_rate.unwrap_or(0.3),
418                        },
419                    };
420                    PeriodEndConfig {
421                        enabled: true,
422                        model,
423                        additional_multiplier: p.additional_multiplier.unwrap_or(1.0),
424                    }
425                } else {
426                    PeriodEndConfig {
427                        enabled: true,
428                        model: PeriodEndModel::ExponentialAcceleration {
429                            start_day: -10,
430                            base_multiplier: 1.0,
431                            peak_multiplier: default_peak,
432                            decay_rate: 0.3,
433                        },
434                        additional_multiplier: 1.0,
435                    }
436                }
437            };
438
439        PeriodEndDynamics::new(
440            convert_period(schema.month_end.as_ref(), 2.0),
441            convert_period(schema.quarter_end.as_ref(), 3.5),
442            convert_period(schema.year_end.as_ref(), 5.0),
443        )
444    }
445
446    /// Parse a region string into a Region enum.
447    fn parse_region(region_str: &str) -> Region {
448        match region_str.to_uppercase().as_str() {
449            "US" => Region::US,
450            "DE" => Region::DE,
451            "GB" => Region::GB,
452            "CN" => Region::CN,
453            "JP" => Region::JP,
454            "IN" => Region::IN,
455            "BR" => Region::BR,
456            "MX" => Region::MX,
457            "AU" => Region::AU,
458            "SG" => Region::SG,
459            "KR" => Region::KR,
460            _ => Region::US,
461        }
462    }
463
    /// Set a custom company selector.
    ///
    /// Replaces the selector currently stored on the generator (the uniform one
    /// installed by the constructor, or any selector set earlier).
    pub fn set_company_selector(&mut self, selector: WeightedCompanySelector) {
        self.company_selector = selector;
    }
468
    /// Get the current company selector.
    ///
    /// Returns a shared reference; the selector stays owned by the generator.
    pub fn company_selector(&self) -> &WeightedCompanySelector {
        &self.company_selector
    }
473
    /// Set fraud configuration.
    ///
    /// Overwrites the stored fraud config, which `determine_fraud` and
    /// `select_fraud_type` read.
    pub fn set_fraud_config(&mut self, config: FraudConfig) {
        self.fraud_config = config;
    }
478
479    /// Set vendors from generated master data.
480    ///
481    /// This replaces the default vendor pool with actual generated vendors,
482    /// ensuring JEs reference real master data entities.
483    pub fn with_vendors(mut self, vendors: &[Vendor]) -> Self {
484        if !vendors.is_empty() {
485            self.vendor_pool = VendorPool::from_vendors(vendors.to_vec());
486            self.using_real_master_data = true;
487        }
488        self
489    }
490
491    /// Set customers from generated master data.
492    ///
493    /// This replaces the default customer pool with actual generated customers,
494    /// ensuring JEs reference real master data entities.
495    pub fn with_customers(mut self, customers: &[Customer]) -> Self {
496        if !customers.is_empty() {
497            self.customer_pool = CustomerPool::from_customers(customers.to_vec());
498            self.using_real_master_data = true;
499        }
500        self
501    }
502
503    /// Set materials from generated master data.
504    ///
505    /// This provides material references for JEs that involve inventory movements.
506    pub fn with_materials(mut self, materials: &[Material]) -> Self {
507        if !materials.is_empty() {
508            self.material_pool = Some(MaterialPool::from_materials(materials.to_vec()));
509            self.using_real_master_data = true;
510        }
511        self
512    }
513
514    /// Set all master data at once for convenience.
515    ///
516    /// This is the recommended way to configure the JE generator with
517    /// generated master data to ensure data coherence.
518    pub fn with_master_data(
519        self,
520        vendors: &[Vendor],
521        customers: &[Customer],
522        materials: &[Material],
523    ) -> Self {
524        self.with_vendors(vendors)
525            .with_customers(customers)
526            .with_materials(materials)
527    }
528
    /// Check if the generator is using real master data.
    ///
    /// Returns the flag that `with_vendors`, `with_customers` and
    /// `with_materials` set when given a non-empty slice; it starts as `false`.
    pub fn is_using_real_master_data(&self) -> bool {
        self.using_real_master_data
    }
533
534    /// Determine if this transaction should be fraudulent.
535    fn determine_fraud(&mut self) -> Option<FraudType> {
536        if !self.fraud_config.enabled {
537            return None;
538        }
539
540        // Roll for fraud based on fraud rate
541        if self.rng.gen::<f64>() >= self.fraud_config.fraud_rate {
542            return None;
543        }
544
545        // Select fraud type based on distribution
546        Some(self.select_fraud_type())
547    }
548
549    /// Select a fraud type based on the configured distribution.
550    fn select_fraud_type(&mut self) -> FraudType {
551        let dist = &self.fraud_config.fraud_type_distribution;
552        let roll: f64 = self.rng.gen();
553
554        let mut cumulative = 0.0;
555
556        cumulative += dist.suspense_account_abuse;
557        if roll < cumulative {
558            return FraudType::SuspenseAccountAbuse;
559        }
560
561        cumulative += dist.fictitious_transaction;
562        if roll < cumulative {
563            return FraudType::FictitiousTransaction;
564        }
565
566        cumulative += dist.revenue_manipulation;
567        if roll < cumulative {
568            return FraudType::RevenueManipulation;
569        }
570
571        cumulative += dist.expense_capitalization;
572        if roll < cumulative {
573            return FraudType::ExpenseCapitalization;
574        }
575
576        cumulative += dist.split_transaction;
577        if roll < cumulative {
578            return FraudType::SplitTransaction;
579        }
580
581        cumulative += dist.timing_anomaly;
582        if roll < cumulative {
583            return FraudType::TimingAnomaly;
584        }
585
586        cumulative += dist.unauthorized_access;
587        if roll < cumulative {
588            return FraudType::UnauthorizedAccess;
589        }
590
591        // Default fallback
592        FraudType::DuplicatePayment
593    }
594
595    /// Map a fraud type to an amount pattern for suspicious amounts.
596    fn fraud_type_to_amount_pattern(&self, fraud_type: FraudType) -> FraudAmountPattern {
597        match fraud_type {
598            FraudType::SplitTransaction | FraudType::JustBelowThreshold => {
599                FraudAmountPattern::ThresholdAdjacent
600            }
601            FraudType::FictitiousTransaction
602            | FraudType::FictitiousEntry
603            | FraudType::SuspenseAccountAbuse
604            | FraudType::RoundDollarManipulation => FraudAmountPattern::ObviousRoundNumbers,
605            FraudType::RevenueManipulation
606            | FraudType::ExpenseCapitalization
607            | FraudType::ImproperCapitalization
608            | FraudType::ReserveManipulation
609            | FraudType::UnauthorizedAccess
610            | FraudType::PrematureRevenue
611            | FraudType::UnderstatedLiabilities
612            | FraudType::OverstatedAssets
613            | FraudType::ChannelStuffing => FraudAmountPattern::StatisticallyImprobable,
614            FraudType::DuplicatePayment
615            | FraudType::TimingAnomaly
616            | FraudType::SelfApproval
617            | FraudType::ExceededApprovalLimit
618            | FraudType::SegregationOfDutiesViolation
619            | FraudType::UnauthorizedApproval
620            | FraudType::CollusiveApproval
621            | FraudType::FictitiousVendor
622            | FraudType::ShellCompanyPayment
623            | FraudType::Kickback
624            | FraudType::KickbackScheme
625            | FraudType::InvoiceManipulation
626            | FraudType::AssetMisappropriation
627            | FraudType::InventoryTheft
628            | FraudType::GhostEmployee => FraudAmountPattern::Normal,
629            // Accounting Standards Fraud Types (ASC 606/IFRS 15 - Revenue)
630            FraudType::ImproperRevenueRecognition
631            | FraudType::ImproperPoAllocation
632            | FraudType::VariableConsiderationManipulation
633            | FraudType::ContractModificationMisstatement => {
634                FraudAmountPattern::StatisticallyImprobable
635            }
636            // Accounting Standards Fraud Types (ASC 842/IFRS 16 - Leases)
637            FraudType::LeaseClassificationManipulation
638            | FraudType::OffBalanceSheetLease
639            | FraudType::LeaseLiabilityUnderstatement
640            | FraudType::RouAssetMisstatement => FraudAmountPattern::StatisticallyImprobable,
641            // Accounting Standards Fraud Types (ASC 820/IFRS 13 - Fair Value)
642            FraudType::FairValueHierarchyManipulation
643            | FraudType::Level3InputManipulation
644            | FraudType::ValuationTechniqueManipulation => {
645                FraudAmountPattern::StatisticallyImprobable
646            }
647            // Accounting Standards Fraud Types (ASC 360/IAS 36 - Impairment)
648            FraudType::DelayedImpairment
649            | FraudType::ImpairmentTestAvoidance
650            | FraudType::CashFlowProjectionManipulation
651            | FraudType::ImproperImpairmentReversal => FraudAmountPattern::StatisticallyImprobable,
652            // Sourcing/Procurement Fraud
653            FraudType::BidRigging
654            | FraudType::PhantomVendorContract
655            | FraudType::ConflictOfInterestSourcing => FraudAmountPattern::Normal,
656            FraudType::SplitContractThreshold => FraudAmountPattern::ThresholdAdjacent,
657            // HR/Payroll Fraud
658            FraudType::GhostEmployeePayroll
659            | FraudType::PayrollInflation
660            | FraudType::DuplicateExpenseReport
661            | FraudType::FictitiousExpense => FraudAmountPattern::Normal,
662            FraudType::SplitExpenseToAvoidApproval => FraudAmountPattern::ThresholdAdjacent,
663            // O2C Fraud
664            FraudType::RevenueTimingManipulation => FraudAmountPattern::StatisticallyImprobable,
665            FraudType::QuotePriceOverride => FraudAmountPattern::Normal,
666        }
667    }
668
    /// Generate a deterministic UUID using the factory.
    ///
    /// Thin wrapper over `uuid_factory.next()`. Note that it is called through
    /// a shared `&self` reference.
    fn generate_deterministic_uuid(&self) -> uuid::Uuid {
        self.uuid_factory.next()
    }
673
674    /// Generate a single journal entry.
675    pub fn generate(&mut self) -> JournalEntry {
676        // Check if we're in a batch - if so, generate a batched entry
677        if let Some(ref state) = self.batch_state {
678            if state.remaining > 0 {
679                return self.generate_batched_entry();
680            }
681        }
682
683        self.count += 1;
684
685        // Generate deterministic document ID
686        let document_id = self.generate_deterministic_uuid();
687
688        // Sample posting date
689        let mut posting_date = self
690            .temporal_sampler
691            .sample_date(self.start_date, self.end_date);
692
693        // Adjust posting date to be a business day if business day calculator is configured
694        if let Some(ref calc) = self.business_day_calculator {
695            if !calc.is_business_day(posting_date) {
696                // Move to next business day
697                posting_date = calc.next_business_day(posting_date, false);
698                // Ensure we don't exceed end_date
699                if posting_date > self.end_date {
700                    posting_date = calc.prev_business_day(self.end_date, true);
701                }
702            }
703        }
704
705        // Select company using weighted selector
706        let company_code = self.company_selector.select(&mut self.rng).to_string();
707
708        // Sample line item specification
709        let line_spec = self.line_sampler.sample();
710
711        // Determine source type using full 4-way distribution
712        let source = self.select_source();
713        let is_automated = matches!(
714            source,
715            TransactionSource::Automated | TransactionSource::Recurring
716        );
717
718        // Select business process
719        let business_process = self.select_business_process();
720
721        // Determine if this is a fraudulent transaction
722        let fraud_type = self.determine_fraud();
723        let is_fraud = fraud_type.is_some();
724
725        // Sample time based on source
726        let time = self.temporal_sampler.sample_time(!is_automated);
727        let created_at = posting_date.and_time(time).and_utc();
728
729        // Select user from pool or generate generic
730        let (created_by, user_persona) = self.select_user(is_automated);
731
732        // Create header with deterministic UUID
733        let mut header =
734            JournalEntryHeader::with_deterministic_id(company_code, posting_date, document_id);
735        header.created_at = created_at;
736        header.source = source;
737        header.created_by = created_by;
738        header.user_persona = user_persona;
739        header.business_process = Some(business_process);
740        header.is_fraud = is_fraud;
741        header.fraud_type = fraud_type;
742
743        // Generate description context
744        let mut context =
745            DescriptionContext::with_period(posting_date.month(), posting_date.year());
746
747        // Add vendor/customer context based on business process
748        match business_process {
749            BusinessProcess::P2P => {
750                if let Some(vendor) = self.vendor_pool.random_vendor(&mut self.rng) {
751                    context.vendor_name = Some(vendor.name.clone());
752                }
753            }
754            BusinessProcess::O2C => {
755                if let Some(customer) = self.customer_pool.random_customer(&mut self.rng) {
756                    context.customer_name = Some(customer.name.clone());
757                }
758            }
759            _ => {}
760        }
761
762        // Generate header text if enabled
763        if self.template_config.descriptions.generate_header_text {
764            header.header_text = Some(self.description_generator.generate_header_text(
765                business_process,
766                &context,
767                &mut self.rng,
768            ));
769        }
770
771        // Generate reference if enabled
772        if self.template_config.references.generate_references {
773            header.reference = Some(
774                self.reference_generator
775                    .generate_for_process_year(business_process, posting_date.year()),
776            );
777        }
778
779        // Generate line items
780        let mut entry = JournalEntry::new(header);
781
782        // Generate amount - use fraud pattern if this is a fraudulent transaction
783        let base_amount = if let Some(ft) = fraud_type {
784            let pattern = self.fraud_type_to_amount_pattern(ft);
785            self.amount_sampler.sample_fraud(pattern)
786        } else {
787            self.amount_sampler.sample()
788        };
789
790        // Apply temporal drift if configured
791        let drift_adjusted_amount = {
792            let drift = self.get_drift_adjustments(posting_date);
793            if drift.amount_mean_multiplier != 1.0 {
794                // Apply drift multiplier (includes seasonal factor if enabled)
795                let multiplier = drift.amount_mean_multiplier * drift.seasonal_factor;
796                let adjusted = base_amount.to_f64().unwrap_or(1.0) * multiplier;
797                Decimal::from_f64_retain(adjusted).unwrap_or(base_amount)
798            } else {
799                base_amount
800            }
801        };
802
803        // Apply human variation to amounts for non-automated transactions
804        let total_amount = if is_automated {
805            drift_adjusted_amount // Automated systems use exact amounts
806        } else {
807            self.apply_human_variation(drift_adjusted_amount)
808        };
809
810        // Generate debit lines
811        let debit_amounts = self
812            .amount_sampler
813            .sample_summing_to(line_spec.debit_count, total_amount);
814        for (i, amount) in debit_amounts.into_iter().enumerate() {
815            let account_number = self.select_debit_account().account_number.clone();
816            let mut line = JournalEntryLine::debit(
817                entry.header.document_id,
818                (i + 1) as u32,
819                account_number.clone(),
820                amount,
821            );
822
823            // Generate line text if enabled
824            if self.template_config.descriptions.generate_line_text {
825                line.line_text = Some(self.description_generator.generate_line_text(
826                    &account_number,
827                    &context,
828                    &mut self.rng,
829                ));
830            }
831
832            entry.add_line(line);
833        }
834
835        // Generate credit lines - use the SAME amounts to ensure balance
836        let credit_amounts = self
837            .amount_sampler
838            .sample_summing_to(line_spec.credit_count, total_amount);
839        for (i, amount) in credit_amounts.into_iter().enumerate() {
840            let account_number = self.select_credit_account().account_number.clone();
841            let mut line = JournalEntryLine::credit(
842                entry.header.document_id,
843                (line_spec.debit_count + i + 1) as u32,
844                account_number.clone(),
845                amount,
846            );
847
848            // Generate line text if enabled
849            if self.template_config.descriptions.generate_line_text {
850                line.line_text = Some(self.description_generator.generate_line_text(
851                    &account_number,
852                    &context,
853                    &mut self.rng,
854                ));
855            }
856
857            entry.add_line(line);
858        }
859
860        // Apply persona-based errors if enabled and it's a human user
861        if self.persona_errors_enabled && !is_automated {
862            self.maybe_inject_persona_error(&mut entry);
863        }
864
865        // Apply approval workflow if enabled and amount exceeds threshold
866        if self.approval_enabled {
867            self.maybe_apply_approval_workflow(&mut entry, posting_date);
868        }
869
870        // Maybe start a batch of similar entries for realism
871        self.maybe_start_batch(&entry);
872
873        entry
874    }
875
876    /// Enable or disable persona-based error injection.
877    ///
878    /// When enabled, entries created by human personas have a chance
879    /// to contain realistic human errors based on their experience level.
880    pub fn with_persona_errors(mut self, enabled: bool) -> Self {
881        self.persona_errors_enabled = enabled;
882        self
883    }
884
885    /// Set fraud configuration for fraud injection.
886    ///
887    /// When fraud is enabled in the config, transactions have a chance
888    /// to be marked as fraudulent based on the configured fraud rate.
889    pub fn with_fraud_config(mut self, config: FraudConfig) -> Self {
890        self.fraud_config = config;
891        self
892    }
893
894    /// Check if persona errors are enabled.
895    pub fn persona_errors_enabled(&self) -> bool {
896        self.persona_errors_enabled
897    }
898
899    /// Enable or disable batch processing behavior.
900    ///
901    /// When enabled (default), the generator will occasionally produce batches
902    /// of similar entries, simulating how humans batch similar work together.
903    pub fn with_batching(mut self, enabled: bool) -> Self {
904        if !enabled {
905            self.batch_state = None;
906        }
907        self
908    }
909
910    /// Check if batch processing is enabled.
911    pub fn batching_enabled(&self) -> bool {
912        // Batching is implicitly enabled when not explicitly disabled
913        true
914    }
915
916    /// Maybe start a batch based on the current entry.
917    ///
918    /// Humans often batch similar work: processing invoices from one vendor,
919    /// entering expense reports for a trip, reconciling similar items.
920    fn maybe_start_batch(&mut self, entry: &JournalEntry) {
921        // Only start batch for non-automated, non-fraud entries
922        if entry.header.source == TransactionSource::Automated || entry.header.is_fraud {
923            return;
924        }
925
926        // 15% chance to start a batch (most work is not batched)
927        if self.rng.gen::<f64>() > 0.15 {
928            return;
929        }
930
931        // Extract key attributes for batching
932        let base_account = entry
933            .lines
934            .first()
935            .map(|l| l.gl_account.clone())
936            .unwrap_or_default();
937
938        let base_amount = entry.total_debit();
939
940        self.batch_state = Some(BatchState {
941            base_vendor: None, // Would need vendor from context
942            base_customer: None,
943            base_account_number: base_account,
944            base_amount,
945            base_business_process: entry.header.business_process,
946            base_posting_date: entry.header.posting_date,
947            remaining: self.rng.gen_range(2..7), // 2-6 more similar entries
948        });
949    }
950
951    /// Generate an entry that's part of the current batch.
952    ///
953    /// Batched entries have:
954    /// - Same or very similar business process
955    /// - Same posting date (batched work done together)
956    /// - Similar amounts (within ±15%)
957    /// - Same debit account (processing similar items)
958    fn generate_batched_entry(&mut self) -> JournalEntry {
959        use rust_decimal::Decimal;
960
961        // Decrement batch counter
962        if let Some(ref mut state) = self.batch_state {
963            state.remaining = state.remaining.saturating_sub(1);
964        }
965
966        let batch = self
967            .batch_state
968            .clone()
969            .expect("batch_state set before calling generate_batched_entry");
970
971        // Use the batch's posting date (work done on same day)
972        let posting_date = batch.base_posting_date;
973
974        self.count += 1;
975        let document_id = self.generate_deterministic_uuid();
976
977        // Select same company (batched work is usually same company)
978        let company_code = self.company_selector.select(&mut self.rng).to_string();
979
980        // Use simplified line spec for batched entries (usually 2-line)
981        let _line_spec = LineItemSpec {
982            total_count: 2,
983            debit_count: 1,
984            credit_count: 1,
985            split_type: DebitCreditSplit::Equal,
986        };
987
988        // Batched entries are always manual
989        let source = TransactionSource::Manual;
990
991        // Use the batch's business process
992        let business_process = batch.base_business_process.unwrap_or(BusinessProcess::R2R);
993
994        // Sample time
995        let time = self.temporal_sampler.sample_time(true);
996        let created_at = posting_date.and_time(time).and_utc();
997
998        // Same user for batched work
999        let (created_by, user_persona) = self.select_user(false);
1000
1001        // Create header
1002        let mut header =
1003            JournalEntryHeader::with_deterministic_id(company_code, posting_date, document_id);
1004        header.created_at = created_at;
1005        header.source = source;
1006        header.created_by = created_by;
1007        header.user_persona = user_persona;
1008        header.business_process = Some(business_process);
1009
1010        // Generate similar amount (within ±15% of base)
1011        let variation = self.rng.gen_range(-0.15..0.15);
1012        let varied_amount =
1013            batch.base_amount * (Decimal::ONE + Decimal::try_from(variation).unwrap_or_default());
1014        let total_amount = varied_amount.round_dp(2).max(Decimal::from(1));
1015
1016        // Create the entry
1017        let mut entry = JournalEntry::new(header);
1018
1019        // Use same debit account as batch base
1020        let debit_line = JournalEntryLine::debit(
1021            entry.header.document_id,
1022            1,
1023            batch.base_account_number.clone(),
1024            total_amount,
1025        );
1026        entry.add_line(debit_line);
1027
1028        // Select a credit account
1029        let credit_account = self.select_credit_account().account_number.clone();
1030        let credit_line =
1031            JournalEntryLine::credit(entry.header.document_id, 2, credit_account, total_amount);
1032        entry.add_line(credit_line);
1033
1034        // Apply persona-based errors if enabled
1035        if self.persona_errors_enabled {
1036            self.maybe_inject_persona_error(&mut entry);
1037        }
1038
1039        // Apply approval workflow if enabled
1040        if self.approval_enabled {
1041            self.maybe_apply_approval_workflow(&mut entry, posting_date);
1042        }
1043
1044        // Clear batch state if no more entries remaining
1045        if batch.remaining <= 1 {
1046            self.batch_state = None;
1047        }
1048
1049        entry
1050    }
1051
1052    /// Maybe inject a persona-appropriate error based on the persona's error rate.
1053    fn maybe_inject_persona_error(&mut self, entry: &mut JournalEntry) {
1054        // Parse persona from the entry header
1055        let persona_str = &entry.header.user_persona;
1056        let persona = match persona_str.to_lowercase().as_str() {
1057            s if s.contains("junior") => UserPersona::JuniorAccountant,
1058            s if s.contains("senior") => UserPersona::SeniorAccountant,
1059            s if s.contains("controller") => UserPersona::Controller,
1060            s if s.contains("manager") => UserPersona::Manager,
1061            s if s.contains("executive") => UserPersona::Executive,
1062            _ => return, // Don't inject errors for unknown personas
1063        };
1064
1065        // Get base error rate from persona
1066        let base_error_rate = persona.error_rate();
1067
1068        // Apply stress factors based on posting date
1069        let adjusted_rate = self.apply_stress_factors(base_error_rate, entry.header.posting_date);
1070
1071        // Check if error should occur based on adjusted rate
1072        if self.rng.gen::<f64>() >= adjusted_rate {
1073            return; // No error this time
1074        }
1075
1076        // Select and inject persona-appropriate error
1077        self.inject_human_error(entry, persona);
1078    }
1079
1080    /// Apply contextual stress factors to the base error rate.
1081    ///
1082    /// Stress factors increase error likelihood during:
1083    /// - Month-end (day >= 28): 1.5x more errors due to deadline pressure
1084    /// - Quarter-end (Mar, Jun, Sep, Dec): additional 25% boost
1085    /// - Year-end (December 28-31): 2.0x more errors due to audit pressure
1086    /// - Monday morning (catch-up work): 20% more errors
1087    /// - Friday afternoon (rushing to leave): 30% more errors
1088    fn apply_stress_factors(&self, base_rate: f64, posting_date: chrono::NaiveDate) -> f64 {
1089        use chrono::Datelike;
1090
1091        let mut rate = base_rate;
1092        let day = posting_date.day();
1093        let month = posting_date.month();
1094
1095        // Year-end stress (December 28-31): double the error rate
1096        if month == 12 && day >= 28 {
1097            rate *= 2.0;
1098            return rate.min(0.5); // Cap at 50% to keep it realistic
1099        }
1100
1101        // Quarter-end stress (last days of Mar, Jun, Sep, Dec)
1102        if matches!(month, 3 | 6 | 9 | 12) && day >= 28 {
1103            rate *= 1.75; // 75% more errors at quarter end
1104            return rate.min(0.4);
1105        }
1106
1107        // Month-end stress (last 3 days of month)
1108        if day >= 28 {
1109            rate *= 1.5; // 50% more errors at month end
1110        }
1111
1112        // Day-of-week stress effects
1113        let weekday = posting_date.weekday();
1114        match weekday {
1115            chrono::Weekday::Mon => {
1116                // Monday: catching up, often rushed
1117                rate *= 1.2;
1118            }
1119            chrono::Weekday::Fri => {
1120                // Friday: rushing to finish before weekend
1121                rate *= 1.3;
1122            }
1123            _ => {}
1124        }
1125
1126        // Cap at 40% to keep it realistic
1127        rate.min(0.4)
1128    }
1129
1130    /// Apply human-like variation to an amount.
1131    ///
1132    /// Humans don't enter perfectly calculated amounts - they:
1133    /// - Round amounts differently
1134    /// - Estimate instead of calculating exactly
1135    /// - Make small input variations
1136    ///
1137    /// This applies small variations (typically ±2%) to make amounts more realistic.
1138    fn apply_human_variation(&mut self, amount: rust_decimal::Decimal) -> rust_decimal::Decimal {
1139        use rust_decimal::Decimal;
1140
1141        // Automated transactions or very small amounts don't get variation
1142        if amount < Decimal::from(10) {
1143            return amount;
1144        }
1145
1146        // 70% chance of human variation being applied
1147        if self.rng.gen::<f64>() > 0.70 {
1148            return amount;
1149        }
1150
1151        // Decide which type of human variation to apply
1152        let variation_type: u8 = self.rng.gen_range(0..4);
1153
1154        match variation_type {
1155            0 => {
1156                // ±2% variation (common for estimated amounts)
1157                let variation_pct = self.rng.gen_range(-0.02..0.02);
1158                let variation = amount * Decimal::try_from(variation_pct).unwrap_or_default();
1159                (amount + variation).round_dp(2)
1160            }
1161            1 => {
1162                // Round to nearest $10
1163                let ten = Decimal::from(10);
1164                (amount / ten).round() * ten
1165            }
1166            2 => {
1167                // Round to nearest $100 (for larger amounts)
1168                if amount >= Decimal::from(500) {
1169                    let hundred = Decimal::from(100);
1170                    (amount / hundred).round() * hundred
1171                } else {
1172                    amount
1173                }
1174            }
1175            3 => {
1176                // Slight under/over payment (±$0.01 to ±$1.00)
1177                let cents = Decimal::new(self.rng.gen_range(-100..100), 2);
1178                (amount + cents).max(Decimal::ZERO).round_dp(2)
1179            }
1180            _ => amount,
1181        }
1182    }
1183
1184    /// Rebalance an entry after a one-sided amount modification.
1185    ///
1186    /// When an error modifies one line's amount, this finds a line on the opposite
1187    /// side (credit if modified was debit, or vice versa) and adjusts it by the
1188    /// same impact to maintain balance.
1189    fn rebalance_entry(entry: &mut JournalEntry, modified_was_debit: bool, impact: Decimal) {
1190        // Find a line on the opposite side to adjust
1191        let balancing_idx = entry.lines.iter().position(|l| {
1192            if modified_was_debit {
1193                l.credit_amount > Decimal::ZERO
1194            } else {
1195                l.debit_amount > Decimal::ZERO
1196            }
1197        });
1198
1199        if let Some(idx) = balancing_idx {
1200            if modified_was_debit {
1201                entry.lines[idx].credit_amount += impact;
1202            } else {
1203                entry.lines[idx].debit_amount += impact;
1204            }
1205        }
1206    }
1207
1208    /// Inject a human-like error based on the persona.
1209    ///
1210    /// All error types maintain balance - amount modifications are applied to both sides.
1211    /// Entries are marked with [HUMAN_ERROR:*] tags in header_text for ML detection.
1212    fn inject_human_error(&mut self, entry: &mut JournalEntry, persona: UserPersona) {
1213        use rust_decimal::Decimal;
1214
1215        // Different personas make different types of errors
1216        let error_type: u8 = match persona {
1217            UserPersona::JuniorAccountant => {
1218                // Junior accountants make more varied errors
1219                self.rng.gen_range(0..5)
1220            }
1221            UserPersona::SeniorAccountant => {
1222                // Senior accountants mainly make transposition errors
1223                self.rng.gen_range(0..3)
1224            }
1225            UserPersona::Controller | UserPersona::Manager => {
1226                // Controllers/managers mainly make rounding or cutoff errors
1227                self.rng.gen_range(3..5)
1228            }
1229            _ => return,
1230        };
1231
1232        match error_type {
1233            0 => {
1234                // Transposed digits in an amount
1235                if let Some(line) = entry.lines.get_mut(0) {
1236                    let is_debit = line.debit_amount > Decimal::ZERO;
1237                    let original_amount = if is_debit {
1238                        line.debit_amount
1239                    } else {
1240                        line.credit_amount
1241                    };
1242
1243                    // Simple digit swap in the string representation
1244                    let s = original_amount.to_string();
1245                    if s.len() >= 2 {
1246                        let chars: Vec<char> = s.chars().collect();
1247                        let pos = self.rng.gen_range(0..chars.len().saturating_sub(1));
1248                        if chars[pos].is_ascii_digit()
1249                            && chars.get(pos + 1).is_some_and(|c| c.is_ascii_digit())
1250                        {
1251                            let mut new_chars = chars;
1252                            new_chars.swap(pos, pos + 1);
1253                            if let Ok(new_amount) =
1254                                new_chars.into_iter().collect::<String>().parse::<Decimal>()
1255                            {
1256                                let impact = new_amount - original_amount;
1257
1258                                // Apply to the modified line
1259                                if is_debit {
1260                                    entry.lines[0].debit_amount = new_amount;
1261                                } else {
1262                                    entry.lines[0].credit_amount = new_amount;
1263                                }
1264
1265                                // Rebalance the entry
1266                                Self::rebalance_entry(entry, is_debit, impact);
1267
1268                                entry.header.header_text = Some(
1269                                    entry.header.header_text.clone().unwrap_or_default()
1270                                        + " [HUMAN_ERROR:TRANSPOSITION]",
1271                                );
1272                            }
1273                        }
1274                    }
1275                }
1276            }
1277            1 => {
1278                // Wrong decimal place (off by factor of 10)
1279                if let Some(line) = entry.lines.get_mut(0) {
1280                    let is_debit = line.debit_amount > Decimal::ZERO;
1281                    let original_amount = if is_debit {
1282                        line.debit_amount
1283                    } else {
1284                        line.credit_amount
1285                    };
1286
1287                    let new_amount = original_amount * Decimal::new(10, 0);
1288                    let impact = new_amount - original_amount;
1289
1290                    // Apply to the modified line
1291                    if is_debit {
1292                        entry.lines[0].debit_amount = new_amount;
1293                    } else {
1294                        entry.lines[0].credit_amount = new_amount;
1295                    }
1296
1297                    // Rebalance the entry
1298                    Self::rebalance_entry(entry, is_debit, impact);
1299
1300                    entry.header.header_text = Some(
1301                        entry.header.header_text.clone().unwrap_or_default()
1302                            + " [HUMAN_ERROR:DECIMAL_SHIFT]",
1303                    );
1304                }
1305            }
1306            2 => {
1307                // Typo in description (doesn't affect balance)
1308                if let Some(ref mut text) = entry.header.header_text {
1309                    let typos = ["teh", "adn", "wiht", "taht", "recieve"];
1310                    let correct = ["the", "and", "with", "that", "receive"];
1311                    let idx = self.rng.gen_range(0..typos.len());
1312                    if text.to_lowercase().contains(correct[idx]) {
1313                        *text = text.replace(correct[idx], typos[idx]);
1314                        *text = format!("{} [HUMAN_ERROR:TYPO]", text);
1315                    }
1316                }
1317            }
1318            3 => {
1319                // Rounding to round number
1320                if let Some(line) = entry.lines.get_mut(0) {
1321                    let is_debit = line.debit_amount > Decimal::ZERO;
1322                    let original_amount = if is_debit {
1323                        line.debit_amount
1324                    } else {
1325                        line.credit_amount
1326                    };
1327
1328                    let new_amount =
1329                        (original_amount / Decimal::new(100, 0)).round() * Decimal::new(100, 0);
1330                    let impact = new_amount - original_amount;
1331
1332                    // Apply to the modified line
1333                    if is_debit {
1334                        entry.lines[0].debit_amount = new_amount;
1335                    } else {
1336                        entry.lines[0].credit_amount = new_amount;
1337                    }
1338
1339                    // Rebalance the entry
1340                    Self::rebalance_entry(entry, is_debit, impact);
1341
1342                    entry.header.header_text = Some(
1343                        entry.header.header_text.clone().unwrap_or_default()
1344                            + " [HUMAN_ERROR:ROUNDED]",
1345                    );
1346                }
1347            }
1348            4 => {
1349                // Late posting marker (document date much earlier than posting date)
1350                // This doesn't create an imbalance
1351                if entry.header.document_date == entry.header.posting_date {
1352                    let days_late = self.rng.gen_range(5..15);
1353                    entry.header.document_date =
1354                        entry.header.posting_date - chrono::Duration::days(days_late);
1355                    entry.header.header_text = Some(
1356                        entry.header.header_text.clone().unwrap_or_default()
1357                            + " [HUMAN_ERROR:LATE_POSTING]",
1358                    );
1359                }
1360            }
1361            _ => {}
1362        }
1363    }
1364
1365    /// Apply approval workflow for high-value transactions.
1366    ///
1367    /// If the entry amount exceeds the approval threshold, simulate an
1368    /// approval workflow with appropriate approvers based on amount.
1369    fn maybe_apply_approval_workflow(
1370        &mut self,
1371        entry: &mut JournalEntry,
1372        _posting_date: NaiveDate,
1373    ) {
1374        use rust_decimal::Decimal;
1375
1376        let amount = entry.total_debit();
1377
1378        // Skip if amount is below threshold
1379        if amount <= self.approval_threshold {
1380            // Auto-approved below threshold
1381            let workflow = ApprovalWorkflow::auto_approved(
1382                entry.header.created_by.clone(),
1383                entry.header.user_persona.clone(),
1384                amount,
1385                entry.header.created_at,
1386            );
1387            entry.header.approval_workflow = Some(workflow);
1388            return;
1389        }
1390
1391        // Mark as SOX relevant for high-value transactions
1392        entry.header.sox_relevant = true;
1393
1394        // Determine required approval levels based on amount
1395        let required_levels = if amount > Decimal::new(100000, 0) {
1396            3 // Executive approval required
1397        } else if amount > Decimal::new(50000, 0) {
1398            2 // Senior management approval
1399        } else {
1400            1 // Manager approval
1401        };
1402
1403        // Create the approval workflow
1404        let mut workflow = ApprovalWorkflow::new(
1405            entry.header.created_by.clone(),
1406            entry.header.user_persona.clone(),
1407            amount,
1408        );
1409        workflow.required_levels = required_levels;
1410
1411        // Simulate submission
1412        let submit_time = entry.header.created_at;
1413        let submit_action = ApprovalAction::new(
1414            entry.header.created_by.clone(),
1415            entry.header.user_persona.clone(),
1416            self.parse_persona(&entry.header.user_persona),
1417            ApprovalActionType::Submit,
1418            0,
1419        )
1420        .with_timestamp(submit_time);
1421
1422        workflow.actions.push(submit_action);
1423        workflow.status = ApprovalStatus::Pending;
1424        workflow.submitted_at = Some(submit_time);
1425
1426        // Simulate approvals with realistic delays
1427        let mut current_time = submit_time;
1428        for level in 1..=required_levels {
1429            // Add delay for approval (1-3 business hours per level)
1430            let delay_hours = self.rng.gen_range(1..4);
1431            current_time += chrono::Duration::hours(delay_hours);
1432
1433            // Skip weekends
1434            while current_time.weekday() == chrono::Weekday::Sat
1435                || current_time.weekday() == chrono::Weekday::Sun
1436            {
1437                current_time += chrono::Duration::days(1);
1438            }
1439
1440            // Generate approver based on level
1441            let (approver_id, approver_role) = self.select_approver(level);
1442
1443            let approve_action = ApprovalAction::new(
1444                approver_id.clone(),
1445                format!("{:?}", approver_role),
1446                approver_role,
1447                ApprovalActionType::Approve,
1448                level,
1449            )
1450            .with_timestamp(current_time);
1451
1452            workflow.actions.push(approve_action);
1453            workflow.current_level = level;
1454        }
1455
1456        // Mark as approved
1457        workflow.status = ApprovalStatus::Approved;
1458        workflow.approved_at = Some(current_time);
1459
1460        entry.header.approval_workflow = Some(workflow);
1461    }
1462
1463    /// Select an approver based on the required level.
1464    fn select_approver(&mut self, level: u8) -> (String, UserPersona) {
1465        let persona = match level {
1466            1 => UserPersona::Manager,
1467            2 => UserPersona::Controller,
1468            _ => UserPersona::Executive,
1469        };
1470
1471        // Try to get from user pool first
1472        if let Some(ref pool) = self.user_pool {
1473            if let Some(user) = pool.get_random_user(persona, &mut self.rng) {
1474                return (user.user_id.clone(), persona);
1475            }
1476        }
1477
1478        // Fallback to generated approver
1479        let approver_id = match persona {
1480            UserPersona::Manager => format!("MGR{:04}", self.rng.gen_range(1..100)),
1481            UserPersona::Controller => format!("CTRL{:04}", self.rng.gen_range(1..20)),
1482            UserPersona::Executive => format!("EXEC{:04}", self.rng.gen_range(1..10)),
1483            _ => format!("USR{:04}", self.rng.gen_range(1..1000)),
1484        };
1485
1486        (approver_id, persona)
1487    }
1488
1489    /// Parse user persona from string.
1490    fn parse_persona(&self, persona_str: &str) -> UserPersona {
1491        match persona_str.to_lowercase().as_str() {
1492            s if s.contains("junior") => UserPersona::JuniorAccountant,
1493            s if s.contains("senior") => UserPersona::SeniorAccountant,
1494            s if s.contains("controller") => UserPersona::Controller,
1495            s if s.contains("manager") => UserPersona::Manager,
1496            s if s.contains("executive") => UserPersona::Executive,
1497            s if s.contains("automated") || s.contains("system") => UserPersona::AutomatedSystem,
1498            _ => UserPersona::JuniorAccountant, // Default
1499        }
1500    }
1501
1502    /// Enable or disable approval workflow.
1503    pub fn with_approval(mut self, enabled: bool) -> Self {
1504        self.approval_enabled = enabled;
1505        self
1506    }
1507
1508    /// Set the approval threshold amount.
1509    pub fn with_approval_threshold(mut self, threshold: rust_decimal::Decimal) -> Self {
1510        self.approval_threshold = threshold;
1511        self
1512    }
1513
1514    /// Set the temporal drift controller for simulating distribution changes over time.
1515    ///
1516    /// When drift is enabled, amounts and other distributions will shift based on
1517    /// the period (month) to simulate realistic temporal evolution like inflation
1518    /// or increasing fraud rates.
1519    pub fn with_drift_controller(mut self, controller: DriftController) -> Self {
1520        self.drift_controller = Some(controller);
1521        self
1522    }
1523
1524    /// Set drift configuration directly.
1525    ///
1526    /// Creates a drift controller from the config. Total periods is calculated
1527    /// from the date range.
1528    pub fn with_drift_config(mut self, config: DriftConfig, seed: u64) -> Self {
1529        if config.enabled {
1530            let total_periods = self.calculate_total_periods();
1531            self.drift_controller = Some(DriftController::new(config, seed, total_periods));
1532        }
1533        self
1534    }
1535
1536    /// Calculate total periods (months) in the date range.
1537    fn calculate_total_periods(&self) -> u32 {
1538        let start_year = self.start_date.year();
1539        let start_month = self.start_date.month();
1540        let end_year = self.end_date.year();
1541        let end_month = self.end_date.month();
1542
1543        ((end_year - start_year) * 12 + (end_month as i32 - start_month as i32) + 1).max(1) as u32
1544    }
1545
1546    /// Calculate the period number (0-indexed) for a given date.
1547    fn date_to_period(&self, date: NaiveDate) -> u32 {
1548        let start_year = self.start_date.year();
1549        let start_month = self.start_date.month() as i32;
1550        let date_year = date.year();
1551        let date_month = date.month() as i32;
1552
1553        ((date_year - start_year) * 12 + (date_month - start_month)).max(0) as u32
1554    }
1555
1556    /// Get drift adjustments for a given date.
1557    fn get_drift_adjustments(&self, date: NaiveDate) -> DriftAdjustments {
1558        if let Some(ref controller) = self.drift_controller {
1559            let period = self.date_to_period(date);
1560            controller.compute_adjustments(period)
1561        } else {
1562            DriftAdjustments::none()
1563        }
1564    }
1565
1566    /// Select a user from the pool or generate a generic user ID.
1567    fn select_user(&mut self, is_automated: bool) -> (String, String) {
1568        if let Some(ref pool) = self.user_pool {
1569            let persona = if is_automated {
1570                UserPersona::AutomatedSystem
1571            } else {
1572                // Random distribution among human personas
1573                let roll: f64 = self.rng.gen();
1574                if roll < 0.4 {
1575                    UserPersona::JuniorAccountant
1576                } else if roll < 0.7 {
1577                    UserPersona::SeniorAccountant
1578                } else if roll < 0.85 {
1579                    UserPersona::Controller
1580                } else {
1581                    UserPersona::Manager
1582                }
1583            };
1584
1585            if let Some(user) = pool.get_random_user(persona, &mut self.rng) {
1586                return (
1587                    user.user_id.clone(),
1588                    format!("{:?}", user.persona).to_lowercase(),
1589                );
1590            }
1591        }
1592
1593        // Fallback to generic format
1594        if is_automated {
1595            (
1596                format!("BATCH{:04}", self.rng.gen_range(1..=20)),
1597                "automated_system".to_string(),
1598            )
1599        } else {
1600            (
1601                format!("USER{:04}", self.rng.gen_range(1..=40)),
1602                "senior_accountant".to_string(),
1603            )
1604        }
1605    }
1606
1607    /// Select transaction source based on configuration weights.
1608    fn select_source(&mut self) -> TransactionSource {
1609        let roll: f64 = self.rng.gen();
1610        let dist = &self.config.source_distribution;
1611
1612        if roll < dist.manual {
1613            TransactionSource::Manual
1614        } else if roll < dist.manual + dist.automated {
1615            TransactionSource::Automated
1616        } else if roll < dist.manual + dist.automated + dist.recurring {
1617            TransactionSource::Recurring
1618        } else {
1619            TransactionSource::Adjustment
1620        }
1621    }
1622
1623    /// Select a business process based on configuration weights.
1624    fn select_business_process(&mut self) -> BusinessProcess {
1625        let roll: f64 = self.rng.gen();
1626
1627        // Default weights: O2C=35%, P2P=30%, R2R=20%, H2R=10%, A2R=5%
1628        if roll < 0.35 {
1629            BusinessProcess::O2C
1630        } else if roll < 0.65 {
1631            BusinessProcess::P2P
1632        } else if roll < 0.85 {
1633            BusinessProcess::R2R
1634        } else if roll < 0.95 {
1635            BusinessProcess::H2R
1636        } else {
1637            BusinessProcess::A2R
1638        }
1639    }
1640
1641    fn select_debit_account(&mut self) -> &GLAccount {
1642        let accounts = self.coa.get_accounts_by_type(AccountType::Asset);
1643        let expense_accounts = self.coa.get_accounts_by_type(AccountType::Expense);
1644
1645        // 60% asset, 40% expense for debits
1646        let all: Vec<_> = if self.rng.gen::<f64>() < 0.6 {
1647            accounts
1648        } else {
1649            expense_accounts
1650        };
1651
1652        all.choose(&mut self.rng)
1653            .copied()
1654            .unwrap_or_else(|| &self.coa.accounts[0])
1655    }
1656
1657    fn select_credit_account(&mut self) -> &GLAccount {
1658        let liability_accounts = self.coa.get_accounts_by_type(AccountType::Liability);
1659        let revenue_accounts = self.coa.get_accounts_by_type(AccountType::Revenue);
1660
1661        // 60% liability, 40% revenue for credits
1662        let all: Vec<_> = if self.rng.gen::<f64>() < 0.6 {
1663            liability_accounts
1664        } else {
1665            revenue_accounts
1666        };
1667
1668        all.choose(&mut self.rng)
1669            .copied()
1670            .unwrap_or_else(|| &self.coa.accounts[0])
1671    }
1672}
1673
1674impl Generator for JournalEntryGenerator {
1675    type Item = JournalEntry;
1676    type Config = (
1677        TransactionConfig,
1678        Arc<ChartOfAccounts>,
1679        Vec<String>,
1680        NaiveDate,
1681        NaiveDate,
1682    );
1683
1684    fn new(config: Self::Config, seed: u64) -> Self {
1685        Self::new_with_params(config.0, config.1, config.2, config.3, config.4, seed)
1686    }
1687
1688    fn generate_one(&mut self) -> Self::Item {
1689        self.generate()
1690    }
1691
1692    fn reset(&mut self) {
1693        self.rng = ChaCha8Rng::seed_from_u64(self.seed);
1694        self.line_sampler.reset(self.seed + 1);
1695        self.amount_sampler.reset(self.seed + 2);
1696        self.temporal_sampler.reset(self.seed + 3);
1697        self.count = 0;
1698        self.uuid_factory.reset();
1699
1700        // Reset reference generator by recreating it
1701        let mut ref_gen = ReferenceGenerator::new(
1702            self.start_date.year(),
1703            self.companies.first().map(|s| s.as_str()).unwrap_or("1000"),
1704        );
1705        ref_gen.set_prefix(
1706            ReferenceType::Invoice,
1707            &self.template_config.references.invoice_prefix,
1708        );
1709        ref_gen.set_prefix(
1710            ReferenceType::PurchaseOrder,
1711            &self.template_config.references.po_prefix,
1712        );
1713        ref_gen.set_prefix(
1714            ReferenceType::SalesOrder,
1715            &self.template_config.references.so_prefix,
1716        );
1717        self.reference_generator = ref_gen;
1718    }
1719
1720    fn count(&self) -> u64 {
1721        self.count
1722    }
1723
1724    fn seed(&self) -> u64 {
1725        self.seed
1726    }
1727}
1728
#[cfg(test)]
#[allow(clippy::unwrap_used)] // Fixture construction failing is a test bug.
mod tests {
    use super::*;
    use crate::ChartOfAccountsGenerator;

    /// Small manufacturing chart of accounts shared by every test.
    fn test_coa() -> Arc<ChartOfAccounts> {
        let mut coa_gen =
            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
        Arc::new(coa_gen.generate())
    }

    /// Shorthand for building a calendar date that is known to be valid.
    fn ymd(year: i32, month: u32, day: u32) -> NaiveDate {
        NaiveDate::from_ymd_opt(year, month, day).unwrap()
    }

    /// The single company code used throughout the tests.
    fn single_company() -> Vec<String> {
        vec!["1000".to_string()]
    }

    /// Default-config generator for company "1000" covering calendar year 2024.
    fn fy2024_generator(coa: Arc<ChartOfAccounts>, seed: u64) -> JournalEntryGenerator {
        JournalEntryGenerator::new_with_params(
            TransactionConfig::default(),
            coa,
            single_company(),
            ymd(2024, 1, 1),
            ymd(2024, 12, 31),
            seed,
        )
    }

    /// Entries without an injected human error must balance and have >= 2 lines.
    #[test]
    fn test_generate_balanced_entries() {
        let mut je_gen = fy2024_generator(test_coa(), 42);

        let mut balanced_count = 0;
        for _ in 0..100 {
            let entry = je_gen.generate();

            // Skip entries with human errors as they may be intentionally unbalanced
            let has_human_error = entry
                .header
                .header_text
                .as_ref()
                .map(|t| t.contains("[HUMAN_ERROR:"))
                .unwrap_or(false);

            if !has_human_error {
                assert!(
                    entry.is_balanced(),
                    "Entry {:?} is not balanced",
                    entry.header.document_id
                );
                balanced_count += 1;
            }
            assert!(entry.line_count() >= 2, "Entry has fewer than 2 lines");
        }

        // Ensure most entries are balanced (human errors are rare)
        assert!(
            balanced_count >= 80,
            "Expected at least 80 balanced entries, got {}",
            balanced_count
        );
    }

    /// Two generators built from identical inputs must emit identical entries.
    #[test]
    fn test_deterministic_generation() {
        let coa = test_coa();

        let mut gen1 = fy2024_generator(Arc::clone(&coa), 42);
        let mut gen2 = fy2024_generator(coa, 42);

        for _ in 0..50 {
            let e1 = gen1.generate();
            let e2 = gen2.generate();
            assert_eq!(e1.header.document_id, e2.header.document_id);
            assert_eq!(e1.total_debit(), e2.total_debit());
        }
    }

    /// With all template features on, headers, references and line texts are filled.
    #[test]
    fn test_templates_generate_descriptions() {
        // Enable all template features
        let template_config = TemplateConfig {
            names: datasynth_config::schema::NameTemplateConfig {
                generate_realistic_names: true,
                email_domain: "test.com".to_string(),
                culture_distribution: datasynth_config::schema::CultureDistribution::default(),
            },
            descriptions: datasynth_config::schema::DescriptionTemplateConfig {
                generate_header_text: true,
                generate_line_text: true,
            },
            references: datasynth_config::schema::ReferenceTemplateConfig {
                generate_references: true,
                invoice_prefix: "TEST-INV".to_string(),
                po_prefix: "TEST-PO".to_string(),
                so_prefix: "TEST-SO".to_string(),
            },
        };

        let mut je_gen = JournalEntryGenerator::new_with_full_config(
            TransactionConfig::default(),
            test_coa(),
            single_company(),
            ymd(2024, 1, 1),
            ymd(2024, 12, 31),
            42,
            template_config,
            None,
        )
        .with_persona_errors(false); // Disable for template testing

        for _ in 0..10 {
            let entry = je_gen.generate();

            // Verify header text is populated
            assert!(
                entry.header.header_text.is_some(),
                "Header text should be populated"
            );

            // Verify reference is populated
            assert!(
                entry.header.reference.is_some(),
                "Reference should be populated"
            );

            // Verify business process is set
            assert!(
                entry.header.business_process.is_some(),
                "Business process should be set"
            );

            // Verify line text is populated
            for line in &entry.lines {
                assert!(line.line_text.is_some(), "Line text should be populated");
            }

            // Entry should still be balanced
            assert!(entry.is_balanced());
        }
    }

    /// A generator wired to a user pool still stamps every entry with a creator.
    #[test]
    fn test_user_pool_integration() {
        let companies = single_company();

        // Generate user pool
        let mut user_gen = crate::UserGenerator::new(42);
        let user_pool = user_gen.generate_standard(&companies);

        let mut je_gen = JournalEntryGenerator::new_with_full_config(
            TransactionConfig::default(),
            test_coa(),
            companies,
            ymd(2024, 1, 1),
            ymd(2024, 12, 31),
            42,
            TemplateConfig::default(),
            Some(user_pool),
        );

        for _ in 0..20 {
            let entry = je_gen.generate();

            // Only non-emptiness is checked: the generator may fall back to
            // the generic BATCH/USER ids when the pool has no user for the
            // drawn persona, so the id format itself is not asserted here.
            assert!(!entry.header.created_by.is_empty());
        }
    }

    /// Attaching vendors, customers and materials flips the master-data flag.
    #[test]
    fn test_master_data_connection() {
        // Create test vendors
        let vendors = vec![
            Vendor::new("V-TEST-001", "Test Vendor Alpha", VendorType::Supplier),
            Vendor::new("V-TEST-002", "Test Vendor Beta", VendorType::Technology),
        ];

        // Create test customers
        let customers = vec![
            Customer::new("C-TEST-001", "Test Customer One", CustomerType::Corporate),
            Customer::new(
                "C-TEST-002",
                "Test Customer Two",
                CustomerType::SmallBusiness,
            ),
        ];

        // Create test materials
        let materials = vec![Material::new(
            "MAT-TEST-001",
            "Test Material A",
            MaterialType::RawMaterial,
        )];

        // Generator without any master data attached yet
        let generator = fy2024_generator(test_coa(), 42);
        assert!(!generator.is_using_real_master_data());

        // Connect master data
        let generator_with_data = generator
            .with_vendors(&vendors)
            .with_customers(&customers)
            .with_materials(&materials);

        // Should now be using real master data
        assert!(generator_with_data.is_using_real_master_data());
    }

    /// `with_master_data` attaches all three master-data sets in one call.
    #[test]
    fn test_with_master_data_convenience_method() {
        let vendors = vec![Vendor::new("V-001", "Vendor One", VendorType::Supplier)];
        let customers = vec![Customer::new(
            "C-001",
            "Customer One",
            CustomerType::Corporate,
        )];
        let materials = vec![Material::new(
            "MAT-001",
            "Material One",
            MaterialType::RawMaterial,
        )];

        let generator =
            fy2024_generator(test_coa(), 42).with_master_data(&vendors, &customers, &materials);

        assert!(generator.is_using_real_master_data());
    }

    /// Month end, year end, Fridays and Mondays must raise the error rate.
    #[test]
    fn test_stress_factors_increase_error_rate() {
        let generator = fy2024_generator(test_coa(), 42);

        let base_rate = 0.1;

        // Baseline day - no stress factors expected.
        // 2024-06-15 is a Saturday in mid-June: not a Monday or Friday and not
        // close to month end.
        let regular_day = ymd(2024, 6, 15);
        let regular_rate = generator.apply_stress_factors(base_rate, regular_day);
        assert!(
            (regular_rate - base_rate).abs() < 0.01,
            "Regular day should have minimal stress factor adjustment"
        );

        // Month end - expected to carry a higher rate than the baseline day
        let month_end = ymd(2024, 6, 29); // June 29 (Saturday)
        let month_end_rate = generator.apply_stress_factors(base_rate, month_end);
        assert!(
            month_end_rate > regular_rate,
            "Month end should have higher error rate than regular day"
        );

        // Year end - expected to carry the highest rate of all
        let year_end = ymd(2024, 12, 30); // December 30
        let year_end_rate = generator.apply_stress_factors(base_rate, year_end);
        assert!(
            year_end_rate > month_end_rate,
            "Year end should have highest error rate"
        );

        // Friday stress
        let friday = ymd(2024, 6, 14); // Friday
        let friday_rate = generator.apply_stress_factors(base_rate, friday);
        assert!(
            friday_rate > regular_rate,
            "Friday should have higher error rate than mid-week"
        );

        // Monday stress
        let monday = ymd(2024, 6, 17); // Monday
        let monday_rate = generator.apply_stress_factors(base_rate, monday);
        assert!(
            monday_rate > regular_rate,
            "Monday should have higher error rate than mid-week"
        );
    }

    /// Batched and unbatched entries all balance, and posting dates repeat.
    #[test]
    fn test_batching_produces_similar_entries() {
        // Use seed 123 which is more likely to trigger batching
        let mut je_gen = fy2024_generator(test_coa(), 123).with_persona_errors(false); // Disable to ensure balanced entries

        // Generate many entries - at 15% batch rate, should see some batches
        let entries: Vec<JournalEntry> = (0..200).map(|_| je_gen.generate()).collect();

        // Check that all entries are balanced (batched or not)
        for entry in &entries {
            assert!(
                entry.is_balanced(),
                "All entries including batched should be balanced"
            );
        }

        // Count entries with same-day posting dates (batch indicator)
        let mut date_counts: std::collections::HashMap<NaiveDate, usize> =
            std::collections::HashMap::new();
        for entry in &entries {
            *date_counts.entry(entry.header.posting_date).or_insert(0) += 1;
        }

        // With batching, some dates should have multiple entries
        let dates_with_multiple = date_counts.values().filter(|&&c| c > 1).count();
        assert!(
            dates_with_multiple > 0,
            "With batching, should see some dates with multiple entries"
        );
    }

    /// With business-day patterns enabled, no posting date lands on a weekend.
    #[test]
    fn test_temporal_patterns_business_days() {
        use datasynth_config::schema::{
            BusinessDaySchemaConfig, CalendarSchemaConfig, TemporalPatternsConfig,
        };

        // Create temporal patterns config with business days enabled
        let temporal_config = TemporalPatternsConfig {
            enabled: true,
            business_days: BusinessDaySchemaConfig {
                enabled: true,
                ..Default::default()
            },
            calendars: CalendarSchemaConfig {
                regions: vec!["US".to_string()],
                custom_holidays: vec![],
            },
            ..Default::default()
        };

        let mut je_gen = JournalEntryGenerator::new_with_params(
            TransactionConfig::default(),
            test_coa(),
            single_company(),
            ymd(2024, 1, 1),
            ymd(2024, 3, 31), // Q1 2024
            42,
        )
        .with_temporal_patterns(temporal_config, 42)
        .with_persona_errors(false);

        // Generate entries and verify none fall on weekends
        let entries: Vec<JournalEntry> = (0..100).map(|_| je_gen.generate()).collect();

        for entry in &entries {
            let weekday = entry.header.posting_date.weekday();
            assert!(
                weekday != chrono::Weekday::Sat && weekday != chrono::Weekday::Sun,
                "Posting date {:?} should not be a weekend",
                entry.header.posting_date
            );
        }
    }
}