Skip to main content

datasynth_generators/
je_generator.rs

1//! Journal Entry generator with statistical distributions.
2
3use chrono::{Datelike, NaiveDate};
4use datasynth_core::utils::seeded_rng;
5use rand::prelude::*;
6use rand_chacha::ChaCha8Rng;
7use rust_decimal::prelude::*;
8use rust_decimal::Decimal;
9use std::sync::Arc;
10
11use tracing::debug;
12
13use datasynth_config::schema::{
14    FraudConfig, GeneratorConfig, TemplateConfig, TemporalPatternsConfig, TransactionConfig,
15};
16use datasynth_core::distributions::{
17    BusinessDayCalculator, CrossDayConfig, DriftAdjustments, DriftConfig, DriftController,
18    EventType, LagDistribution, PeriodEndConfig, PeriodEndDynamics, PeriodEndModel,
19    ProcessingLagCalculator, ProcessingLagConfig, *,
20};
21use datasynth_core::models::*;
22use datasynth_core::templates::{
23    descriptions::DescriptionContext, DescriptionGenerator, ReferenceGenerator, ReferenceType,
24};
25use datasynth_core::traits::Generator;
26use datasynth_core::uuid_factory::{DeterministicUuidFactory, GeneratorType};
27use datasynth_core::CountryPack;
28
29use crate::company_selector::WeightedCompanySelector;
30use crate::user_generator::{UserGenerator, UserGeneratorConfig};
31
/// Generator for realistic journal entries.
///
/// Bundles the seeded samplers, master-data pools and optional fraud /
/// batching / temporal-pattern state that `generate` draws on when it
/// builds an entry.
pub struct JournalEntryGenerator {
    /// Main RNG, created with `seeded_rng(seed, 0)`; used e.g. for company
    /// selection and the fraud roll.
    rng: ChaCha8Rng,
    /// Base seed the generator was constructed with; reused to derive the
    /// user-generator seed in `with_country_pack_names`.
    seed: u64,
    /// Transaction configuration the samplers were built from.
    config: TransactionConfig,
    /// Shared chart of accounts.
    coa: Arc<ChartOfAccounts>,
    /// Company codes entries may be generated for.
    companies: Vec<String>,
    /// Picks the company code of each entry (uniform by default, weighted
    /// when built through `from_generator_config`).
    company_selector: WeightedCompanySelector,
    /// Samples the line-item specification of an entry.
    line_sampler: LineItemSampler,
    /// Amount sampler built from `config.amounts`.
    amount_sampler: AmountSampler,
    /// Samples posting dates and times; also receives period-end dynamics.
    temporal_sampler: TemporalSampler,
    /// First date passed to `temporal_sampler.sample_date`.
    start_date: NaiveDate,
    /// Last date passed to `temporal_sampler.sample_date`.
    end_date: NaiveDate,
    /// Counter incremented by `generate` for each non-batched entry.
    count: u64,
    /// Factory producing deterministic document UUIDs.
    uuid_factory: DeterministicUuidFactory,
    // Enhanced features
    /// User pool; `None` when realistic names are disabled and no pool was
    /// supplied by the caller.
    user_pool: Option<UserPool>,
    description_generator: DescriptionGenerator,
    reference_generator: ReferenceGenerator,
    template_config: TemplateConfig,
    /// Vendor pool; `VendorPool::standard()` until `with_vendors` replaces it.
    vendor_pool: VendorPool,
    /// Customer pool; `CustomerPool::standard()` until `with_customers`
    /// replaces it.
    customer_pool: CustomerPool,
    // Material pool for realistic material references
    material_pool: Option<MaterialPool>,
    // Flag indicating whether we're using real master data vs defaults
    using_real_master_data: bool,
    // Fraud generation
    fraud_config: FraudConfig,
    // Persona-based error injection (enabled by default)
    persona_errors_enabled: bool,
    // Approval threshold enforcement (enabled by default, threshold 10000)
    approval_enabled: bool,
    approval_threshold: rust_decimal::Decimal,
    // Batching behavior - humans often process similar items together
    batch_state: Option<BatchState>,
    // Temporal drift controller for simulating distribution changes over time
    drift_controller: Option<DriftController>,
    // Temporal patterns components
    /// Set when business-day handling is enabled; `generate` uses it to move
    /// posting dates onto business days.
    business_day_calculator: Option<BusinessDayCalculator>,
    /// Set when processing lags are enabled in the temporal patterns config.
    processing_lag_calculator: Option<ProcessingLagCalculator>,
    /// The temporal patterns config last applied, if any.
    temporal_patterns_config: Option<TemporalPatternsConfig>,
}
74
/// State for tracking batch processing behavior.
///
/// When humans process transactions, they often batch similar items together
/// (e.g., processing all invoices from one vendor, entering similar expenses).
///
/// While a state with `remaining > 0` is stored on the generator, `generate`
/// delegates to the batched-entry path instead of sampling a fresh entry.
#[derive(Clone)]
struct BatchState {
    /// The base entry template to vary
    base_account_number: String,
    /// Amount of the base entry (presumably varied per batched entry —
    /// confirm against the batched-entry generation code).
    base_amount: rust_decimal::Decimal,
    /// Business process of the base entry, if one was recorded.
    base_business_process: Option<BusinessProcess>,
    /// Posting date of the base entry.
    base_posting_date: NaiveDate,
    /// Remaining entries in this batch
    remaining: u8,
}
89
90impl JournalEntryGenerator {
91    /// Create a new journal entry generator.
92    pub fn new_with_params(
93        config: TransactionConfig,
94        coa: Arc<ChartOfAccounts>,
95        companies: Vec<String>,
96        start_date: NaiveDate,
97        end_date: NaiveDate,
98        seed: u64,
99    ) -> Self {
100        Self::new_with_full_config(
101            config,
102            coa,
103            companies,
104            start_date,
105            end_date,
106            seed,
107            TemplateConfig::default(),
108            None,
109        )
110    }
111
112    /// Create a new journal entry generator with full configuration.
113    #[allow(clippy::too_many_arguments)]
114    pub fn new_with_full_config(
115        config: TransactionConfig,
116        coa: Arc<ChartOfAccounts>,
117        companies: Vec<String>,
118        start_date: NaiveDate,
119        end_date: NaiveDate,
120        seed: u64,
121        template_config: TemplateConfig,
122        user_pool: Option<UserPool>,
123    ) -> Self {
124        // Initialize user pool if not provided
125        let user_pool = user_pool.or_else(|| {
126            if template_config.names.generate_realistic_names {
127                let user_gen_config = UserGeneratorConfig {
128                    culture_distribution: vec![
129                        (
130                            datasynth_core::templates::NameCulture::WesternUs,
131                            template_config.names.culture_distribution.western_us,
132                        ),
133                        (
134                            datasynth_core::templates::NameCulture::Hispanic,
135                            template_config.names.culture_distribution.hispanic,
136                        ),
137                        (
138                            datasynth_core::templates::NameCulture::German,
139                            template_config.names.culture_distribution.german,
140                        ),
141                        (
142                            datasynth_core::templates::NameCulture::French,
143                            template_config.names.culture_distribution.french,
144                        ),
145                        (
146                            datasynth_core::templates::NameCulture::Chinese,
147                            template_config.names.culture_distribution.chinese,
148                        ),
149                        (
150                            datasynth_core::templates::NameCulture::Japanese,
151                            template_config.names.culture_distribution.japanese,
152                        ),
153                        (
154                            datasynth_core::templates::NameCulture::Indian,
155                            template_config.names.culture_distribution.indian,
156                        ),
157                    ],
158                    email_domain: template_config.names.email_domain.clone(),
159                    generate_realistic_names: true,
160                };
161                let mut user_gen = UserGenerator::with_config(seed + 100, user_gen_config);
162                Some(user_gen.generate_standard(&companies))
163            } else {
164                None
165            }
166        });
167
168        // Initialize reference generator
169        let mut ref_gen = ReferenceGenerator::new(
170            start_date.year(),
171            companies.first().map(|s| s.as_str()).unwrap_or("1000"),
172        );
173        ref_gen.set_prefix(
174            ReferenceType::Invoice,
175            &template_config.references.invoice_prefix,
176        );
177        ref_gen.set_prefix(
178            ReferenceType::PurchaseOrder,
179            &template_config.references.po_prefix,
180        );
181        ref_gen.set_prefix(
182            ReferenceType::SalesOrder,
183            &template_config.references.so_prefix,
184        );
185
186        // Create weighted company selector (uniform weights for this constructor)
187        let company_selector = WeightedCompanySelector::uniform(companies.clone());
188
189        Self {
190            rng: seeded_rng(seed, 0),
191            seed,
192            config: config.clone(),
193            coa,
194            companies,
195            company_selector,
196            line_sampler: LineItemSampler::with_config(
197                seed + 1,
198                config.line_item_distribution.clone(),
199                config.even_odd_distribution.clone(),
200                config.debit_credit_distribution.clone(),
201            ),
202            amount_sampler: AmountSampler::with_config(seed + 2, config.amounts.clone()),
203            temporal_sampler: TemporalSampler::with_config(
204                seed + 3,
205                config.seasonality.clone(),
206                WorkingHoursConfig::default(),
207                Vec::new(),
208            ),
209            start_date,
210            end_date,
211            count: 0,
212            uuid_factory: DeterministicUuidFactory::new(seed, GeneratorType::JournalEntry),
213            user_pool,
214            description_generator: DescriptionGenerator::new(),
215            reference_generator: ref_gen,
216            template_config,
217            vendor_pool: VendorPool::standard(),
218            customer_pool: CustomerPool::standard(),
219            material_pool: None,
220            using_real_master_data: false,
221            fraud_config: FraudConfig::default(),
222            persona_errors_enabled: true, // Enable by default for realism
223            approval_enabled: true,       // Enable by default for realism
224            approval_threshold: rust_decimal::Decimal::new(10000, 0), // $10,000 default threshold
225            batch_state: None,
226            drift_controller: None,
227            business_day_calculator: None,
228            processing_lag_calculator: None,
229            temporal_patterns_config: None,
230        }
231    }
232
233    /// Create from a full GeneratorConfig.
234    ///
235    /// This constructor uses the volume_weight from company configs
236    /// for weighted company selection, and fraud config from GeneratorConfig.
237    pub fn from_generator_config(
238        full_config: &GeneratorConfig,
239        coa: Arc<ChartOfAccounts>,
240        start_date: NaiveDate,
241        end_date: NaiveDate,
242        seed: u64,
243    ) -> Self {
244        let companies: Vec<String> = full_config
245            .companies
246            .iter()
247            .map(|c| c.code.clone())
248            .collect();
249
250        // Create weighted selector using volume_weight from company configs
251        let company_selector = WeightedCompanySelector::from_configs(&full_config.companies);
252
253        let mut generator = Self::new_with_full_config(
254            full_config.transactions.clone(),
255            coa,
256            companies,
257            start_date,
258            end_date,
259            seed,
260            full_config.templates.clone(),
261            None,
262        );
263
264        // Override the uniform selector with weighted selector
265        generator.company_selector = company_selector;
266
267        // Set fraud config
268        generator.fraud_config = full_config.fraud.clone();
269
270        // Configure temporal patterns if enabled
271        let temporal_config = &full_config.temporal_patterns;
272        if temporal_config.enabled {
273            generator = generator.with_temporal_patterns(temporal_config.clone(), seed);
274        }
275
276        generator
277    }
278
279    /// Configure temporal patterns including business day calculations and processing lags.
280    ///
281    /// This enables realistic temporal behavior including:
282    /// - Business day awareness (no postings on weekends/holidays)
283    /// - Processing lag modeling (event-to-posting delays)
284    /// - Period-end dynamics (volume spikes at month/quarter/year end)
285    pub fn with_temporal_patterns(mut self, config: TemporalPatternsConfig, seed: u64) -> Self {
286        // Create business day calculator if enabled
287        if config.business_days.enabled {
288            let region = config
289                .calendars
290                .regions
291                .first()
292                .map(|r| Self::parse_region(r))
293                .unwrap_or(Region::US);
294
295            let calendar = HolidayCalendar::new(region, self.start_date.year());
296            self.business_day_calculator = Some(BusinessDayCalculator::new(calendar));
297        }
298
299        // Create processing lag calculator if enabled
300        if config.processing_lags.enabled {
301            let lag_config = Self::convert_processing_lag_config(&config.processing_lags);
302            self.processing_lag_calculator =
303                Some(ProcessingLagCalculator::with_config(seed, lag_config));
304        }
305
306        // Create period-end dynamics if configured
307        let model = config.period_end.model.as_deref().unwrap_or("flat");
308        if model != "flat"
309            || config
310                .period_end
311                .month_end
312                .as_ref()
313                .is_some_and(|m| m.peak_multiplier.unwrap_or(1.0) != 1.0)
314        {
315            let dynamics = Self::convert_period_end_config(&config.period_end);
316            self.temporal_sampler.set_period_end_dynamics(dynamics);
317        }
318
319        self.temporal_patterns_config = Some(config);
320        self
321    }
322
323    /// Configure temporal patterns using a [`CountryPack`] for the holiday calendar.
324    ///
325    /// This is an alternative to [`with_temporal_patterns`] that derives the
326    /// holiday calendar from a country-pack definition rather than the built-in
327    /// region-based calendars.  All other temporal behaviour (business-day
328    /// adjustment, processing lags, period-end dynamics) is configured
329    /// identically.
330    pub fn with_country_pack_temporal(
331        mut self,
332        config: TemporalPatternsConfig,
333        seed: u64,
334        pack: &CountryPack,
335    ) -> Self {
336        // Create business day calculator using the country pack calendar
337        if config.business_days.enabled {
338            let calendar = HolidayCalendar::from_country_pack(pack, self.start_date.year());
339            self.business_day_calculator = Some(BusinessDayCalculator::new(calendar));
340        }
341
342        // Create processing lag calculator if enabled
343        if config.processing_lags.enabled {
344            let lag_config = Self::convert_processing_lag_config(&config.processing_lags);
345            self.processing_lag_calculator =
346                Some(ProcessingLagCalculator::with_config(seed, lag_config));
347        }
348
349        // Create period-end dynamics if configured
350        let model = config.period_end.model.as_deref().unwrap_or("flat");
351        if model != "flat"
352            || config
353                .period_end
354                .month_end
355                .as_ref()
356                .is_some_and(|m| m.peak_multiplier.unwrap_or(1.0) != 1.0)
357        {
358            let dynamics = Self::convert_period_end_config(&config.period_end);
359            self.temporal_sampler.set_period_end_dynamics(dynamics);
360        }
361
362        self.temporal_patterns_config = Some(config);
363        self
364    }
365
366    /// Convert schema processing lag config to core config.
367    fn convert_processing_lag_config(
368        schema: &datasynth_config::schema::ProcessingLagSchemaConfig,
369    ) -> ProcessingLagConfig {
370        let mut config = ProcessingLagConfig {
371            enabled: schema.enabled,
372            ..Default::default()
373        };
374
375        // Helper to convert lag schema to distribution
376        let convert_lag = |lag: &datasynth_config::schema::LagDistributionSchemaConfig| {
377            let mut dist = LagDistribution::log_normal(lag.mu, lag.sigma);
378            if let Some(min) = lag.min_hours {
379                dist.min_lag_hours = min;
380            }
381            if let Some(max) = lag.max_hours {
382                dist.max_lag_hours = max;
383            }
384            dist
385        };
386
387        // Apply event-specific lags
388        if let Some(ref lag) = schema.sales_order_lag {
389            config
390                .event_lags
391                .insert(EventType::SalesOrder, convert_lag(lag));
392        }
393        if let Some(ref lag) = schema.purchase_order_lag {
394            config
395                .event_lags
396                .insert(EventType::PurchaseOrder, convert_lag(lag));
397        }
398        if let Some(ref lag) = schema.goods_receipt_lag {
399            config
400                .event_lags
401                .insert(EventType::GoodsReceipt, convert_lag(lag));
402        }
403        if let Some(ref lag) = schema.invoice_receipt_lag {
404            config
405                .event_lags
406                .insert(EventType::InvoiceReceipt, convert_lag(lag));
407        }
408        if let Some(ref lag) = schema.invoice_issue_lag {
409            config
410                .event_lags
411                .insert(EventType::InvoiceIssue, convert_lag(lag));
412        }
413        if let Some(ref lag) = schema.payment_lag {
414            config
415                .event_lags
416                .insert(EventType::Payment, convert_lag(lag));
417        }
418        if let Some(ref lag) = schema.journal_entry_lag {
419            config
420                .event_lags
421                .insert(EventType::JournalEntry, convert_lag(lag));
422        }
423
424        // Apply cross-day posting config
425        if let Some(ref cross_day) = schema.cross_day_posting {
426            config.cross_day = CrossDayConfig {
427                enabled: cross_day.enabled,
428                probability_by_hour: cross_day.probability_by_hour.clone(),
429                ..Default::default()
430            };
431        }
432
433        config
434    }
435
436    /// Convert schema period-end config to core PeriodEndDynamics.
437    fn convert_period_end_config(
438        schema: &datasynth_config::schema::PeriodEndSchemaConfig,
439    ) -> PeriodEndDynamics {
440        let model_type = schema.model.as_deref().unwrap_or("exponential");
441
442        // Helper to convert period config
443        let convert_period =
444            |period: Option<&datasynth_config::schema::PeriodEndModelSchemaConfig>,
445             default_peak: f64|
446             -> PeriodEndConfig {
447                if let Some(p) = period {
448                    let model = match model_type {
449                        "flat" => PeriodEndModel::FlatMultiplier {
450                            multiplier: p.peak_multiplier.unwrap_or(default_peak),
451                        },
452                        "extended_crunch" => PeriodEndModel::ExtendedCrunch {
453                            start_day: p.start_day.unwrap_or(-10),
454                            sustained_high_days: p.sustained_high_days.unwrap_or(3),
455                            peak_multiplier: p.peak_multiplier.unwrap_or(default_peak),
456                            ramp_up_days: 3, // Default ramp-up period
457                        },
458                        _ => PeriodEndModel::ExponentialAcceleration {
459                            start_day: p.start_day.unwrap_or(-10),
460                            base_multiplier: p.base_multiplier.unwrap_or(1.0),
461                            peak_multiplier: p.peak_multiplier.unwrap_or(default_peak),
462                            decay_rate: p.decay_rate.unwrap_or(0.3),
463                        },
464                    };
465                    PeriodEndConfig {
466                        enabled: true,
467                        model,
468                        additional_multiplier: p.additional_multiplier.unwrap_or(1.0),
469                    }
470                } else {
471                    PeriodEndConfig {
472                        enabled: true,
473                        model: PeriodEndModel::ExponentialAcceleration {
474                            start_day: -10,
475                            base_multiplier: 1.0,
476                            peak_multiplier: default_peak,
477                            decay_rate: 0.3,
478                        },
479                        additional_multiplier: 1.0,
480                    }
481                }
482            };
483
484        PeriodEndDynamics::new(
485            convert_period(schema.month_end.as_ref(), 2.0),
486            convert_period(schema.quarter_end.as_ref(), 3.5),
487            convert_period(schema.year_end.as_ref(), 5.0),
488        )
489    }
490
491    /// Parse a region string into a Region enum.
492    fn parse_region(region_str: &str) -> Region {
493        match region_str.to_uppercase().as_str() {
494            "US" => Region::US,
495            "DE" => Region::DE,
496            "GB" => Region::GB,
497            "CN" => Region::CN,
498            "JP" => Region::JP,
499            "IN" => Region::IN,
500            "BR" => Region::BR,
501            "MX" => Region::MX,
502            "AU" => Region::AU,
503            "SG" => Region::SG,
504            "KR" => Region::KR,
505            _ => Region::US,
506        }
507    }
508
    /// Set a custom company selector.
    ///
    /// Replaces the selector used to pick the company code of each generated
    /// entry (uniform by default, or weighted when the generator was built
    /// via `from_generator_config`).
    pub fn set_company_selector(&mut self, selector: WeightedCompanySelector) {
        self.company_selector = selector;
    }
513
    /// Get the current company selector.
    ///
    /// Returns a shared reference; use `set_company_selector` to replace it.
    pub fn company_selector(&self) -> &WeightedCompanySelector {
        &self.company_selector
    }
518
    /// Set fraud configuration.
    ///
    /// Replaces the current fraud settings (initially `FraudConfig::default()`),
    /// which control whether entries are flagged as fraud, at what rate, and
    /// with which fraud-type distribution.
    pub fn set_fraud_config(&mut self, config: FraudConfig) {
        self.fraud_config = config;
    }
523
524    /// Set vendors from generated master data.
525    ///
526    /// This replaces the default vendor pool with actual generated vendors,
527    /// ensuring JEs reference real master data entities.
528    pub fn with_vendors(mut self, vendors: &[Vendor]) -> Self {
529        if !vendors.is_empty() {
530            self.vendor_pool = VendorPool::from_vendors(vendors.to_vec());
531            self.using_real_master_data = true;
532        }
533        self
534    }
535
536    /// Set customers from generated master data.
537    ///
538    /// This replaces the default customer pool with actual generated customers,
539    /// ensuring JEs reference real master data entities.
540    pub fn with_customers(mut self, customers: &[Customer]) -> Self {
541        if !customers.is_empty() {
542            self.customer_pool = CustomerPool::from_customers(customers.to_vec());
543            self.using_real_master_data = true;
544        }
545        self
546    }
547
548    /// Set materials from generated master data.
549    ///
550    /// This provides material references for JEs that involve inventory movements.
551    pub fn with_materials(mut self, materials: &[Material]) -> Self {
552        if !materials.is_empty() {
553            self.material_pool = Some(MaterialPool::from_materials(materials.to_vec()));
554            self.using_real_master_data = true;
555        }
556        self
557    }
558
559    /// Set all master data at once for convenience.
560    ///
561    /// This is the recommended way to configure the JE generator with
562    /// generated master data to ensure data coherence.
563    pub fn with_master_data(
564        self,
565        vendors: &[Vendor],
566        customers: &[Customer],
567        materials: &[Material],
568    ) -> Self {
569        self.with_vendors(vendors)
570            .with_customers(customers)
571            .with_materials(materials)
572    }
573
574    /// Replace the user pool with one generated from a [`CountryPack`].
575    ///
576    /// This is an alternative to the default name-culture distribution that
577    /// derives name pools and weights from the country-pack's `names` section.
578    /// The existing user pool (if any) is discarded and regenerated using
579    /// [`MultiCultureNameGenerator::from_country_pack`].
580    pub fn with_country_pack_names(mut self, pack: &CountryPack) -> Self {
581        let name_gen =
582            datasynth_core::templates::MultiCultureNameGenerator::from_country_pack(pack);
583        let config = UserGeneratorConfig {
584            // The culture distribution is embedded in the name generator
585            // itself, so we use an empty list here.
586            culture_distribution: Vec::new(),
587            email_domain: name_gen.email_domain().to_string(),
588            generate_realistic_names: true,
589        };
590        let mut user_gen = UserGenerator::with_name_generator(self.seed + 100, config, name_gen);
591        self.user_pool = Some(user_gen.generate_standard(&self.companies));
592        self
593    }
594
    /// Check if the generator is using real master data.
    ///
    /// Starts out `false` and becomes `true` once `with_vendors`,
    /// `with_customers` or `with_materials` has been given a non-empty slice.
    pub fn is_using_real_master_data(&self) -> bool {
        self.using_real_master_data
    }
599
600    /// Determine if this transaction should be fraudulent.
601    fn determine_fraud(&mut self) -> Option<FraudType> {
602        if !self.fraud_config.enabled {
603            return None;
604        }
605
606        // Roll for fraud based on fraud rate
607        if self.rng.random::<f64>() >= self.fraud_config.fraud_rate {
608            return None;
609        }
610
611        // Select fraud type based on distribution
612        Some(self.select_fraud_type())
613    }
614
615    /// Select a fraud type based on the configured distribution.
616    fn select_fraud_type(&mut self) -> FraudType {
617        let dist = &self.fraud_config.fraud_type_distribution;
618        let roll: f64 = self.rng.random();
619
620        let mut cumulative = 0.0;
621
622        cumulative += dist.suspense_account_abuse;
623        if roll < cumulative {
624            return FraudType::SuspenseAccountAbuse;
625        }
626
627        cumulative += dist.fictitious_transaction;
628        if roll < cumulative {
629            return FraudType::FictitiousTransaction;
630        }
631
632        cumulative += dist.revenue_manipulation;
633        if roll < cumulative {
634            return FraudType::RevenueManipulation;
635        }
636
637        cumulative += dist.expense_capitalization;
638        if roll < cumulative {
639            return FraudType::ExpenseCapitalization;
640        }
641
642        cumulative += dist.split_transaction;
643        if roll < cumulative {
644            return FraudType::SplitTransaction;
645        }
646
647        cumulative += dist.timing_anomaly;
648        if roll < cumulative {
649            return FraudType::TimingAnomaly;
650        }
651
652        cumulative += dist.unauthorized_access;
653        if roll < cumulative {
654            return FraudType::UnauthorizedAccess;
655        }
656
657        // Default fallback
658        FraudType::DuplicatePayment
659    }
660
661    /// Map a fraud type to an amount pattern for suspicious amounts.
662    fn fraud_type_to_amount_pattern(&self, fraud_type: FraudType) -> FraudAmountPattern {
663        match fraud_type {
664            FraudType::SplitTransaction | FraudType::JustBelowThreshold => {
665                FraudAmountPattern::ThresholdAdjacent
666            }
667            FraudType::FictitiousTransaction
668            | FraudType::FictitiousEntry
669            | FraudType::SuspenseAccountAbuse
670            | FraudType::RoundDollarManipulation => FraudAmountPattern::ObviousRoundNumbers,
671            FraudType::RevenueManipulation
672            | FraudType::ExpenseCapitalization
673            | FraudType::ImproperCapitalization
674            | FraudType::ReserveManipulation
675            | FraudType::UnauthorizedAccess
676            | FraudType::PrematureRevenue
677            | FraudType::UnderstatedLiabilities
678            | FraudType::OverstatedAssets
679            | FraudType::ChannelStuffing => FraudAmountPattern::StatisticallyImprobable,
680            FraudType::DuplicatePayment
681            | FraudType::TimingAnomaly
682            | FraudType::SelfApproval
683            | FraudType::ExceededApprovalLimit
684            | FraudType::SegregationOfDutiesViolation
685            | FraudType::UnauthorizedApproval
686            | FraudType::CollusiveApproval
687            | FraudType::FictitiousVendor
688            | FraudType::ShellCompanyPayment
689            | FraudType::Kickback
690            | FraudType::KickbackScheme
691            | FraudType::InvoiceManipulation
692            | FraudType::AssetMisappropriation
693            | FraudType::InventoryTheft
694            | FraudType::GhostEmployee => FraudAmountPattern::Normal,
695            // Accounting Standards Fraud Types (ASC 606/IFRS 15 - Revenue)
696            FraudType::ImproperRevenueRecognition
697            | FraudType::ImproperPoAllocation
698            | FraudType::VariableConsiderationManipulation
699            | FraudType::ContractModificationMisstatement => {
700                FraudAmountPattern::StatisticallyImprobable
701            }
702            // Accounting Standards Fraud Types (ASC 842/IFRS 16 - Leases)
703            FraudType::LeaseClassificationManipulation
704            | FraudType::OffBalanceSheetLease
705            | FraudType::LeaseLiabilityUnderstatement
706            | FraudType::RouAssetMisstatement => FraudAmountPattern::StatisticallyImprobable,
707            // Accounting Standards Fraud Types (ASC 820/IFRS 13 - Fair Value)
708            FraudType::FairValueHierarchyManipulation
709            | FraudType::Level3InputManipulation
710            | FraudType::ValuationTechniqueManipulation => {
711                FraudAmountPattern::StatisticallyImprobable
712            }
713            // Accounting Standards Fraud Types (ASC 360/IAS 36 - Impairment)
714            FraudType::DelayedImpairment
715            | FraudType::ImpairmentTestAvoidance
716            | FraudType::CashFlowProjectionManipulation
717            | FraudType::ImproperImpairmentReversal => FraudAmountPattern::StatisticallyImprobable,
718            // Sourcing/Procurement Fraud
719            FraudType::BidRigging
720            | FraudType::PhantomVendorContract
721            | FraudType::ConflictOfInterestSourcing => FraudAmountPattern::Normal,
722            FraudType::SplitContractThreshold => FraudAmountPattern::ThresholdAdjacent,
723            // HR/Payroll Fraud
724            FraudType::GhostEmployeePayroll
725            | FraudType::PayrollInflation
726            | FraudType::DuplicateExpenseReport
727            | FraudType::FictitiousExpense => FraudAmountPattern::Normal,
728            FraudType::SplitExpenseToAvoidApproval => FraudAmountPattern::ThresholdAdjacent,
729            // O2C Fraud
730            FraudType::RevenueTimingManipulation => FraudAmountPattern::StatisticallyImprobable,
731            FraudType::QuotePriceOverride => FraudAmountPattern::Normal,
732        }
733    }
734
    /// Generate a deterministic UUID using the factory.
    ///
    /// Returns the next UUID from the `DeterministicUuidFactory` that was
    /// created with the generator's seed and `GeneratorType::JournalEntry`.
    // NOTE(review): called through `&self`, so the factory presumably advances
    // its sequence via interior mutability — confirm in `uuid_factory`.
    #[inline]
    fn generate_deterministic_uuid(&self) -> uuid::Uuid {
        self.uuid_factory.next()
    }
740
    /// Generate a single journal entry.
    ///
    /// If a batch is in progress (`batch_state` is set and has entries
    /// remaining) this delegates to `generate_batched_entry`. Otherwise it
    /// builds a fresh entry:
    ///
    /// 1. sample a posting date (moved onto a business day when a calculator is configured),
    /// 2. pick company, line-count spec, source, business process and fraud type,
    /// 3. build the header (timestamps, user, optional header text / reference),
    /// 4. sample a total amount, apply drift and (for non-automated sources) human variation,
    /// 5. split that total into debit lines and, independently, into credit lines,
    /// 6. optionally inject a persona error and attach an approval workflow,
    /// 7. possibly open a new batch seeded from this entry.
    ///
    /// NOTE: several steps draw from `self.rng` and the samplers; reordering
    /// them changes which random values each step receives.
    pub fn generate(&mut self) -> JournalEntry {
        debug!(
            count = self.count,
            companies = self.companies.len(),
            start_date = %self.start_date,
            end_date = %self.end_date,
            "Generating journal entry"
        );

        // Check if we're in a batch - if so, generate a batched entry
        if let Some(ref state) = self.batch_state {
            if state.remaining > 0 {
                return self.generate_batched_entry();
            }
        }

        self.count += 1;

        // Generate deterministic document ID
        let document_id = self.generate_deterministic_uuid();

        // Sample posting date
        let mut posting_date = self
            .temporal_sampler
            .sample_date(self.start_date, self.end_date);

        // Adjust posting date to be a business day if business day calculator is configured
        if let Some(ref calc) = self.business_day_calculator {
            if !calc.is_business_day(posting_date) {
                // Move to next business day
                posting_date = calc.next_business_day(posting_date, false);
                // If rolling forward overshoots end_date, fall back to a
                // business day derived from end_date instead.
                if posting_date > self.end_date {
                    posting_date = calc.prev_business_day(self.end_date, true);
                }
            }
        }

        // Select company using weighted selector
        let company_code = self.company_selector.select(&mut self.rng).to_string();

        // Sample line item specification
        let line_spec = self.line_sampler.sample();

        // Determine source type; Automated and Recurring are both treated as
        // "automated" for timing, user selection and amount variation below.
        let source = self.select_source();
        let is_automated = matches!(
            source,
            TransactionSource::Automated | TransactionSource::Recurring
        );

        // Select business process
        let business_process = self.select_business_process();

        // Determine if this is a fraudulent transaction
        let fraud_type = self.determine_fraud();
        let is_fraud = fraud_type.is_some();

        // Sample time based on source (the flag passed is "is human")
        let time = self.temporal_sampler.sample_time(!is_automated);
        let created_at = posting_date.and_time(time).and_utc();

        // Select user from pool or generate generic
        let (created_by, user_persona) = self.select_user(is_automated);

        // Create header with deterministic UUID
        let mut header =
            JournalEntryHeader::with_deterministic_id(company_code, posting_date, document_id);
        header.created_at = created_at;
        header.source = source;
        header.created_by = created_by;
        header.user_persona = user_persona;
        header.business_process = Some(business_process);
        header.is_fraud = is_fraud;
        header.fraud_type = fraud_type;

        // Generate description context
        let mut context =
            DescriptionContext::with_period(posting_date.month(), posting_date.year());

        // Add vendor/customer context based on business process
        match business_process {
            BusinessProcess::P2P => {
                if let Some(vendor) = self.vendor_pool.random_vendor(&mut self.rng) {
                    context.vendor_name = Some(vendor.name.clone());
                }
            }
            BusinessProcess::O2C => {
                if let Some(customer) = self.customer_pool.random_customer(&mut self.rng) {
                    context.customer_name = Some(customer.name.clone());
                }
            }
            _ => {}
        }

        // Generate header text if enabled
        if self.template_config.descriptions.generate_header_text {
            header.header_text = Some(self.description_generator.generate_header_text(
                business_process,
                &context,
                &mut self.rng,
            ));
        }

        // Generate reference if enabled
        if self.template_config.references.generate_references {
            header.reference = Some(
                self.reference_generator
                    .generate_for_process_year(business_process, posting_date.year()),
            );
        }

        // Generate line items
        let mut entry = JournalEntry::new(header);

        // Generate amount - use fraud pattern if this is a fraudulent transaction
        let base_amount = if let Some(ft) = fraud_type {
            let pattern = self.fraud_type_to_amount_pattern(ft);
            self.amount_sampler.sample_fraud(pattern)
        } else {
            self.amount_sampler.sample()
        };

        // Apply temporal drift if configured
        // NOTE(review): the seasonal factor is only applied when
        // `amount_mean_multiplier != 1.0`; confirm that skipping it otherwise
        // is intended.
        let drift_adjusted_amount = {
            let drift = self.get_drift_adjustments(posting_date);
            if drift.amount_mean_multiplier != 1.0 {
                // Combine the mean multiplier with the seasonal factor
                let multiplier = drift.amount_mean_multiplier * drift.seasonal_factor;
                // The f64 round-trip falls back to 1.0 / base_amount when a
                // conversion fails; the result is not rounded here.
                let adjusted = base_amount.to_f64().unwrap_or(1.0) * multiplier;
                Decimal::from_f64_retain(adjusted).unwrap_or(base_amount)
            } else {
                base_amount
            }
        };

        // Apply human variation to amounts for non-automated transactions
        let total_amount = if is_automated {
            drift_adjusted_amount // Automated systems use exact amounts
        } else {
            self.apply_human_variation(drift_adjusted_amount)
        };

        // Generate debit lines: split the total across `debit_count` lines
        let debit_amounts = self
            .amount_sampler
            .sample_summing_to(line_spec.debit_count, total_amount);
        for (i, amount) in debit_amounts.into_iter().enumerate() {
            let account_number = self.select_debit_account().account_number.clone();
            let mut line = JournalEntryLine::debit(
                entry.header.document_id,
                (i + 1) as u32,
                account_number.clone(),
                amount,
            );

            // Generate line text if enabled
            if self.template_config.descriptions.generate_line_text {
                line.line_text = Some(self.description_generator.generate_line_text(
                    &account_number,
                    &context,
                    &mut self.rng,
                ));
            }

            entry.add_line(line);
        }

        // Generate credit lines: an independent split of the SAME total, so
        // both sides are meant to sum to `total_amount` (presumably guaranteed
        // by `sample_summing_to` - verify against its implementation).
        let credit_amounts = self
            .amount_sampler
            .sample_summing_to(line_spec.credit_count, total_amount);
        for (i, amount) in credit_amounts.into_iter().enumerate() {
            let account_number = self.select_credit_account().account_number.clone();
            // Credit line numbers continue after the debit lines.
            let mut line = JournalEntryLine::credit(
                entry.header.document_id,
                (line_spec.debit_count + i + 1) as u32,
                account_number.clone(),
                amount,
            );

            // Generate line text if enabled
            if self.template_config.descriptions.generate_line_text {
                line.line_text = Some(self.description_generator.generate_line_text(
                    &account_number,
                    &context,
                    &mut self.rng,
                ));
            }

            entry.add_line(line);
        }

        // Apply persona-based errors if enabled and it's a human user
        if self.persona_errors_enabled && !is_automated {
            self.maybe_inject_persona_error(&mut entry);
        }

        // Attach an approval workflow if enabled (the callee decides between
        // auto-approval and a multi-level workflow based on the amount)
        if self.approval_enabled {
            self.maybe_apply_approval_workflow(&mut entry, posting_date);
        }

        // Maybe start a batch of similar entries for realism
        self.maybe_start_batch(&entry);

        entry
    }
950
951    /// Enable or disable persona-based error injection.
952    ///
953    /// When enabled, entries created by human personas have a chance
954    /// to contain realistic human errors based on their experience level.
955    pub fn with_persona_errors(mut self, enabled: bool) -> Self {
956        self.persona_errors_enabled = enabled;
957        self
958    }
959
960    /// Set fraud configuration for fraud injection.
961    ///
962    /// When fraud is enabled in the config, transactions have a chance
963    /// to be marked as fraudulent based on the configured fraud rate.
964    pub fn with_fraud_config(mut self, config: FraudConfig) -> Self {
965        self.fraud_config = config;
966        self
967    }
968
    /// Report whether persona-based error injection is switched on.
    ///
    /// Returns the flag set through `with_persona_errors`.
    pub fn persona_errors_enabled(&self) -> bool {
        self.persona_errors_enabled
    }
973
974    /// Enable or disable batch processing behavior.
975    ///
976    /// When enabled (default), the generator will occasionally produce batches
977    /// of similar entries, simulating how humans batch similar work together.
978    pub fn with_batching(mut self, enabled: bool) -> Self {
979        if !enabled {
980            self.batch_state = None;
981        }
982        self
983    }
984
    /// Check if batch processing is enabled.
    ///
    /// NOTE(review): this currently returns `true` unconditionally; it does
    /// not read any generator state, so it also reports `true` after
    /// `with_batching(false)` has been called.
    pub fn batching_enabled(&self) -> bool {
        // Batching is implicitly enabled when not explicitly disabled
        true
    }
990
991    /// Maybe start a batch based on the current entry.
992    ///
993    /// Humans often batch similar work: processing invoices from one vendor,
994    /// entering expense reports for a trip, reconciling similar items.
995    fn maybe_start_batch(&mut self, entry: &JournalEntry) {
996        // Only start batch for non-automated, non-fraud entries
997        if entry.header.source == TransactionSource::Automated || entry.header.is_fraud {
998            return;
999        }
1000
1001        // 15% chance to start a batch (most work is not batched)
1002        if self.rng.random::<f64>() > 0.15 {
1003            return;
1004        }
1005
1006        // Extract key attributes for batching
1007        let base_account = entry
1008            .lines
1009            .first()
1010            .map(|l| l.gl_account.clone())
1011            .unwrap_or_default();
1012
1013        let base_amount = entry.total_debit();
1014
1015        self.batch_state = Some(BatchState {
1016            base_account_number: base_account,
1017            base_amount,
1018            base_business_process: entry.header.business_process,
1019            base_posting_date: entry.header.posting_date,
1020            remaining: self.rng.random_range(2..7), // 2-6 more similar entries
1021        });
1022    }
1023
1024    /// Generate an entry that's part of the current batch.
1025    ///
1026    /// Batched entries have:
1027    /// - Same or very similar business process
1028    /// - Same posting date (batched work done together)
1029    /// - Similar amounts (within ±15%)
1030    /// - Same debit account (processing similar items)
1031    fn generate_batched_entry(&mut self) -> JournalEntry {
1032        use rust_decimal::Decimal;
1033
1034        // Decrement batch counter
1035        if let Some(ref mut state) = self.batch_state {
1036            state.remaining = state.remaining.saturating_sub(1);
1037        }
1038
1039        let Some(batch) = self.batch_state.clone() else {
1040            // This is a programming error - batch_state should be set before calling this method.
1041            // Clear state and fall back to generating a standard entry instead of panicking.
1042            tracing::warn!(
1043                "generate_batched_entry called without batch_state; generating standard entry"
1044            );
1045            self.batch_state = None;
1046            return self.generate();
1047        };
1048
1049        // Use the batch's posting date (work done on same day)
1050        let posting_date = batch.base_posting_date;
1051
1052        self.count += 1;
1053        let document_id = self.generate_deterministic_uuid();
1054
1055        // Select same company (batched work is usually same company)
1056        let company_code = self.company_selector.select(&mut self.rng).to_string();
1057
1058        // Use simplified line spec for batched entries (usually 2-line)
1059        let _line_spec = LineItemSpec {
1060            total_count: 2,
1061            debit_count: 1,
1062            credit_count: 1,
1063            split_type: DebitCreditSplit::Equal,
1064        };
1065
1066        // Batched entries are always manual
1067        let source = TransactionSource::Manual;
1068
1069        // Use the batch's business process
1070        let business_process = batch.base_business_process.unwrap_or(BusinessProcess::R2R);
1071
1072        // Sample time
1073        let time = self.temporal_sampler.sample_time(true);
1074        let created_at = posting_date.and_time(time).and_utc();
1075
1076        // Same user for batched work
1077        let (created_by, user_persona) = self.select_user(false);
1078
1079        // Create header
1080        let mut header =
1081            JournalEntryHeader::with_deterministic_id(company_code, posting_date, document_id);
1082        header.created_at = created_at;
1083        header.source = source;
1084        header.created_by = created_by;
1085        header.user_persona = user_persona;
1086        header.business_process = Some(business_process);
1087
1088        // Generate similar amount (within ±15% of base)
1089        let variation = self.rng.random_range(-0.15..0.15);
1090        let varied_amount =
1091            batch.base_amount * (Decimal::ONE + Decimal::try_from(variation).unwrap_or_default());
1092        let total_amount = varied_amount.round_dp(2).max(Decimal::from(1));
1093
1094        // Create the entry
1095        let mut entry = JournalEntry::new(header);
1096
1097        // Use same debit account as batch base
1098        let debit_line = JournalEntryLine::debit(
1099            entry.header.document_id,
1100            1,
1101            batch.base_account_number.clone(),
1102            total_amount,
1103        );
1104        entry.add_line(debit_line);
1105
1106        // Select a credit account
1107        let credit_account = self.select_credit_account().account_number.clone();
1108        let credit_line =
1109            JournalEntryLine::credit(entry.header.document_id, 2, credit_account, total_amount);
1110        entry.add_line(credit_line);
1111
1112        // Apply persona-based errors if enabled
1113        if self.persona_errors_enabled {
1114            self.maybe_inject_persona_error(&mut entry);
1115        }
1116
1117        // Apply approval workflow if enabled
1118        if self.approval_enabled {
1119            self.maybe_apply_approval_workflow(&mut entry, posting_date);
1120        }
1121
1122        // Clear batch state if no more entries remaining
1123        if batch.remaining <= 1 {
1124            self.batch_state = None;
1125        }
1126
1127        entry
1128    }
1129
1130    /// Maybe inject a persona-appropriate error based on the persona's error rate.
1131    fn maybe_inject_persona_error(&mut self, entry: &mut JournalEntry) {
1132        // Parse persona from the entry header
1133        let persona_str = &entry.header.user_persona;
1134        let persona = match persona_str.to_lowercase().as_str() {
1135            s if s.contains("junior") => UserPersona::JuniorAccountant,
1136            s if s.contains("senior") => UserPersona::SeniorAccountant,
1137            s if s.contains("controller") => UserPersona::Controller,
1138            s if s.contains("manager") => UserPersona::Manager,
1139            s if s.contains("executive") => UserPersona::Executive,
1140            _ => return, // Don't inject errors for unknown personas
1141        };
1142
1143        // Get base error rate from persona
1144        let base_error_rate = persona.error_rate();
1145
1146        // Apply stress factors based on posting date
1147        let adjusted_rate = self.apply_stress_factors(base_error_rate, entry.header.posting_date);
1148
1149        // Check if error should occur based on adjusted rate
1150        if self.rng.random::<f64>() >= adjusted_rate {
1151            return; // No error this time
1152        }
1153
1154        // Select and inject persona-appropriate error
1155        self.inject_human_error(entry, persona);
1156    }
1157
1158    /// Apply contextual stress factors to the base error rate.
1159    ///
1160    /// Stress factors increase error likelihood during:
1161    /// - Month-end (day >= 28): 1.5x more errors due to deadline pressure
1162    /// - Quarter-end (Mar, Jun, Sep, Dec): additional 25% boost
1163    /// - Year-end (December 28-31): 2.0x more errors due to audit pressure
1164    /// - Monday morning (catch-up work): 20% more errors
1165    /// - Friday afternoon (rushing to leave): 30% more errors
1166    fn apply_stress_factors(&self, base_rate: f64, posting_date: chrono::NaiveDate) -> f64 {
1167        use chrono::Datelike;
1168
1169        let mut rate = base_rate;
1170        let day = posting_date.day();
1171        let month = posting_date.month();
1172
1173        // Year-end stress (December 28-31): double the error rate
1174        if month == 12 && day >= 28 {
1175            rate *= 2.0;
1176            return rate.min(0.5); // Cap at 50% to keep it realistic
1177        }
1178
1179        // Quarter-end stress (last days of Mar, Jun, Sep, Dec)
1180        if matches!(month, 3 | 6 | 9 | 12) && day >= 28 {
1181            rate *= 1.75; // 75% more errors at quarter end
1182            return rate.min(0.4);
1183        }
1184
1185        // Month-end stress (last 3 days of month)
1186        if day >= 28 {
1187            rate *= 1.5; // 50% more errors at month end
1188        }
1189
1190        // Day-of-week stress effects
1191        let weekday = posting_date.weekday();
1192        match weekday {
1193            chrono::Weekday::Mon => {
1194                // Monday: catching up, often rushed
1195                rate *= 1.2;
1196            }
1197            chrono::Weekday::Fri => {
1198                // Friday: rushing to finish before weekend
1199                rate *= 1.3;
1200            }
1201            _ => {}
1202        }
1203
1204        // Cap at 40% to keep it realistic
1205        rate.min(0.4)
1206    }
1207
1208    /// Apply human-like variation to an amount.
1209    ///
1210    /// Humans don't enter perfectly calculated amounts - they:
1211    /// - Round amounts differently
1212    /// - Estimate instead of calculating exactly
1213    /// - Make small input variations
1214    ///
1215    /// This applies small variations (typically ±2%) to make amounts more realistic.
1216    fn apply_human_variation(&mut self, amount: rust_decimal::Decimal) -> rust_decimal::Decimal {
1217        use rust_decimal::Decimal;
1218
1219        // Automated transactions or very small amounts don't get variation
1220        if amount < Decimal::from(10) {
1221            return amount;
1222        }
1223
1224        // 70% chance of human variation being applied
1225        if self.rng.random::<f64>() > 0.70 {
1226            return amount;
1227        }
1228
1229        // Decide which type of human variation to apply
1230        let variation_type: u8 = self.rng.random_range(0..4);
1231
1232        match variation_type {
1233            0 => {
1234                // ±2% variation (common for estimated amounts)
1235                let variation_pct = self.rng.random_range(-0.02..0.02);
1236                let variation = amount * Decimal::try_from(variation_pct).unwrap_or_default();
1237                (amount + variation).round_dp(2)
1238            }
1239            1 => {
1240                // Round to nearest $10
1241                let ten = Decimal::from(10);
1242                (amount / ten).round() * ten
1243            }
1244            2 => {
1245                // Round to nearest $100 (for larger amounts)
1246                if amount >= Decimal::from(500) {
1247                    let hundred = Decimal::from(100);
1248                    (amount / hundred).round() * hundred
1249                } else {
1250                    amount
1251                }
1252            }
1253            3 => {
1254                // Slight under/over payment (±$0.01 to ±$1.00)
1255                let cents = Decimal::new(self.rng.random_range(-100..100), 2);
1256                (amount + cents).max(Decimal::ZERO).round_dp(2)
1257            }
1258            _ => amount,
1259        }
1260    }
1261
1262    /// Rebalance an entry after a one-sided amount modification.
1263    ///
1264    /// When an error modifies one line's amount, this finds a line on the opposite
1265    /// side (credit if modified was debit, or vice versa) and adjusts it by the
1266    /// same impact to maintain balance.
1267    fn rebalance_entry(entry: &mut JournalEntry, modified_was_debit: bool, impact: Decimal) {
1268        // Find a line on the opposite side to adjust
1269        let balancing_idx = entry.lines.iter().position(|l| {
1270            if modified_was_debit {
1271                l.credit_amount > Decimal::ZERO
1272            } else {
1273                l.debit_amount > Decimal::ZERO
1274            }
1275        });
1276
1277        if let Some(idx) = balancing_idx {
1278            if modified_was_debit {
1279                entry.lines[idx].credit_amount += impact;
1280            } else {
1281                entry.lines[idx].debit_amount += impact;
1282            }
1283        }
1284    }
1285
1286    /// Inject a human-like error based on the persona.
1287    ///
1288    /// All error types maintain balance - amount modifications are applied to both sides.
1289    /// Entries are marked with [HUMAN_ERROR:*] tags in header_text for ML detection.
1290    fn inject_human_error(&mut self, entry: &mut JournalEntry, persona: UserPersona) {
1291        use rust_decimal::Decimal;
1292
1293        // Different personas make different types of errors
1294        let error_type: u8 = match persona {
1295            UserPersona::JuniorAccountant => {
1296                // Junior accountants make more varied errors
1297                self.rng.random_range(0..5)
1298            }
1299            UserPersona::SeniorAccountant => {
1300                // Senior accountants mainly make transposition errors
1301                self.rng.random_range(0..3)
1302            }
1303            UserPersona::Controller | UserPersona::Manager => {
1304                // Controllers/managers mainly make rounding or cutoff errors
1305                self.rng.random_range(3..5)
1306            }
1307            _ => return,
1308        };
1309
1310        match error_type {
1311            0 => {
1312                // Transposed digits in an amount
1313                if let Some(line) = entry.lines.get_mut(0) {
1314                    let is_debit = line.debit_amount > Decimal::ZERO;
1315                    let original_amount = if is_debit {
1316                        line.debit_amount
1317                    } else {
1318                        line.credit_amount
1319                    };
1320
1321                    // Simple digit swap in the string representation
1322                    let s = original_amount.to_string();
1323                    if s.len() >= 2 {
1324                        let chars: Vec<char> = s.chars().collect();
1325                        let pos = self.rng.random_range(0..chars.len().saturating_sub(1));
1326                        if chars[pos].is_ascii_digit()
1327                            && chars.get(pos + 1).is_some_and(|c| c.is_ascii_digit())
1328                        {
1329                            let mut new_chars = chars;
1330                            new_chars.swap(pos, pos + 1);
1331                            if let Ok(new_amount) =
1332                                new_chars.into_iter().collect::<String>().parse::<Decimal>()
1333                            {
1334                                let impact = new_amount - original_amount;
1335
1336                                // Apply to the modified line
1337                                if is_debit {
1338                                    entry.lines[0].debit_amount = new_amount;
1339                                } else {
1340                                    entry.lines[0].credit_amount = new_amount;
1341                                }
1342
1343                                // Rebalance the entry
1344                                Self::rebalance_entry(entry, is_debit, impact);
1345
1346                                entry.header.header_text = Some(
1347                                    entry.header.header_text.clone().unwrap_or_default()
1348                                        + " [HUMAN_ERROR:TRANSPOSITION]",
1349                                );
1350                            }
1351                        }
1352                    }
1353                }
1354            }
1355            1 => {
1356                // Wrong decimal place (off by factor of 10)
1357                if let Some(line) = entry.lines.get_mut(0) {
1358                    let is_debit = line.debit_amount > Decimal::ZERO;
1359                    let original_amount = if is_debit {
1360                        line.debit_amount
1361                    } else {
1362                        line.credit_amount
1363                    };
1364
1365                    let new_amount = original_amount * Decimal::new(10, 0);
1366                    let impact = new_amount - original_amount;
1367
1368                    // Apply to the modified line
1369                    if is_debit {
1370                        entry.lines[0].debit_amount = new_amount;
1371                    } else {
1372                        entry.lines[0].credit_amount = new_amount;
1373                    }
1374
1375                    // Rebalance the entry
1376                    Self::rebalance_entry(entry, is_debit, impact);
1377
1378                    entry.header.header_text = Some(
1379                        entry.header.header_text.clone().unwrap_or_default()
1380                            + " [HUMAN_ERROR:DECIMAL_SHIFT]",
1381                    );
1382                }
1383            }
1384            2 => {
1385                // Typo in description (doesn't affect balance)
1386                if let Some(ref mut text) = entry.header.header_text {
1387                    let typos = ["teh", "adn", "wiht", "taht", "recieve"];
1388                    let correct = ["the", "and", "with", "that", "receive"];
1389                    let idx = self.rng.random_range(0..typos.len());
1390                    if text.to_lowercase().contains(correct[idx]) {
1391                        *text = text.replace(correct[idx], typos[idx]);
1392                        *text = format!("{} [HUMAN_ERROR:TYPO]", text);
1393                    }
1394                }
1395            }
1396            3 => {
1397                // Rounding to round number
1398                if let Some(line) = entry.lines.get_mut(0) {
1399                    let is_debit = line.debit_amount > Decimal::ZERO;
1400                    let original_amount = if is_debit {
1401                        line.debit_amount
1402                    } else {
1403                        line.credit_amount
1404                    };
1405
1406                    let new_amount =
1407                        (original_amount / Decimal::new(100, 0)).round() * Decimal::new(100, 0);
1408                    let impact = new_amount - original_amount;
1409
1410                    // Apply to the modified line
1411                    if is_debit {
1412                        entry.lines[0].debit_amount = new_amount;
1413                    } else {
1414                        entry.lines[0].credit_amount = new_amount;
1415                    }
1416
1417                    // Rebalance the entry
1418                    Self::rebalance_entry(entry, is_debit, impact);
1419
1420                    entry.header.header_text = Some(
1421                        entry.header.header_text.clone().unwrap_or_default()
1422                            + " [HUMAN_ERROR:ROUNDED]",
1423                    );
1424                }
1425            }
1426            4 => {
1427                // Late posting marker (document date much earlier than posting date)
1428                // This doesn't create an imbalance
1429                if entry.header.document_date == entry.header.posting_date {
1430                    let days_late = self.rng.random_range(5..15);
1431                    entry.header.document_date =
1432                        entry.header.posting_date - chrono::Duration::days(days_late);
1433                    entry.header.header_text = Some(
1434                        entry.header.header_text.clone().unwrap_or_default()
1435                            + " [HUMAN_ERROR:LATE_POSTING]",
1436                    );
1437                }
1438            }
1439            _ => {}
1440        }
1441    }
1442
1443    /// Apply approval workflow for high-value transactions.
1444    ///
1445    /// If the entry amount exceeds the approval threshold, simulate an
1446    /// approval workflow with appropriate approvers based on amount.
1447    fn maybe_apply_approval_workflow(
1448        &mut self,
1449        entry: &mut JournalEntry,
1450        _posting_date: NaiveDate,
1451    ) {
1452        use rust_decimal::Decimal;
1453
1454        let amount = entry.total_debit();
1455
1456        // Skip if amount is below threshold
1457        if amount <= self.approval_threshold {
1458            // Auto-approved below threshold
1459            let workflow = ApprovalWorkflow::auto_approved(
1460                entry.header.created_by.clone(),
1461                entry.header.user_persona.clone(),
1462                amount,
1463                entry.header.created_at,
1464            );
1465            entry.header.approval_workflow = Some(workflow);
1466            return;
1467        }
1468
1469        // Mark as SOX relevant for high-value transactions
1470        entry.header.sox_relevant = true;
1471
1472        // Determine required approval levels based on amount
1473        let required_levels = if amount > Decimal::new(100000, 0) {
1474            3 // Executive approval required
1475        } else if amount > Decimal::new(50000, 0) {
1476            2 // Senior management approval
1477        } else {
1478            1 // Manager approval
1479        };
1480
1481        // Create the approval workflow
1482        let mut workflow = ApprovalWorkflow::new(
1483            entry.header.created_by.clone(),
1484            entry.header.user_persona.clone(),
1485            amount,
1486        );
1487        workflow.required_levels = required_levels;
1488
1489        // Simulate submission
1490        let submit_time = entry.header.created_at;
1491        let submit_action = ApprovalAction::new(
1492            entry.header.created_by.clone(),
1493            entry.header.user_persona.clone(),
1494            self.parse_persona(&entry.header.user_persona),
1495            ApprovalActionType::Submit,
1496            0,
1497        )
1498        .with_timestamp(submit_time);
1499
1500        workflow.actions.push(submit_action);
1501        workflow.status = ApprovalStatus::Pending;
1502        workflow.submitted_at = Some(submit_time);
1503
1504        // Simulate approvals with realistic delays
1505        let mut current_time = submit_time;
1506        for level in 1..=required_levels {
1507            // Add delay for approval (1-3 business hours per level)
1508            let delay_hours = self.rng.random_range(1..4);
1509            current_time += chrono::Duration::hours(delay_hours);
1510
1511            // Skip weekends
1512            while current_time.weekday() == chrono::Weekday::Sat
1513                || current_time.weekday() == chrono::Weekday::Sun
1514            {
1515                current_time += chrono::Duration::days(1);
1516            }
1517
1518            // Generate approver based on level
1519            let (approver_id, approver_role) = self.select_approver(level);
1520
1521            let approve_action = ApprovalAction::new(
1522                approver_id.clone(),
1523                format!("{:?}", approver_role),
1524                approver_role,
1525                ApprovalActionType::Approve,
1526                level,
1527            )
1528            .with_timestamp(current_time);
1529
1530            workflow.actions.push(approve_action);
1531            workflow.current_level = level;
1532        }
1533
1534        // Mark as approved
1535        workflow.status = ApprovalStatus::Approved;
1536        workflow.approved_at = Some(current_time);
1537
1538        entry.header.approval_workflow = Some(workflow);
1539    }
1540
1541    /// Select an approver based on the required level.
1542    fn select_approver(&mut self, level: u8) -> (String, UserPersona) {
1543        let persona = match level {
1544            1 => UserPersona::Manager,
1545            2 => UserPersona::Controller,
1546            _ => UserPersona::Executive,
1547        };
1548
1549        // Try to get from user pool first
1550        if let Some(ref pool) = self.user_pool {
1551            if let Some(user) = pool.get_random_user(persona, &mut self.rng) {
1552                return (user.user_id.clone(), persona);
1553            }
1554        }
1555
1556        // Fallback to generated approver
1557        let approver_id = match persona {
1558            UserPersona::Manager => format!("MGR{:04}", self.rng.random_range(1..100)),
1559            UserPersona::Controller => format!("CTRL{:04}", self.rng.random_range(1..20)),
1560            UserPersona::Executive => format!("EXEC{:04}", self.rng.random_range(1..10)),
1561            _ => format!("USR{:04}", self.rng.random_range(1..1000)),
1562        };
1563
1564        (approver_id, persona)
1565    }
1566
1567    /// Parse user persona from string.
1568    fn parse_persona(&self, persona_str: &str) -> UserPersona {
1569        match persona_str.to_lowercase().as_str() {
1570            s if s.contains("junior") => UserPersona::JuniorAccountant,
1571            s if s.contains("senior") => UserPersona::SeniorAccountant,
1572            s if s.contains("controller") => UserPersona::Controller,
1573            s if s.contains("manager") => UserPersona::Manager,
1574            s if s.contains("executive") => UserPersona::Executive,
1575            s if s.contains("automated") || s.contains("system") => UserPersona::AutomatedSystem,
1576            _ => UserPersona::JuniorAccountant, // Default
1577        }
1578    }
1579
1580    /// Enable or disable approval workflow.
1581    pub fn with_approval(mut self, enabled: bool) -> Self {
1582        self.approval_enabled = enabled;
1583        self
1584    }
1585
1586    /// Set the approval threshold amount.
1587    pub fn with_approval_threshold(mut self, threshold: rust_decimal::Decimal) -> Self {
1588        self.approval_threshold = threshold;
1589        self
1590    }
1591
1592    /// Set the temporal drift controller for simulating distribution changes over time.
1593    ///
1594    /// When drift is enabled, amounts and other distributions will shift based on
1595    /// the period (month) to simulate realistic temporal evolution like inflation
1596    /// or increasing fraud rates.
1597    pub fn with_drift_controller(mut self, controller: DriftController) -> Self {
1598        self.drift_controller = Some(controller);
1599        self
1600    }
1601
1602    /// Set drift configuration directly.
1603    ///
1604    /// Creates a drift controller from the config. Total periods is calculated
1605    /// from the date range.
1606    pub fn with_drift_config(mut self, config: DriftConfig, seed: u64) -> Self {
1607        if config.enabled {
1608            let total_periods = self.calculate_total_periods();
1609            self.drift_controller = Some(DriftController::new(config, seed, total_periods));
1610        }
1611        self
1612    }
1613
1614    /// Calculate total periods (months) in the date range.
1615    fn calculate_total_periods(&self) -> u32 {
1616        let start_year = self.start_date.year();
1617        let start_month = self.start_date.month();
1618        let end_year = self.end_date.year();
1619        let end_month = self.end_date.month();
1620
1621        ((end_year - start_year) * 12 + (end_month as i32 - start_month as i32) + 1).max(1) as u32
1622    }
1623
1624    /// Calculate the period number (0-indexed) for a given date.
1625    fn date_to_period(&self, date: NaiveDate) -> u32 {
1626        let start_year = self.start_date.year();
1627        let start_month = self.start_date.month() as i32;
1628        let date_year = date.year();
1629        let date_month = date.month() as i32;
1630
1631        ((date_year - start_year) * 12 + (date_month - start_month)).max(0) as u32
1632    }
1633
1634    /// Get drift adjustments for a given date.
1635    fn get_drift_adjustments(&self, date: NaiveDate) -> DriftAdjustments {
1636        if let Some(ref controller) = self.drift_controller {
1637            let period = self.date_to_period(date);
1638            controller.compute_adjustments(period)
1639        } else {
1640            DriftAdjustments::none()
1641        }
1642    }
1643
1644    /// Select a user from the pool or generate a generic user ID.
1645    #[inline]
1646    fn select_user(&mut self, is_automated: bool) -> (String, String) {
1647        if let Some(ref pool) = self.user_pool {
1648            let persona = if is_automated {
1649                UserPersona::AutomatedSystem
1650            } else {
1651                // Random distribution among human personas
1652                let roll: f64 = self.rng.random();
1653                if roll < 0.4 {
1654                    UserPersona::JuniorAccountant
1655                } else if roll < 0.7 {
1656                    UserPersona::SeniorAccountant
1657                } else if roll < 0.85 {
1658                    UserPersona::Controller
1659                } else {
1660                    UserPersona::Manager
1661                }
1662            };
1663
1664            if let Some(user) = pool.get_random_user(persona, &mut self.rng) {
1665                return (
1666                    user.user_id.clone(),
1667                    format!("{:?}", user.persona).to_lowercase(),
1668                );
1669            }
1670        }
1671
1672        // Fallback to generic format
1673        if is_automated {
1674            (
1675                format!("BATCH{:04}", self.rng.random_range(1..=20)),
1676                "automated_system".to_string(),
1677            )
1678        } else {
1679            (
1680                format!("USER{:04}", self.rng.random_range(1..=40)),
1681                "senior_accountant".to_string(),
1682            )
1683        }
1684    }
1685
1686    /// Select transaction source based on configuration weights.
1687    #[inline]
1688    fn select_source(&mut self) -> TransactionSource {
1689        let roll: f64 = self.rng.random();
1690        let dist = &self.config.source_distribution;
1691
1692        if roll < dist.manual {
1693            TransactionSource::Manual
1694        } else if roll < dist.manual + dist.automated {
1695            TransactionSource::Automated
1696        } else if roll < dist.manual + dist.automated + dist.recurring {
1697            TransactionSource::Recurring
1698        } else {
1699            TransactionSource::Adjustment
1700        }
1701    }
1702
1703    /// Select a business process based on configuration weights.
1704    #[inline]
1705    fn select_business_process(&mut self) -> BusinessProcess {
1706        let roll: f64 = self.rng.random();
1707
1708        // Default weights: O2C=35%, P2P=30%, R2R=20%, H2R=10%, A2R=5%
1709        if roll < 0.35 {
1710            BusinessProcess::O2C
1711        } else if roll < 0.65 {
1712            BusinessProcess::P2P
1713        } else if roll < 0.85 {
1714            BusinessProcess::R2R
1715        } else if roll < 0.95 {
1716            BusinessProcess::H2R
1717        } else {
1718            BusinessProcess::A2R
1719        }
1720    }
1721
1722    #[inline]
1723    fn select_debit_account(&mut self) -> &GLAccount {
1724        let accounts = self.coa.get_accounts_by_type(AccountType::Asset);
1725        let expense_accounts = self.coa.get_accounts_by_type(AccountType::Expense);
1726
1727        // 60% asset, 40% expense for debits
1728        let all: Vec<_> = if self.rng.random::<f64>() < 0.6 {
1729            accounts
1730        } else {
1731            expense_accounts
1732        };
1733
1734        all.choose(&mut self.rng).copied().unwrap_or_else(|| {
1735            tracing::warn!(
1736                "Account selection returned empty list, falling back to first COA account"
1737            );
1738            &self.coa.accounts[0]
1739        })
1740    }
1741
1742    #[inline]
1743    fn select_credit_account(&mut self) -> &GLAccount {
1744        let liability_accounts = self.coa.get_accounts_by_type(AccountType::Liability);
1745        let revenue_accounts = self.coa.get_accounts_by_type(AccountType::Revenue);
1746
1747        // 60% liability, 40% revenue for credits
1748        let all: Vec<_> = if self.rng.random::<f64>() < 0.6 {
1749            liability_accounts
1750        } else {
1751            revenue_accounts
1752        };
1753
1754        all.choose(&mut self.rng).copied().unwrap_or_else(|| {
1755            tracing::warn!(
1756                "Account selection returned empty list, falling back to first COA account"
1757            );
1758            &self.coa.accounts[0]
1759        })
1760    }
1761}
1762
1763impl Generator for JournalEntryGenerator {
1764    type Item = JournalEntry;
1765    type Config = (
1766        TransactionConfig,
1767        Arc<ChartOfAccounts>,
1768        Vec<String>,
1769        NaiveDate,
1770        NaiveDate,
1771    );
1772
1773    fn new(config: Self::Config, seed: u64) -> Self {
1774        Self::new_with_params(config.0, config.1, config.2, config.3, config.4, seed)
1775    }
1776
1777    fn generate_one(&mut self) -> Self::Item {
1778        self.generate()
1779    }
1780
1781    fn reset(&mut self) {
1782        self.rng = seeded_rng(self.seed, 0);
1783        self.line_sampler.reset(self.seed + 1);
1784        self.amount_sampler.reset(self.seed + 2);
1785        self.temporal_sampler.reset(self.seed + 3);
1786        self.count = 0;
1787        self.uuid_factory.reset();
1788
1789        // Reset reference generator by recreating it
1790        let mut ref_gen = ReferenceGenerator::new(
1791            self.start_date.year(),
1792            self.companies.first().map(|s| s.as_str()).unwrap_or("1000"),
1793        );
1794        ref_gen.set_prefix(
1795            ReferenceType::Invoice,
1796            &self.template_config.references.invoice_prefix,
1797        );
1798        ref_gen.set_prefix(
1799            ReferenceType::PurchaseOrder,
1800            &self.template_config.references.po_prefix,
1801        );
1802        ref_gen.set_prefix(
1803            ReferenceType::SalesOrder,
1804            &self.template_config.references.so_prefix,
1805        );
1806        self.reference_generator = ref_gen;
1807    }
1808
1809    fn count(&self) -> u64 {
1810        self.count
1811    }
1812
1813    fn seed(&self) -> u64 {
1814        self.seed
1815    }
1816}
1817
1818use datasynth_core::traits::ParallelGenerator;
1819
1820impl ParallelGenerator for JournalEntryGenerator {
1821    /// Split this generator into `parts` independent sub-generators.
1822    ///
1823    /// Each sub-generator gets a deterministic seed derived from the parent seed
1824    /// and its partition index, plus a partitioned UUID factory to avoid contention.
1825    /// The results are deterministic for a given partition count.
1826    fn split(self, parts: usize) -> Vec<Self> {
1827        let parts = parts.max(1);
1828        (0..parts)
1829            .map(|i| {
1830                // Derive a unique seed per partition using a golden-ratio constant
1831                let sub_seed = self
1832                    .seed
1833                    .wrapping_add((i as u64).wrapping_mul(0x9E3779B97F4A7C15));
1834
1835                let mut gen = JournalEntryGenerator::new_with_full_config(
1836                    self.config.clone(),
1837                    Arc::clone(&self.coa),
1838                    self.companies.clone(),
1839                    self.start_date,
1840                    self.end_date,
1841                    sub_seed,
1842                    self.template_config.clone(),
1843                    self.user_pool.clone(),
1844                );
1845
1846                // Copy over configuration state
1847                gen.company_selector = self.company_selector.clone();
1848                gen.vendor_pool = self.vendor_pool.clone();
1849                gen.customer_pool = self.customer_pool.clone();
1850                gen.material_pool = self.material_pool.clone();
1851                gen.using_real_master_data = self.using_real_master_data;
1852                gen.fraud_config = self.fraud_config.clone();
1853                gen.persona_errors_enabled = self.persona_errors_enabled;
1854                gen.approval_enabled = self.approval_enabled;
1855                gen.approval_threshold = self.approval_threshold;
1856
1857                // Use partitioned UUID factory to eliminate atomic contention
1858                gen.uuid_factory = DeterministicUuidFactory::for_partition(
1859                    sub_seed,
1860                    GeneratorType::JournalEntry,
1861                    i as u8,
1862                );
1863
1864                // Copy temporal patterns if configured
1865                if let Some(ref config) = self.temporal_patterns_config {
1866                    gen.temporal_patterns_config = Some(config.clone());
1867                    // Rebuild business day calculator from the stored config
1868                    if config.business_days.enabled {
1869                        if let Some(ref bdc) = self.business_day_calculator {
1870                            gen.business_day_calculator = Some(bdc.clone());
1871                        }
1872                    }
1873                    // Rebuild processing lag calculator with partition seed
1874                    if config.processing_lags.enabled {
1875                        let lag_config =
1876                            Self::convert_processing_lag_config(&config.processing_lags);
1877                        gen.processing_lag_calculator =
1878                            Some(ProcessingLagCalculator::with_config(sub_seed, lag_config));
1879                    }
1880                }
1881
1882                // Copy drift controller if present
1883                if let Some(ref dc) = self.drift_controller {
1884                    gen.drift_controller = Some(dc.clone());
1885                }
1886
1887                gen
1888            })
1889            .collect()
1890    }
1891}
1892
#[cfg(test)]
#[allow(clippy::unwrap_used)]
mod tests {
    use super::*;
    use crate::ChartOfAccountsGenerator;

    /// Builds the small manufacturing chart of accounts (seed 42) shared by the tests.
    fn small_coa() -> Arc<ChartOfAccounts> {
        let mut coa_gen =
            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
        Arc::new(coa_gen.generate())
    }

    /// Shorthand for a calendar date that is known to be valid.
    fn ymd(year: i32, month: u32, day: u32) -> NaiveDate {
        NaiveDate::from_ymd_opt(year, month, day).unwrap()
    }

    /// Generator with the default transaction config for company "1000",
    /// covering calendar year 2024.
    fn generator_for_2024(coa: Arc<ChartOfAccounts>, seed: u64) -> JournalEntryGenerator {
        JournalEntryGenerator::new_with_params(
            TransactionConfig::default(),
            coa,
            vec!["1000".to_string()],
            ymd(2024, 1, 1),
            ymd(2024, 12, 31),
            seed,
        )
    }

    /// Entries without an injected human error must balance and have >= 2 lines.
    #[test]
    fn test_generate_balanced_entries() {
        let mut je_gen = generator_for_2024(small_coa(), 42);

        let mut balanced_count = 0;
        for _ in 0..100 {
            let entry = je_gen.generate();

            // Skip entries with human errors as they may be intentionally unbalanced
            let has_human_error = entry
                .header
                .header_text
                .as_ref()
                .map(|t| t.contains("[HUMAN_ERROR:"))
                .unwrap_or(false);

            if !has_human_error {
                assert!(
                    entry.is_balanced(),
                    "Entry {:?} is not balanced",
                    entry.header.document_id
                );
                balanced_count += 1;
            }
            assert!(entry.line_count() >= 2, "Entry has fewer than 2 lines");
        }

        // Ensure most entries are balanced (human errors are rare)
        assert!(
            balanced_count >= 80,
            "Expected at least 80 balanced entries, got {}",
            balanced_count
        );
    }

    /// Two generators built from the same inputs and seed produce the same stream.
    #[test]
    fn test_deterministic_generation() {
        let coa = small_coa();

        let mut gen1 = generator_for_2024(Arc::clone(&coa), 42);
        let mut gen2 = generator_for_2024(coa, 42);

        for _ in 0..50 {
            let e1 = gen1.generate();
            let e2 = gen2.generate();
            assert_eq!(e1.header.document_id, e2.header.document_id);
            assert_eq!(e1.total_debit(), e2.total_debit());
        }
    }

    /// With all template features on, header text, reference, business process
    /// and line text are populated and entries stay balanced.
    #[test]
    fn test_templates_generate_descriptions() {
        // Enable all template features
        let template_config = TemplateConfig {
            names: datasynth_config::schema::NameTemplateConfig {
                generate_realistic_names: true,
                email_domain: "test.com".to_string(),
                culture_distribution: datasynth_config::schema::CultureDistribution::default(),
            },
            descriptions: datasynth_config::schema::DescriptionTemplateConfig {
                generate_header_text: true,
                generate_line_text: true,
            },
            references: datasynth_config::schema::ReferenceTemplateConfig {
                generate_references: true,
                invoice_prefix: "TEST-INV".to_string(),
                po_prefix: "TEST-PO".to_string(),
                so_prefix: "TEST-SO".to_string(),
            },
        };

        let mut je_gen = JournalEntryGenerator::new_with_full_config(
            TransactionConfig::default(),
            small_coa(),
            vec!["1000".to_string()],
            ymd(2024, 1, 1),
            ymd(2024, 12, 31),
            42,
            template_config,
            None,
        )
        .with_persona_errors(false); // Disable for template testing

        for _ in 0..10 {
            let entry = je_gen.generate();

            // Verify header text is populated
            assert!(
                entry.header.header_text.is_some(),
                "Header text should be populated"
            );

            // Verify reference is populated
            assert!(
                entry.header.reference.is_some(),
                "Reference should be populated"
            );

            // Verify business process is set
            assert!(
                entry.header.business_process.is_some(),
                "Business process should be set"
            );

            // Verify line text is populated
            for line in &entry.lines {
                assert!(line.line_text.is_some(), "Line text should be populated");
            }

            // Entry should still be balanced
            assert!(entry.is_balanced());
        }
    }

    /// A generator wired to a user pool always fills in `created_by`.
    #[test]
    fn test_user_pool_integration() {
        let companies = vec!["1000".to_string()];

        // Generate user pool
        let mut user_gen = crate::UserGenerator::new(42);
        let user_pool = user_gen.generate_standard(&companies);

        let mut je_gen = JournalEntryGenerator::new_with_full_config(
            TransactionConfig::default(),
            small_coa(),
            companies,
            ymd(2024, 1, 1),
            ymd(2024, 12, 31),
            42,
            TemplateConfig::default(),
            Some(user_pool),
        );

        // Only non-emptiness is asserted: the generator may still fall back to
        // a generic BATCH/USER ID when the pool has no user for the persona.
        for _ in 0..20 {
            let entry = je_gen.generate();
            assert!(!entry.header.created_by.is_empty());
        }
    }

    /// Connecting vendors, customers and materials flips the master-data flag.
    #[test]
    fn test_master_data_connection() {
        // Create test vendors
        let vendors = vec![
            Vendor::new("V-TEST-001", "Test Vendor Alpha", VendorType::Supplier),
            Vendor::new("V-TEST-002", "Test Vendor Beta", VendorType::Technology),
        ];

        // Create test customers
        let customers = vec![
            Customer::new("C-TEST-001", "Test Customer One", CustomerType::Corporate),
            Customer::new(
                "C-TEST-002",
                "Test Customer Two",
                CustomerType::SmallBusiness,
            ),
        ];

        // Create test materials
        let materials = vec![Material::new(
            "MAT-TEST-001",
            "Test Material A",
            MaterialType::RawMaterial,
        )];

        // Freshly built generator: no master data connected yet
        let generator = generator_for_2024(small_coa(), 42);
        assert!(!generator.is_using_real_master_data());

        // Connect master data
        let generator_with_data = generator
            .with_vendors(&vendors)
            .with_customers(&customers)
            .with_materials(&materials);

        // Should now be using real master data
        assert!(generator_with_data.is_using_real_master_data());
    }

    /// `with_master_data` connects all three master-data sets in one call.
    #[test]
    fn test_with_master_data_convenience_method() {
        let vendors = vec![Vendor::new("V-001", "Vendor One", VendorType::Supplier)];
        let customers = vec![Customer::new(
            "C-001",
            "Customer One",
            CustomerType::Corporate,
        )];
        let materials = vec![Material::new(
            "MAT-001",
            "Material One",
            MaterialType::RawMaterial,
        )];

        let generator = generator_for_2024(small_coa(), 42)
            .with_master_data(&vendors, &customers, &materials);

        assert!(generator.is_using_real_master_data());
    }

    /// Period-end dates, Mondays and Fridays raise the error rate above a
    /// mid-week, mid-month baseline.
    #[test]
    fn test_stress_factors_increase_error_rate() {
        let generator = generator_for_2024(small_coa(), 42);

        let base_rate = 0.1;

        // Regular day - no stress factors.
        // 2024-06-12 is a Wednesday, so the baseline really is mid-week
        // (the previously used 2024-06-15 is a Saturday).
        let regular_day = ymd(2024, 6, 12);
        let regular_rate = generator.apply_stress_factors(base_rate, regular_day);
        assert!(
            (regular_rate - base_rate).abs() < 0.01,
            "Regular day should have minimal stress factor adjustment"
        );

        // Month end - 50% more errors
        let month_end = ymd(2024, 6, 29); // June 29 (Saturday)
        let month_end_rate = generator.apply_stress_factors(base_rate, month_end);
        assert!(
            month_end_rate > regular_rate,
            "Month end should have higher error rate than regular day"
        );

        // Year end - double the error rate
        let year_end = ymd(2024, 12, 30); // December 30 (Monday)
        let year_end_rate = generator.apply_stress_factors(base_rate, year_end);
        assert!(
            year_end_rate > month_end_rate,
            "Year end should have highest error rate"
        );

        // Friday stress
        let friday = ymd(2024, 6, 14); // Friday
        let friday_rate = generator.apply_stress_factors(base_rate, friday);
        assert!(
            friday_rate > regular_rate,
            "Friday should have higher error rate than mid-week"
        );

        // Monday stress
        let monday = ymd(2024, 6, 17); // Monday
        let monday_rate = generator.apply_stress_factors(base_rate, monday);
        assert!(
            monday_rate > regular_rate,
            "Monday should have higher error rate than mid-week"
        );
    }

    /// Batched and unbatched entries all balance, and some posting dates repeat.
    #[test]
    fn test_batching_produces_similar_entries() {
        // Use seed 123 which is more likely to trigger batching
        let mut je_gen = generator_for_2024(small_coa(), 123).with_persona_errors(false); // Disable to ensure balanced entries

        // Generate many entries - at 15% batch rate, should see some batches
        let entries: Vec<JournalEntry> = (0..200).map(|_| je_gen.generate()).collect();

        // Check that all entries are balanced (batched or not)
        for entry in &entries {
            assert!(
                entry.is_balanced(),
                "All entries including batched should be balanced"
            );
        }

        // Count entries with same-day posting dates (batch indicator)
        let mut date_counts: std::collections::HashMap<NaiveDate, usize> =
            std::collections::HashMap::new();
        for entry in &entries {
            *date_counts.entry(entry.header.posting_date).or_insert(0) += 1;
        }

        // With batching, some dates should have multiple entries
        let dates_with_multiple = date_counts.values().filter(|&&c| c > 1).count();
        assert!(
            dates_with_multiple > 0,
            "With batching, should see some dates with multiple entries"
        );
    }

    /// With business-day handling enabled, no posting date lands on a weekend.
    #[test]
    fn test_temporal_patterns_business_days() {
        use datasynth_config::schema::{
            BusinessDaySchemaConfig, CalendarSchemaConfig, TemporalPatternsConfig,
        };

        // Create temporal patterns config with business days enabled
        let temporal_config = TemporalPatternsConfig {
            enabled: true,
            business_days: BusinessDaySchemaConfig {
                enabled: true,
                ..Default::default()
            },
            calendars: CalendarSchemaConfig {
                regions: vec!["US".to_string()],
                custom_holidays: vec![],
            },
            ..Default::default()
        };

        let mut je_gen = JournalEntryGenerator::new_with_params(
            TransactionConfig::default(),
            small_coa(),
            vec!["1000".to_string()],
            ymd(2024, 1, 1),
            ymd(2024, 3, 31), // Q1 2024
            42,
        )
        .with_temporal_patterns(temporal_config, 42)
        .with_persona_errors(false);

        // Generate entries and verify none fall on weekends
        let entries: Vec<JournalEntry> = (0..100).map(|_| je_gen.generate()).collect();

        for entry in &entries {
            let weekday = entry.header.posting_date.weekday();
            assert!(
                weekday != chrono::Weekday::Sat && weekday != chrono::Weekday::Sun,
                "Posting date {:?} should not be a weekend",
                entry.header.posting_date
            );
        }
    }
}