Skip to main content

datasynth_generators/
je_generator.rs

1//! Journal Entry generator with statistical distributions.
2
3use chrono::{Datelike, NaiveDate};
4use datasynth_core::utils::seeded_rng;
5use rand::prelude::*;
6use rand_chacha::ChaCha8Rng;
7use rust_decimal::prelude::*;
8use rust_decimal::Decimal;
9use std::sync::Arc;
10
11use tracing::debug;
12
13use datasynth_config::schema::{
14    FraudConfig, GeneratorConfig, TemplateConfig, TemporalPatternsConfig, TransactionConfig,
15};
16use datasynth_core::distributions::{
17    BusinessDayCalculator, CrossDayConfig, DriftAdjustments, DriftConfig, DriftController,
18    EventType, LagDistribution, PeriodEndConfig, PeriodEndDynamics, PeriodEndModel,
19    ProcessingLagCalculator, ProcessingLagConfig, *,
20};
21use datasynth_core::models::*;
22use datasynth_core::templates::{
23    descriptions::DescriptionContext, DescriptionGenerator, ReferenceGenerator, ReferenceType,
24};
25use datasynth_core::traits::Generator;
26use datasynth_core::uuid_factory::{DeterministicUuidFactory, GeneratorType};
27use datasynth_core::CountryPack;
28
29use crate::company_selector::WeightedCompanySelector;
30use crate::user_generator::{UserGenerator, UserGeneratorConfig};
31
32/// Generator for realistic journal entries.
33pub struct JournalEntryGenerator {
34    rng: ChaCha8Rng,
35    seed: u64,
36    config: TransactionConfig,
37    coa: Arc<ChartOfAccounts>,
38    companies: Vec<String>,
39    company_selector: WeightedCompanySelector,
40    line_sampler: LineItemSampler,
41    amount_sampler: AmountSampler,
42    temporal_sampler: TemporalSampler,
43    start_date: NaiveDate,
44    end_date: NaiveDate,
45    count: u64,
46    uuid_factory: DeterministicUuidFactory,
47    // Enhanced features
48    user_pool: Option<UserPool>,
49    description_generator: DescriptionGenerator,
50    reference_generator: ReferenceGenerator,
51    template_config: TemplateConfig,
52    vendor_pool: VendorPool,
53    customer_pool: CustomerPool,
54    // Material pool for realistic material references
55    material_pool: Option<MaterialPool>,
56    // Flag indicating whether we're using real master data vs defaults
57    using_real_master_data: bool,
58    // Fraud generation
59    fraud_config: FraudConfig,
60    // Persona-based error injection
61    persona_errors_enabled: bool,
62    // Approval threshold enforcement
63    approval_enabled: bool,
64    approval_threshold: rust_decimal::Decimal,
65    // Batching behavior - humans often process similar items together
66    batch_state: Option<BatchState>,
67    // Temporal drift controller for simulating distribution changes over time
68    drift_controller: Option<DriftController>,
69    // Temporal patterns components
70    business_day_calculator: Option<BusinessDayCalculator>,
71    processing_lag_calculator: Option<ProcessingLagCalculator>,
72    temporal_patterns_config: Option<TemporalPatternsConfig>,
73}
74
75/// State for tracking batch processing behavior.
76///
77/// When humans process transactions, they often batch similar items together
78/// (e.g., processing all invoices from one vendor, entering similar expenses).
79#[derive(Clone)]
80struct BatchState {
81    /// The base entry template to vary
82    base_account_number: String,
83    base_amount: rust_decimal::Decimal,
84    base_business_process: Option<BusinessProcess>,
85    base_posting_date: NaiveDate,
86    /// Remaining entries in this batch
87    remaining: u8,
88}
89
90impl JournalEntryGenerator {
91    /// Create a new journal entry generator.
92    pub fn new_with_params(
93        config: TransactionConfig,
94        coa: Arc<ChartOfAccounts>,
95        companies: Vec<String>,
96        start_date: NaiveDate,
97        end_date: NaiveDate,
98        seed: u64,
99    ) -> Self {
100        Self::new_with_full_config(
101            config,
102            coa,
103            companies,
104            start_date,
105            end_date,
106            seed,
107            TemplateConfig::default(),
108            None,
109        )
110    }
111
112    /// Create a new journal entry generator with full configuration.
113    #[allow(clippy::too_many_arguments)]
114    pub fn new_with_full_config(
115        config: TransactionConfig,
116        coa: Arc<ChartOfAccounts>,
117        companies: Vec<String>,
118        start_date: NaiveDate,
119        end_date: NaiveDate,
120        seed: u64,
121        template_config: TemplateConfig,
122        user_pool: Option<UserPool>,
123    ) -> Self {
124        // Initialize user pool if not provided
125        let user_pool = user_pool.or_else(|| {
126            if template_config.names.generate_realistic_names {
127                let user_gen_config = UserGeneratorConfig {
128                    culture_distribution: vec![
129                        (
130                            datasynth_core::templates::NameCulture::WesternUs,
131                            template_config.names.culture_distribution.western_us,
132                        ),
133                        (
134                            datasynth_core::templates::NameCulture::Hispanic,
135                            template_config.names.culture_distribution.hispanic,
136                        ),
137                        (
138                            datasynth_core::templates::NameCulture::German,
139                            template_config.names.culture_distribution.german,
140                        ),
141                        (
142                            datasynth_core::templates::NameCulture::French,
143                            template_config.names.culture_distribution.french,
144                        ),
145                        (
146                            datasynth_core::templates::NameCulture::Chinese,
147                            template_config.names.culture_distribution.chinese,
148                        ),
149                        (
150                            datasynth_core::templates::NameCulture::Japanese,
151                            template_config.names.culture_distribution.japanese,
152                        ),
153                        (
154                            datasynth_core::templates::NameCulture::Indian,
155                            template_config.names.culture_distribution.indian,
156                        ),
157                    ],
158                    email_domain: template_config.names.email_domain.clone(),
159                    generate_realistic_names: true,
160                };
161                let mut user_gen = UserGenerator::with_config(seed + 100, user_gen_config);
162                Some(user_gen.generate_standard(&companies))
163            } else {
164                None
165            }
166        });
167
168        // Initialize reference generator
169        let mut ref_gen = ReferenceGenerator::new(
170            start_date.year(),
171            companies.first().map(|s| s.as_str()).unwrap_or("1000"),
172        );
173        ref_gen.set_prefix(
174            ReferenceType::Invoice,
175            &template_config.references.invoice_prefix,
176        );
177        ref_gen.set_prefix(
178            ReferenceType::PurchaseOrder,
179            &template_config.references.po_prefix,
180        );
181        ref_gen.set_prefix(
182            ReferenceType::SalesOrder,
183            &template_config.references.so_prefix,
184        );
185
186        // Create weighted company selector (uniform weights for this constructor)
187        let company_selector = WeightedCompanySelector::uniform(companies.clone());
188
189        Self {
190            rng: seeded_rng(seed, 0),
191            seed,
192            config: config.clone(),
193            coa,
194            companies,
195            company_selector,
196            line_sampler: LineItemSampler::with_config(
197                seed + 1,
198                config.line_item_distribution.clone(),
199                config.even_odd_distribution.clone(),
200                config.debit_credit_distribution.clone(),
201            ),
202            amount_sampler: AmountSampler::with_config(seed + 2, config.amounts.clone()),
203            temporal_sampler: TemporalSampler::with_config(
204                seed + 3,
205                config.seasonality.clone(),
206                WorkingHoursConfig::default(),
207                Vec::new(),
208            ),
209            start_date,
210            end_date,
211            count: 0,
212            uuid_factory: DeterministicUuidFactory::new(seed, GeneratorType::JournalEntry),
213            user_pool,
214            description_generator: DescriptionGenerator::new(),
215            reference_generator: ref_gen,
216            template_config,
217            vendor_pool: VendorPool::standard(),
218            customer_pool: CustomerPool::standard(),
219            material_pool: None,
220            using_real_master_data: false,
221            fraud_config: FraudConfig::default(),
222            persona_errors_enabled: true, // Enable by default for realism
223            approval_enabled: true,       // Enable by default for realism
224            approval_threshold: rust_decimal::Decimal::new(10000, 0), // $10,000 default threshold
225            batch_state: None,
226            drift_controller: None,
227            business_day_calculator: None,
228            processing_lag_calculator: None,
229            temporal_patterns_config: None,
230        }
231    }
232
233    /// Create from a full GeneratorConfig.
234    ///
235    /// This constructor uses the volume_weight from company configs
236    /// for weighted company selection, and fraud config from GeneratorConfig.
237    pub fn from_generator_config(
238        full_config: &GeneratorConfig,
239        coa: Arc<ChartOfAccounts>,
240        start_date: NaiveDate,
241        end_date: NaiveDate,
242        seed: u64,
243    ) -> Self {
244        let companies: Vec<String> = full_config
245            .companies
246            .iter()
247            .map(|c| c.code.clone())
248            .collect();
249
250        // Create weighted selector using volume_weight from company configs
251        let company_selector = WeightedCompanySelector::from_configs(&full_config.companies);
252
253        let mut generator = Self::new_with_full_config(
254            full_config.transactions.clone(),
255            coa,
256            companies,
257            start_date,
258            end_date,
259            seed,
260            full_config.templates.clone(),
261            None,
262        );
263
264        // Override the uniform selector with weighted selector
265        generator.company_selector = company_selector;
266
267        // Set fraud config
268        generator.fraud_config = full_config.fraud.clone();
269
270        // Configure temporal patterns if enabled
271        let temporal_config = &full_config.temporal_patterns;
272        if temporal_config.enabled {
273            generator = generator.with_temporal_patterns(temporal_config.clone(), seed);
274        }
275
276        generator
277    }
278
279    /// Configure temporal patterns including business day calculations and processing lags.
280    ///
281    /// This enables realistic temporal behavior including:
282    /// - Business day awareness (no postings on weekends/holidays)
283    /// - Processing lag modeling (event-to-posting delays)
284    /// - Period-end dynamics (volume spikes at month/quarter/year end)
285    pub fn with_temporal_patterns(mut self, config: TemporalPatternsConfig, seed: u64) -> Self {
286        // Create business day calculator if enabled
287        if config.business_days.enabled {
288            let region = config
289                .calendars
290                .regions
291                .first()
292                .map(|r| Self::parse_region(r))
293                .unwrap_or(Region::US);
294
295            let calendar = HolidayCalendar::new(region, self.start_date.year());
296            self.business_day_calculator = Some(BusinessDayCalculator::new(calendar));
297        }
298
299        // Create processing lag calculator if enabled
300        if config.processing_lags.enabled {
301            let lag_config = Self::convert_processing_lag_config(&config.processing_lags);
302            self.processing_lag_calculator =
303                Some(ProcessingLagCalculator::with_config(seed, lag_config));
304        }
305
306        // Create period-end dynamics if configured
307        let model = config.period_end.model.as_deref().unwrap_or("flat");
308        if model != "flat"
309            || config
310                .period_end
311                .month_end
312                .as_ref()
313                .is_some_and(|m| m.peak_multiplier.unwrap_or(1.0) != 1.0)
314        {
315            let dynamics = Self::convert_period_end_config(&config.period_end);
316            self.temporal_sampler.set_period_end_dynamics(dynamics);
317        }
318
319        self.temporal_patterns_config = Some(config);
320        self
321    }
322
323    /// Configure temporal patterns using a [`CountryPack`] for the holiday calendar.
324    ///
325    /// This is an alternative to [`with_temporal_patterns`] that derives the
326    /// holiday calendar from a country-pack definition rather than the built-in
327    /// region-based calendars.  All other temporal behaviour (business-day
328    /// adjustment, processing lags, period-end dynamics) is configured
329    /// identically.
330    pub fn with_country_pack_temporal(
331        mut self,
332        config: TemporalPatternsConfig,
333        seed: u64,
334        pack: &CountryPack,
335    ) -> Self {
336        // Create business day calculator using the country pack calendar
337        if config.business_days.enabled {
338            let calendar = HolidayCalendar::from_country_pack(pack, self.start_date.year());
339            self.business_day_calculator = Some(BusinessDayCalculator::new(calendar));
340        }
341
342        // Create processing lag calculator if enabled
343        if config.processing_lags.enabled {
344            let lag_config = Self::convert_processing_lag_config(&config.processing_lags);
345            self.processing_lag_calculator =
346                Some(ProcessingLagCalculator::with_config(seed, lag_config));
347        }
348
349        // Create period-end dynamics if configured
350        let model = config.period_end.model.as_deref().unwrap_or("flat");
351        if model != "flat"
352            || config
353                .period_end
354                .month_end
355                .as_ref()
356                .is_some_and(|m| m.peak_multiplier.unwrap_or(1.0) != 1.0)
357        {
358            let dynamics = Self::convert_period_end_config(&config.period_end);
359            self.temporal_sampler.set_period_end_dynamics(dynamics);
360        }
361
362        self.temporal_patterns_config = Some(config);
363        self
364    }
365
366    /// Convert schema processing lag config to core config.
367    fn convert_processing_lag_config(
368        schema: &datasynth_config::schema::ProcessingLagSchemaConfig,
369    ) -> ProcessingLagConfig {
370        let mut config = ProcessingLagConfig {
371            enabled: schema.enabled,
372            ..Default::default()
373        };
374
375        // Helper to convert lag schema to distribution
376        let convert_lag = |lag: &datasynth_config::schema::LagDistributionSchemaConfig| {
377            let mut dist = LagDistribution::log_normal(lag.mu, lag.sigma);
378            if let Some(min) = lag.min_hours {
379                dist.min_lag_hours = min;
380            }
381            if let Some(max) = lag.max_hours {
382                dist.max_lag_hours = max;
383            }
384            dist
385        };
386
387        // Apply event-specific lags
388        if let Some(ref lag) = schema.sales_order_lag {
389            config
390                .event_lags
391                .insert(EventType::SalesOrder, convert_lag(lag));
392        }
393        if let Some(ref lag) = schema.purchase_order_lag {
394            config
395                .event_lags
396                .insert(EventType::PurchaseOrder, convert_lag(lag));
397        }
398        if let Some(ref lag) = schema.goods_receipt_lag {
399            config
400                .event_lags
401                .insert(EventType::GoodsReceipt, convert_lag(lag));
402        }
403        if let Some(ref lag) = schema.invoice_receipt_lag {
404            config
405                .event_lags
406                .insert(EventType::InvoiceReceipt, convert_lag(lag));
407        }
408        if let Some(ref lag) = schema.invoice_issue_lag {
409            config
410                .event_lags
411                .insert(EventType::InvoiceIssue, convert_lag(lag));
412        }
413        if let Some(ref lag) = schema.payment_lag {
414            config
415                .event_lags
416                .insert(EventType::Payment, convert_lag(lag));
417        }
418        if let Some(ref lag) = schema.journal_entry_lag {
419            config
420                .event_lags
421                .insert(EventType::JournalEntry, convert_lag(lag));
422        }
423
424        // Apply cross-day posting config
425        if let Some(ref cross_day) = schema.cross_day_posting {
426            config.cross_day = CrossDayConfig {
427                enabled: cross_day.enabled,
428                probability_by_hour: cross_day.probability_by_hour.clone(),
429                ..Default::default()
430            };
431        }
432
433        config
434    }
435
436    /// Convert schema period-end config to core PeriodEndDynamics.
437    fn convert_period_end_config(
438        schema: &datasynth_config::schema::PeriodEndSchemaConfig,
439    ) -> PeriodEndDynamics {
440        let model_type = schema.model.as_deref().unwrap_or("exponential");
441
442        // Helper to convert period config
443        let convert_period =
444            |period: Option<&datasynth_config::schema::PeriodEndModelSchemaConfig>,
445             default_peak: f64|
446             -> PeriodEndConfig {
447                if let Some(p) = period {
448                    let model = match model_type {
449                        "flat" => PeriodEndModel::FlatMultiplier {
450                            multiplier: p.peak_multiplier.unwrap_or(default_peak),
451                        },
452                        "extended_crunch" => PeriodEndModel::ExtendedCrunch {
453                            start_day: p.start_day.unwrap_or(-10),
454                            sustained_high_days: p.sustained_high_days.unwrap_or(3),
455                            peak_multiplier: p.peak_multiplier.unwrap_or(default_peak),
456                            ramp_up_days: 3, // Default ramp-up period
457                        },
458                        _ => PeriodEndModel::ExponentialAcceleration {
459                            start_day: p.start_day.unwrap_or(-10),
460                            base_multiplier: p.base_multiplier.unwrap_or(1.0),
461                            peak_multiplier: p.peak_multiplier.unwrap_or(default_peak),
462                            decay_rate: p.decay_rate.unwrap_or(0.3),
463                        },
464                    };
465                    PeriodEndConfig {
466                        enabled: true,
467                        model,
468                        additional_multiplier: p.additional_multiplier.unwrap_or(1.0),
469                    }
470                } else {
471                    PeriodEndConfig {
472                        enabled: true,
473                        model: PeriodEndModel::ExponentialAcceleration {
474                            start_day: -10,
475                            base_multiplier: 1.0,
476                            peak_multiplier: default_peak,
477                            decay_rate: 0.3,
478                        },
479                        additional_multiplier: 1.0,
480                    }
481                }
482            };
483
484        PeriodEndDynamics::new(
485            convert_period(schema.month_end.as_ref(), 2.0),
486            convert_period(schema.quarter_end.as_ref(), 3.5),
487            convert_period(schema.year_end.as_ref(), 5.0),
488        )
489    }
490
491    /// Parse a region string into a Region enum.
492    fn parse_region(region_str: &str) -> Region {
493        match region_str.to_uppercase().as_str() {
494            "US" => Region::US,
495            "DE" => Region::DE,
496            "GB" => Region::GB,
497            "CN" => Region::CN,
498            "JP" => Region::JP,
499            "IN" => Region::IN,
500            "BR" => Region::BR,
501            "MX" => Region::MX,
502            "AU" => Region::AU,
503            "SG" => Region::SG,
504            "KR" => Region::KR,
505            "FR" => Region::FR,
506            "IT" => Region::IT,
507            "ES" => Region::ES,
508            "CA" => Region::CA,
509            _ => Region::US,
510        }
511    }
512
513    /// Set a custom company selector.
514    pub fn set_company_selector(&mut self, selector: WeightedCompanySelector) {
515        self.company_selector = selector;
516    }
517
518    /// Get the current company selector.
519    pub fn company_selector(&self) -> &WeightedCompanySelector {
520        &self.company_selector
521    }
522
523    /// Set fraud configuration.
524    pub fn set_fraud_config(&mut self, config: FraudConfig) {
525        self.fraud_config = config;
526    }
527
528    /// Set vendors from generated master data.
529    ///
530    /// This replaces the default vendor pool with actual generated vendors,
531    /// ensuring JEs reference real master data entities.
532    pub fn with_vendors(mut self, vendors: &[Vendor]) -> Self {
533        if !vendors.is_empty() {
534            self.vendor_pool = VendorPool::from_vendors(vendors.to_vec());
535            self.using_real_master_data = true;
536        }
537        self
538    }
539
540    /// Set customers from generated master data.
541    ///
542    /// This replaces the default customer pool with actual generated customers,
543    /// ensuring JEs reference real master data entities.
544    pub fn with_customers(mut self, customers: &[Customer]) -> Self {
545        if !customers.is_empty() {
546            self.customer_pool = CustomerPool::from_customers(customers.to_vec());
547            self.using_real_master_data = true;
548        }
549        self
550    }
551
552    /// Set materials from generated master data.
553    ///
554    /// This provides material references for JEs that involve inventory movements.
555    pub fn with_materials(mut self, materials: &[Material]) -> Self {
556        if !materials.is_empty() {
557            self.material_pool = Some(MaterialPool::from_materials(materials.to_vec()));
558            self.using_real_master_data = true;
559        }
560        self
561    }
562
563    /// Set all master data at once for convenience.
564    ///
565    /// This is the recommended way to configure the JE generator with
566    /// generated master data to ensure data coherence.
567    pub fn with_master_data(
568        self,
569        vendors: &[Vendor],
570        customers: &[Customer],
571        materials: &[Material],
572    ) -> Self {
573        self.with_vendors(vendors)
574            .with_customers(customers)
575            .with_materials(materials)
576    }
577
578    /// Replace the user pool with one generated from a [`CountryPack`].
579    ///
580    /// This is an alternative to the default name-culture distribution that
581    /// derives name pools and weights from the country-pack's `names` section.
582    /// The existing user pool (if any) is discarded and regenerated using
583    /// [`MultiCultureNameGenerator::from_country_pack`].
584    pub fn with_country_pack_names(mut self, pack: &CountryPack) -> Self {
585        let name_gen =
586            datasynth_core::templates::MultiCultureNameGenerator::from_country_pack(pack);
587        let config = UserGeneratorConfig {
588            // The culture distribution is embedded in the name generator
589            // itself, so we use an empty list here.
590            culture_distribution: Vec::new(),
591            email_domain: name_gen.email_domain().to_string(),
592            generate_realistic_names: true,
593        };
594        let mut user_gen = UserGenerator::with_name_generator(self.seed + 100, config, name_gen);
595        self.user_pool = Some(user_gen.generate_standard(&self.companies));
596        self
597    }
598
599    /// Check if the generator is using real master data.
600    pub fn is_using_real_master_data(&self) -> bool {
601        self.using_real_master_data
602    }
603
604    /// Determine if this transaction should be fraudulent.
605    fn determine_fraud(&mut self) -> Option<FraudType> {
606        if !self.fraud_config.enabled {
607            return None;
608        }
609
610        // Roll for fraud based on fraud rate
611        if self.rng.random::<f64>() >= self.fraud_config.fraud_rate {
612            return None;
613        }
614
615        // Select fraud type based on distribution
616        Some(self.select_fraud_type())
617    }
618
619    /// Select a fraud type based on the configured distribution.
620    fn select_fraud_type(&mut self) -> FraudType {
621        let dist = &self.fraud_config.fraud_type_distribution;
622        let roll: f64 = self.rng.random();
623
624        let mut cumulative = 0.0;
625
626        cumulative += dist.suspense_account_abuse;
627        if roll < cumulative {
628            return FraudType::SuspenseAccountAbuse;
629        }
630
631        cumulative += dist.fictitious_transaction;
632        if roll < cumulative {
633            return FraudType::FictitiousTransaction;
634        }
635
636        cumulative += dist.revenue_manipulation;
637        if roll < cumulative {
638            return FraudType::RevenueManipulation;
639        }
640
641        cumulative += dist.expense_capitalization;
642        if roll < cumulative {
643            return FraudType::ExpenseCapitalization;
644        }
645
646        cumulative += dist.split_transaction;
647        if roll < cumulative {
648            return FraudType::SplitTransaction;
649        }
650
651        cumulative += dist.timing_anomaly;
652        if roll < cumulative {
653            return FraudType::TimingAnomaly;
654        }
655
656        cumulative += dist.unauthorized_access;
657        if roll < cumulative {
658            return FraudType::UnauthorizedAccess;
659        }
660
661        // Default fallback
662        FraudType::DuplicatePayment
663    }
664
665    /// Map a fraud type to an amount pattern for suspicious amounts.
666    fn fraud_type_to_amount_pattern(&self, fraud_type: FraudType) -> FraudAmountPattern {
667        match fraud_type {
668            FraudType::SplitTransaction | FraudType::JustBelowThreshold => {
669                FraudAmountPattern::ThresholdAdjacent
670            }
671            FraudType::FictitiousTransaction
672            | FraudType::FictitiousEntry
673            | FraudType::SuspenseAccountAbuse
674            | FraudType::RoundDollarManipulation => FraudAmountPattern::ObviousRoundNumbers,
675            FraudType::RevenueManipulation
676            | FraudType::ExpenseCapitalization
677            | FraudType::ImproperCapitalization
678            | FraudType::ReserveManipulation
679            | FraudType::UnauthorizedAccess
680            | FraudType::PrematureRevenue
681            | FraudType::UnderstatedLiabilities
682            | FraudType::OverstatedAssets
683            | FraudType::ChannelStuffing => FraudAmountPattern::StatisticallyImprobable,
684            FraudType::DuplicatePayment
685            | FraudType::TimingAnomaly
686            | FraudType::SelfApproval
687            | FraudType::ExceededApprovalLimit
688            | FraudType::SegregationOfDutiesViolation
689            | FraudType::UnauthorizedApproval
690            | FraudType::CollusiveApproval
691            | FraudType::FictitiousVendor
692            | FraudType::ShellCompanyPayment
693            | FraudType::Kickback
694            | FraudType::KickbackScheme
695            | FraudType::InvoiceManipulation
696            | FraudType::AssetMisappropriation
697            | FraudType::InventoryTheft
698            | FraudType::GhostEmployee => FraudAmountPattern::Normal,
699            // Accounting Standards Fraud Types (ASC 606/IFRS 15 - Revenue)
700            FraudType::ImproperRevenueRecognition
701            | FraudType::ImproperPoAllocation
702            | FraudType::VariableConsiderationManipulation
703            | FraudType::ContractModificationMisstatement => {
704                FraudAmountPattern::StatisticallyImprobable
705            }
706            // Accounting Standards Fraud Types (ASC 842/IFRS 16 - Leases)
707            FraudType::LeaseClassificationManipulation
708            | FraudType::OffBalanceSheetLease
709            | FraudType::LeaseLiabilityUnderstatement
710            | FraudType::RouAssetMisstatement => FraudAmountPattern::StatisticallyImprobable,
711            // Accounting Standards Fraud Types (ASC 820/IFRS 13 - Fair Value)
712            FraudType::FairValueHierarchyManipulation
713            | FraudType::Level3InputManipulation
714            | FraudType::ValuationTechniqueManipulation => {
715                FraudAmountPattern::StatisticallyImprobable
716            }
717            // Accounting Standards Fraud Types (ASC 360/IAS 36 - Impairment)
718            FraudType::DelayedImpairment
719            | FraudType::ImpairmentTestAvoidance
720            | FraudType::CashFlowProjectionManipulation
721            | FraudType::ImproperImpairmentReversal => FraudAmountPattern::StatisticallyImprobable,
722            // Sourcing/Procurement Fraud
723            FraudType::BidRigging
724            | FraudType::PhantomVendorContract
725            | FraudType::ConflictOfInterestSourcing => FraudAmountPattern::Normal,
726            FraudType::SplitContractThreshold => FraudAmountPattern::ThresholdAdjacent,
727            // HR/Payroll Fraud
728            FraudType::GhostEmployeePayroll
729            | FraudType::PayrollInflation
730            | FraudType::DuplicateExpenseReport
731            | FraudType::FictitiousExpense => FraudAmountPattern::Normal,
732            FraudType::SplitExpenseToAvoidApproval => FraudAmountPattern::ThresholdAdjacent,
733            // O2C Fraud
734            FraudType::RevenueTimingManipulation => FraudAmountPattern::StatisticallyImprobable,
735            FraudType::QuotePriceOverride => FraudAmountPattern::Normal,
736        }
737    }
738
739    /// Generate a deterministic UUID using the factory.
740    #[inline]
741    fn generate_deterministic_uuid(&self) -> uuid::Uuid {
742        self.uuid_factory.next()
743    }
744
745    /// Generate a single journal entry.
746    pub fn generate(&mut self) -> JournalEntry {
747        debug!(
748            count = self.count,
749            companies = self.companies.len(),
750            start_date = %self.start_date,
751            end_date = %self.end_date,
752            "Generating journal entry"
753        );
754
755        // Check if we're in a batch - if so, generate a batched entry
756        if let Some(ref state) = self.batch_state {
757            if state.remaining > 0 {
758                return self.generate_batched_entry();
759            }
760        }
761
762        self.count += 1;
763
764        // Generate deterministic document ID
765        let document_id = self.generate_deterministic_uuid();
766
767        // Sample posting date
768        let mut posting_date = self
769            .temporal_sampler
770            .sample_date(self.start_date, self.end_date);
771
772        // Adjust posting date to be a business day if business day calculator is configured
773        if let Some(ref calc) = self.business_day_calculator {
774            if !calc.is_business_day(posting_date) {
775                // Move to next business day
776                posting_date = calc.next_business_day(posting_date, false);
777                // Ensure we don't exceed end_date
778                if posting_date > self.end_date {
779                    posting_date = calc.prev_business_day(self.end_date, true);
780                }
781            }
782        }
783
784        // Select company using weighted selector
785        let company_code = self.company_selector.select(&mut self.rng).to_string();
786
787        // Sample line item specification
788        let line_spec = self.line_sampler.sample();
789
790        // Determine source type using full 4-way distribution
791        let source = self.select_source();
792        let is_automated = matches!(
793            source,
794            TransactionSource::Automated | TransactionSource::Recurring
795        );
796
797        // Select business process
798        let business_process = self.select_business_process();
799
800        // Determine if this is a fraudulent transaction
801        let fraud_type = self.determine_fraud();
802        let is_fraud = fraud_type.is_some();
803
804        // Sample time based on source
805        let time = self.temporal_sampler.sample_time(!is_automated);
806        let created_at = posting_date.and_time(time).and_utc();
807
808        // Select user from pool or generate generic
809        let (created_by, user_persona) = self.select_user(is_automated);
810
811        // Create header with deterministic UUID
812        let mut header =
813            JournalEntryHeader::with_deterministic_id(company_code, posting_date, document_id);
814        header.created_at = created_at;
815        header.source = source;
816        header.created_by = created_by;
817        header.user_persona = user_persona;
818        header.business_process = Some(business_process);
819        header.is_fraud = is_fraud;
820        header.fraud_type = fraud_type;
821
822        // Generate description context
823        let mut context =
824            DescriptionContext::with_period(posting_date.month(), posting_date.year());
825
826        // Add vendor/customer context based on business process
827        match business_process {
828            BusinessProcess::P2P => {
829                if let Some(vendor) = self.vendor_pool.random_vendor(&mut self.rng) {
830                    context.vendor_name = Some(vendor.name.clone());
831                }
832            }
833            BusinessProcess::O2C => {
834                if let Some(customer) = self.customer_pool.random_customer(&mut self.rng) {
835                    context.customer_name = Some(customer.name.clone());
836                }
837            }
838            _ => {}
839        }
840
841        // Generate header text if enabled
842        if self.template_config.descriptions.generate_header_text {
843            header.header_text = Some(self.description_generator.generate_header_text(
844                business_process,
845                &context,
846                &mut self.rng,
847            ));
848        }
849
850        // Generate reference if enabled
851        if self.template_config.references.generate_references {
852            header.reference = Some(
853                self.reference_generator
854                    .generate_for_process_year(business_process, posting_date.year()),
855            );
856        }
857
858        // Generate line items
859        let mut entry = JournalEntry::new(header);
860
861        // Generate amount - use fraud pattern if this is a fraudulent transaction
862        let base_amount = if let Some(ft) = fraud_type {
863            let pattern = self.fraud_type_to_amount_pattern(ft);
864            self.amount_sampler.sample_fraud(pattern)
865        } else {
866            self.amount_sampler.sample()
867        };
868
869        // Apply temporal drift if configured
870        let drift_adjusted_amount = {
871            let drift = self.get_drift_adjustments(posting_date);
872            if drift.amount_mean_multiplier != 1.0 {
873                // Apply drift multiplier (includes seasonal factor if enabled)
874                let multiplier = drift.amount_mean_multiplier * drift.seasonal_factor;
875                let adjusted = base_amount.to_f64().unwrap_or(1.0) * multiplier;
876                Decimal::from_f64_retain(adjusted).unwrap_or(base_amount)
877            } else {
878                base_amount
879            }
880        };
881
882        // Apply human variation to amounts for non-automated transactions
883        let total_amount = if is_automated {
884            drift_adjusted_amount // Automated systems use exact amounts
885        } else {
886            self.apply_human_variation(drift_adjusted_amount)
887        };
888
889        // Generate debit lines
890        let debit_amounts = self
891            .amount_sampler
892            .sample_summing_to(line_spec.debit_count, total_amount);
893        for (i, amount) in debit_amounts.into_iter().enumerate() {
894            let account_number = self.select_debit_account().account_number.clone();
895            let mut line = JournalEntryLine::debit(
896                entry.header.document_id,
897                (i + 1) as u32,
898                account_number.clone(),
899                amount,
900            );
901
902            // Generate line text if enabled
903            if self.template_config.descriptions.generate_line_text {
904                line.line_text = Some(self.description_generator.generate_line_text(
905                    &account_number,
906                    &context,
907                    &mut self.rng,
908                ));
909            }
910
911            entry.add_line(line);
912        }
913
914        // Generate credit lines - use the SAME amounts to ensure balance
915        let credit_amounts = self
916            .amount_sampler
917            .sample_summing_to(line_spec.credit_count, total_amount);
918        for (i, amount) in credit_amounts.into_iter().enumerate() {
919            let account_number = self.select_credit_account().account_number.clone();
920            let mut line = JournalEntryLine::credit(
921                entry.header.document_id,
922                (line_spec.debit_count + i + 1) as u32,
923                account_number.clone(),
924                amount,
925            );
926
927            // Generate line text if enabled
928            if self.template_config.descriptions.generate_line_text {
929                line.line_text = Some(self.description_generator.generate_line_text(
930                    &account_number,
931                    &context,
932                    &mut self.rng,
933                ));
934            }
935
936            entry.add_line(line);
937        }
938
939        // Apply persona-based errors if enabled and it's a human user
940        if self.persona_errors_enabled && !is_automated {
941            self.maybe_inject_persona_error(&mut entry);
942        }
943
944        // Apply approval workflow if enabled and amount exceeds threshold
945        if self.approval_enabled {
946            self.maybe_apply_approval_workflow(&mut entry, posting_date);
947        }
948
949        // Maybe start a batch of similar entries for realism
950        self.maybe_start_batch(&entry);
951
952        entry
953    }
954
955    /// Enable or disable persona-based error injection.
956    ///
957    /// When enabled, entries created by human personas have a chance
958    /// to contain realistic human errors based on their experience level.
959    pub fn with_persona_errors(mut self, enabled: bool) -> Self {
960        self.persona_errors_enabled = enabled;
961        self
962    }
963
964    /// Set fraud configuration for fraud injection.
965    ///
966    /// When fraud is enabled in the config, transactions have a chance
967    /// to be marked as fraudulent based on the configured fraud rate.
968    pub fn with_fraud_config(mut self, config: FraudConfig) -> Self {
969        self.fraud_config = config;
970        self
971    }
972
    /// Check if persona errors are enabled.
    ///
    /// Returns the current value of the `persona_errors_enabled` flag, as set
    /// through `with_persona_errors`.
    pub fn persona_errors_enabled(&self) -> bool {
        self.persona_errors_enabled
    }
977
978    /// Enable or disable batch processing behavior.
979    ///
980    /// When enabled (default), the generator will occasionally produce batches
981    /// of similar entries, simulating how humans batch similar work together.
982    pub fn with_batching(mut self, enabled: bool) -> Self {
983        if !enabled {
984            self.batch_state = None;
985        }
986        self
987    }
988
    /// Check if batch processing is enabled.
    ///
    /// Currently always returns `true`: the value is a constant and does not
    /// read any generator state.
    pub fn batching_enabled(&self) -> bool {
        // Batching is treated as always on; there is no stored flag to consult
        true
    }
994
995    /// Maybe start a batch based on the current entry.
996    ///
997    /// Humans often batch similar work: processing invoices from one vendor,
998    /// entering expense reports for a trip, reconciling similar items.
999    fn maybe_start_batch(&mut self, entry: &JournalEntry) {
1000        // Only start batch for non-automated, non-fraud entries
1001        if entry.header.source == TransactionSource::Automated || entry.header.is_fraud {
1002            return;
1003        }
1004
1005        // 15% chance to start a batch (most work is not batched)
1006        if self.rng.random::<f64>() > 0.15 {
1007            return;
1008        }
1009
1010        // Extract key attributes for batching
1011        let base_account = entry
1012            .lines
1013            .first()
1014            .map(|l| l.gl_account.clone())
1015            .unwrap_or_default();
1016
1017        let base_amount = entry.total_debit();
1018
1019        self.batch_state = Some(BatchState {
1020            base_account_number: base_account,
1021            base_amount,
1022            base_business_process: entry.header.business_process,
1023            base_posting_date: entry.header.posting_date,
1024            remaining: self.rng.random_range(2..7), // 2-6 more similar entries
1025        });
1026    }
1027
1028    /// Generate an entry that's part of the current batch.
1029    ///
1030    /// Batched entries have:
1031    /// - Same or very similar business process
1032    /// - Same posting date (batched work done together)
1033    /// - Similar amounts (within ±15%)
1034    /// - Same debit account (processing similar items)
1035    fn generate_batched_entry(&mut self) -> JournalEntry {
1036        use rust_decimal::Decimal;
1037
1038        // Decrement batch counter
1039        if let Some(ref mut state) = self.batch_state {
1040            state.remaining = state.remaining.saturating_sub(1);
1041        }
1042
1043        let Some(batch) = self.batch_state.clone() else {
1044            // This is a programming error - batch_state should be set before calling this method.
1045            // Clear state and fall back to generating a standard entry instead of panicking.
1046            tracing::warn!(
1047                "generate_batched_entry called without batch_state; generating standard entry"
1048            );
1049            self.batch_state = None;
1050            return self.generate();
1051        };
1052
1053        // Use the batch's posting date (work done on same day)
1054        let posting_date = batch.base_posting_date;
1055
1056        self.count += 1;
1057        let document_id = self.generate_deterministic_uuid();
1058
1059        // Select same company (batched work is usually same company)
1060        let company_code = self.company_selector.select(&mut self.rng).to_string();
1061
1062        // Use simplified line spec for batched entries (usually 2-line)
1063        let _line_spec = LineItemSpec {
1064            total_count: 2,
1065            debit_count: 1,
1066            credit_count: 1,
1067            split_type: DebitCreditSplit::Equal,
1068        };
1069
1070        // Batched entries are always manual
1071        let source = TransactionSource::Manual;
1072
1073        // Use the batch's business process
1074        let business_process = batch.base_business_process.unwrap_or(BusinessProcess::R2R);
1075
1076        // Sample time
1077        let time = self.temporal_sampler.sample_time(true);
1078        let created_at = posting_date.and_time(time).and_utc();
1079
1080        // Same user for batched work
1081        let (created_by, user_persona) = self.select_user(false);
1082
1083        // Create header
1084        let mut header =
1085            JournalEntryHeader::with_deterministic_id(company_code, posting_date, document_id);
1086        header.created_at = created_at;
1087        header.source = source;
1088        header.created_by = created_by;
1089        header.user_persona = user_persona;
1090        header.business_process = Some(business_process);
1091
1092        // Generate similar amount (within ±15% of base)
1093        let variation = self.rng.random_range(-0.15..0.15);
1094        let varied_amount =
1095            batch.base_amount * (Decimal::ONE + Decimal::try_from(variation).unwrap_or_default());
1096        let total_amount = varied_amount.round_dp(2).max(Decimal::from(1));
1097
1098        // Create the entry
1099        let mut entry = JournalEntry::new(header);
1100
1101        // Use same debit account as batch base
1102        let debit_line = JournalEntryLine::debit(
1103            entry.header.document_id,
1104            1,
1105            batch.base_account_number.clone(),
1106            total_amount,
1107        );
1108        entry.add_line(debit_line);
1109
1110        // Select a credit account
1111        let credit_account = self.select_credit_account().account_number.clone();
1112        let credit_line =
1113            JournalEntryLine::credit(entry.header.document_id, 2, credit_account, total_amount);
1114        entry.add_line(credit_line);
1115
1116        // Apply persona-based errors if enabled
1117        if self.persona_errors_enabled {
1118            self.maybe_inject_persona_error(&mut entry);
1119        }
1120
1121        // Apply approval workflow if enabled
1122        if self.approval_enabled {
1123            self.maybe_apply_approval_workflow(&mut entry, posting_date);
1124        }
1125
1126        // Clear batch state if no more entries remaining
1127        if batch.remaining <= 1 {
1128            self.batch_state = None;
1129        }
1130
1131        entry
1132    }
1133
1134    /// Maybe inject a persona-appropriate error based on the persona's error rate.
1135    fn maybe_inject_persona_error(&mut self, entry: &mut JournalEntry) {
1136        // Parse persona from the entry header
1137        let persona_str = &entry.header.user_persona;
1138        let persona = match persona_str.to_lowercase().as_str() {
1139            s if s.contains("junior") => UserPersona::JuniorAccountant,
1140            s if s.contains("senior") => UserPersona::SeniorAccountant,
1141            s if s.contains("controller") => UserPersona::Controller,
1142            s if s.contains("manager") => UserPersona::Manager,
1143            s if s.contains("executive") => UserPersona::Executive,
1144            _ => return, // Don't inject errors for unknown personas
1145        };
1146
1147        // Get base error rate from persona
1148        let base_error_rate = persona.error_rate();
1149
1150        // Apply stress factors based on posting date
1151        let adjusted_rate = self.apply_stress_factors(base_error_rate, entry.header.posting_date);
1152
1153        // Check if error should occur based on adjusted rate
1154        if self.rng.random::<f64>() >= adjusted_rate {
1155            return; // No error this time
1156        }
1157
1158        // Select and inject persona-appropriate error
1159        self.inject_human_error(entry, persona);
1160    }
1161
1162    /// Apply contextual stress factors to the base error rate.
1163    ///
1164    /// Stress factors increase error likelihood during:
1165    /// - Month-end (day >= 28): 1.5x more errors due to deadline pressure
1166    /// - Quarter-end (Mar, Jun, Sep, Dec): additional 25% boost
1167    /// - Year-end (December 28-31): 2.0x more errors due to audit pressure
1168    /// - Monday morning (catch-up work): 20% more errors
1169    /// - Friday afternoon (rushing to leave): 30% more errors
1170    fn apply_stress_factors(&self, base_rate: f64, posting_date: chrono::NaiveDate) -> f64 {
1171        use chrono::Datelike;
1172
1173        let mut rate = base_rate;
1174        let day = posting_date.day();
1175        let month = posting_date.month();
1176
1177        // Year-end stress (December 28-31): double the error rate
1178        if month == 12 && day >= 28 {
1179            rate *= 2.0;
1180            return rate.min(0.5); // Cap at 50% to keep it realistic
1181        }
1182
1183        // Quarter-end stress (last days of Mar, Jun, Sep, Dec)
1184        if matches!(month, 3 | 6 | 9 | 12) && day >= 28 {
1185            rate *= 1.75; // 75% more errors at quarter end
1186            return rate.min(0.4);
1187        }
1188
1189        // Month-end stress (last 3 days of month)
1190        if day >= 28 {
1191            rate *= 1.5; // 50% more errors at month end
1192        }
1193
1194        // Day-of-week stress effects
1195        let weekday = posting_date.weekday();
1196        match weekday {
1197            chrono::Weekday::Mon => {
1198                // Monday: catching up, often rushed
1199                rate *= 1.2;
1200            }
1201            chrono::Weekday::Fri => {
1202                // Friday: rushing to finish before weekend
1203                rate *= 1.3;
1204            }
1205            _ => {}
1206        }
1207
1208        // Cap at 40% to keep it realistic
1209        rate.min(0.4)
1210    }
1211
1212    /// Apply human-like variation to an amount.
1213    ///
1214    /// Humans don't enter perfectly calculated amounts - they:
1215    /// - Round amounts differently
1216    /// - Estimate instead of calculating exactly
1217    /// - Make small input variations
1218    ///
1219    /// This applies small variations (typically ±2%) to make amounts more realistic.
1220    fn apply_human_variation(&mut self, amount: rust_decimal::Decimal) -> rust_decimal::Decimal {
1221        use rust_decimal::Decimal;
1222
1223        // Automated transactions or very small amounts don't get variation
1224        if amount < Decimal::from(10) {
1225            return amount;
1226        }
1227
1228        // 70% chance of human variation being applied
1229        if self.rng.random::<f64>() > 0.70 {
1230            return amount;
1231        }
1232
1233        // Decide which type of human variation to apply
1234        let variation_type: u8 = self.rng.random_range(0..4);
1235
1236        match variation_type {
1237            0 => {
1238                // ±2% variation (common for estimated amounts)
1239                let variation_pct = self.rng.random_range(-0.02..0.02);
1240                let variation = amount * Decimal::try_from(variation_pct).unwrap_or_default();
1241                (amount + variation).round_dp(2)
1242            }
1243            1 => {
1244                // Round to nearest $10
1245                let ten = Decimal::from(10);
1246                (amount / ten).round() * ten
1247            }
1248            2 => {
1249                // Round to nearest $100 (for larger amounts)
1250                if amount >= Decimal::from(500) {
1251                    let hundred = Decimal::from(100);
1252                    (amount / hundred).round() * hundred
1253                } else {
1254                    amount
1255                }
1256            }
1257            3 => {
1258                // Slight under/over payment (±$0.01 to ±$1.00)
1259                let cents = Decimal::new(self.rng.random_range(-100..100), 2);
1260                (amount + cents).max(Decimal::ZERO).round_dp(2)
1261            }
1262            _ => amount,
1263        }
1264    }
1265
1266    /// Rebalance an entry after a one-sided amount modification.
1267    ///
1268    /// When an error modifies one line's amount, this finds a line on the opposite
1269    /// side (credit if modified was debit, or vice versa) and adjusts it by the
1270    /// same impact to maintain balance.
1271    fn rebalance_entry(entry: &mut JournalEntry, modified_was_debit: bool, impact: Decimal) {
1272        // Find a line on the opposite side to adjust
1273        let balancing_idx = entry.lines.iter().position(|l| {
1274            if modified_was_debit {
1275                l.credit_amount > Decimal::ZERO
1276            } else {
1277                l.debit_amount > Decimal::ZERO
1278            }
1279        });
1280
1281        if let Some(idx) = balancing_idx {
1282            if modified_was_debit {
1283                entry.lines[idx].credit_amount += impact;
1284            } else {
1285                entry.lines[idx].debit_amount += impact;
1286            }
1287        }
1288    }
1289
1290    /// Inject a human-like error based on the persona.
1291    ///
1292    /// All error types maintain balance - amount modifications are applied to both sides.
1293    /// Entries are marked with [HUMAN_ERROR:*] tags in header_text for ML detection.
1294    fn inject_human_error(&mut self, entry: &mut JournalEntry, persona: UserPersona) {
1295        use rust_decimal::Decimal;
1296
1297        // Different personas make different types of errors
1298        let error_type: u8 = match persona {
1299            UserPersona::JuniorAccountant => {
1300                // Junior accountants make more varied errors
1301                self.rng.random_range(0..5)
1302            }
1303            UserPersona::SeniorAccountant => {
1304                // Senior accountants mainly make transposition errors
1305                self.rng.random_range(0..3)
1306            }
1307            UserPersona::Controller | UserPersona::Manager => {
1308                // Controllers/managers mainly make rounding or cutoff errors
1309                self.rng.random_range(3..5)
1310            }
1311            _ => return,
1312        };
1313
1314        match error_type {
1315            0 => {
1316                // Transposed digits in an amount
1317                if let Some(line) = entry.lines.get_mut(0) {
1318                    let is_debit = line.debit_amount > Decimal::ZERO;
1319                    let original_amount = if is_debit {
1320                        line.debit_amount
1321                    } else {
1322                        line.credit_amount
1323                    };
1324
1325                    // Simple digit swap in the string representation
1326                    let s = original_amount.to_string();
1327                    if s.len() >= 2 {
1328                        let chars: Vec<char> = s.chars().collect();
1329                        let pos = self.rng.random_range(0..chars.len().saturating_sub(1));
1330                        if chars[pos].is_ascii_digit()
1331                            && chars.get(pos + 1).is_some_and(|c| c.is_ascii_digit())
1332                        {
1333                            let mut new_chars = chars;
1334                            new_chars.swap(pos, pos + 1);
1335                            if let Ok(new_amount) =
1336                                new_chars.into_iter().collect::<String>().parse::<Decimal>()
1337                            {
1338                                let impact = new_amount - original_amount;
1339
1340                                // Apply to the modified line
1341                                if is_debit {
1342                                    entry.lines[0].debit_amount = new_amount;
1343                                } else {
1344                                    entry.lines[0].credit_amount = new_amount;
1345                                }
1346
1347                                // Rebalance the entry
1348                                Self::rebalance_entry(entry, is_debit, impact);
1349
1350                                entry.header.header_text = Some(
1351                                    entry.header.header_text.clone().unwrap_or_default()
1352                                        + " [HUMAN_ERROR:TRANSPOSITION]",
1353                                );
1354                            }
1355                        }
1356                    }
1357                }
1358            }
1359            1 => {
1360                // Wrong decimal place (off by factor of 10)
1361                if let Some(line) = entry.lines.get_mut(0) {
1362                    let is_debit = line.debit_amount > Decimal::ZERO;
1363                    let original_amount = if is_debit {
1364                        line.debit_amount
1365                    } else {
1366                        line.credit_amount
1367                    };
1368
1369                    let new_amount = original_amount * Decimal::new(10, 0);
1370                    let impact = new_amount - original_amount;
1371
1372                    // Apply to the modified line
1373                    if is_debit {
1374                        entry.lines[0].debit_amount = new_amount;
1375                    } else {
1376                        entry.lines[0].credit_amount = new_amount;
1377                    }
1378
1379                    // Rebalance the entry
1380                    Self::rebalance_entry(entry, is_debit, impact);
1381
1382                    entry.header.header_text = Some(
1383                        entry.header.header_text.clone().unwrap_or_default()
1384                            + " [HUMAN_ERROR:DECIMAL_SHIFT]",
1385                    );
1386                }
1387            }
1388            2 => {
1389                // Typo in description (doesn't affect balance)
1390                if let Some(ref mut text) = entry.header.header_text {
1391                    let typos = ["teh", "adn", "wiht", "taht", "recieve"];
1392                    let correct = ["the", "and", "with", "that", "receive"];
1393                    let idx = self.rng.random_range(0..typos.len());
1394                    if text.to_lowercase().contains(correct[idx]) {
1395                        *text = text.replace(correct[idx], typos[idx]);
1396                        *text = format!("{} [HUMAN_ERROR:TYPO]", text);
1397                    }
1398                }
1399            }
1400            3 => {
1401                // Rounding to round number
1402                if let Some(line) = entry.lines.get_mut(0) {
1403                    let is_debit = line.debit_amount > Decimal::ZERO;
1404                    let original_amount = if is_debit {
1405                        line.debit_amount
1406                    } else {
1407                        line.credit_amount
1408                    };
1409
1410                    let new_amount =
1411                        (original_amount / Decimal::new(100, 0)).round() * Decimal::new(100, 0);
1412                    let impact = new_amount - original_amount;
1413
1414                    // Apply to the modified line
1415                    if is_debit {
1416                        entry.lines[0].debit_amount = new_amount;
1417                    } else {
1418                        entry.lines[0].credit_amount = new_amount;
1419                    }
1420
1421                    // Rebalance the entry
1422                    Self::rebalance_entry(entry, is_debit, impact);
1423
1424                    entry.header.header_text = Some(
1425                        entry.header.header_text.clone().unwrap_or_default()
1426                            + " [HUMAN_ERROR:ROUNDED]",
1427                    );
1428                }
1429            }
1430            4 => {
1431                // Late posting marker (document date much earlier than posting date)
1432                // This doesn't create an imbalance
1433                if entry.header.document_date == entry.header.posting_date {
1434                    let days_late = self.rng.random_range(5..15);
1435                    entry.header.document_date =
1436                        entry.header.posting_date - chrono::Duration::days(days_late);
1437                    entry.header.header_text = Some(
1438                        entry.header.header_text.clone().unwrap_or_default()
1439                            + " [HUMAN_ERROR:LATE_POSTING]",
1440                    );
1441                }
1442            }
1443            _ => {}
1444        }
1445    }
1446
1447    /// Apply approval workflow for high-value transactions.
1448    ///
1449    /// If the entry amount exceeds the approval threshold, simulate an
1450    /// approval workflow with appropriate approvers based on amount.
1451    fn maybe_apply_approval_workflow(
1452        &mut self,
1453        entry: &mut JournalEntry,
1454        _posting_date: NaiveDate,
1455    ) {
1456        use rust_decimal::Decimal;
1457
1458        let amount = entry.total_debit();
1459
1460        // Skip if amount is below threshold
1461        if amount <= self.approval_threshold {
1462            // Auto-approved below threshold
1463            let workflow = ApprovalWorkflow::auto_approved(
1464                entry.header.created_by.clone(),
1465                entry.header.user_persona.clone(),
1466                amount,
1467                entry.header.created_at,
1468            );
1469            entry.header.approval_workflow = Some(workflow);
1470            return;
1471        }
1472
1473        // Mark as SOX relevant for high-value transactions
1474        entry.header.sox_relevant = true;
1475
1476        // Determine required approval levels based on amount
1477        let required_levels = if amount > Decimal::new(100000, 0) {
1478            3 // Executive approval required
1479        } else if amount > Decimal::new(50000, 0) {
1480            2 // Senior management approval
1481        } else {
1482            1 // Manager approval
1483        };
1484
1485        // Create the approval workflow
1486        let mut workflow = ApprovalWorkflow::new(
1487            entry.header.created_by.clone(),
1488            entry.header.user_persona.clone(),
1489            amount,
1490        );
1491        workflow.required_levels = required_levels;
1492
1493        // Simulate submission
1494        let submit_time = entry.header.created_at;
1495        let submit_action = ApprovalAction::new(
1496            entry.header.created_by.clone(),
1497            entry.header.user_persona.clone(),
1498            self.parse_persona(&entry.header.user_persona),
1499            ApprovalActionType::Submit,
1500            0,
1501        )
1502        .with_timestamp(submit_time);
1503
1504        workflow.actions.push(submit_action);
1505        workflow.status = ApprovalStatus::Pending;
1506        workflow.submitted_at = Some(submit_time);
1507
1508        // Simulate approvals with realistic delays
1509        let mut current_time = submit_time;
1510        for level in 1..=required_levels {
1511            // Add delay for approval (1-3 business hours per level)
1512            let delay_hours = self.rng.random_range(1..4);
1513            current_time += chrono::Duration::hours(delay_hours);
1514
1515            // Skip weekends
1516            while current_time.weekday() == chrono::Weekday::Sat
1517                || current_time.weekday() == chrono::Weekday::Sun
1518            {
1519                current_time += chrono::Duration::days(1);
1520            }
1521
1522            // Generate approver based on level
1523            let (approver_id, approver_role) = self.select_approver(level);
1524
1525            let approve_action = ApprovalAction::new(
1526                approver_id.clone(),
1527                format!("{:?}", approver_role),
1528                approver_role,
1529                ApprovalActionType::Approve,
1530                level,
1531            )
1532            .with_timestamp(current_time);
1533
1534            workflow.actions.push(approve_action);
1535            workflow.current_level = level;
1536        }
1537
1538        // Mark as approved
1539        workflow.status = ApprovalStatus::Approved;
1540        workflow.approved_at = Some(current_time);
1541
1542        entry.header.approval_workflow = Some(workflow);
1543    }
1544
1545    /// Select an approver based on the required level.
1546    fn select_approver(&mut self, level: u8) -> (String, UserPersona) {
1547        let persona = match level {
1548            1 => UserPersona::Manager,
1549            2 => UserPersona::Controller,
1550            _ => UserPersona::Executive,
1551        };
1552
1553        // Try to get from user pool first
1554        if let Some(ref pool) = self.user_pool {
1555            if let Some(user) = pool.get_random_user(persona, &mut self.rng) {
1556                return (user.user_id.clone(), persona);
1557            }
1558        }
1559
1560        // Fallback to generated approver
1561        let approver_id = match persona {
1562            UserPersona::Manager => format!("MGR{:04}", self.rng.random_range(1..100)),
1563            UserPersona::Controller => format!("CTRL{:04}", self.rng.random_range(1..20)),
1564            UserPersona::Executive => format!("EXEC{:04}", self.rng.random_range(1..10)),
1565            _ => format!("USR{:04}", self.rng.random_range(1..1000)),
1566        };
1567
1568        (approver_id, persona)
1569    }
1570
1571    /// Parse user persona from string.
1572    fn parse_persona(&self, persona_str: &str) -> UserPersona {
1573        match persona_str.to_lowercase().as_str() {
1574            s if s.contains("junior") => UserPersona::JuniorAccountant,
1575            s if s.contains("senior") => UserPersona::SeniorAccountant,
1576            s if s.contains("controller") => UserPersona::Controller,
1577            s if s.contains("manager") => UserPersona::Manager,
1578            s if s.contains("executive") => UserPersona::Executive,
1579            s if s.contains("automated") || s.contains("system") => UserPersona::AutomatedSystem,
1580            _ => UserPersona::JuniorAccountant, // Default
1581        }
1582    }
1583
1584    /// Enable or disable approval workflow.
1585    pub fn with_approval(mut self, enabled: bool) -> Self {
1586        self.approval_enabled = enabled;
1587        self
1588    }
1589
1590    /// Set the approval threshold amount.
1591    pub fn with_approval_threshold(mut self, threshold: rust_decimal::Decimal) -> Self {
1592        self.approval_threshold = threshold;
1593        self
1594    }
1595
1596    /// Set the temporal drift controller for simulating distribution changes over time.
1597    ///
1598    /// When drift is enabled, amounts and other distributions will shift based on
1599    /// the period (month) to simulate realistic temporal evolution like inflation
1600    /// or increasing fraud rates.
1601    pub fn with_drift_controller(mut self, controller: DriftController) -> Self {
1602        self.drift_controller = Some(controller);
1603        self
1604    }
1605
1606    /// Set drift configuration directly.
1607    ///
1608    /// Creates a drift controller from the config. Total periods is calculated
1609    /// from the date range.
1610    pub fn with_drift_config(mut self, config: DriftConfig, seed: u64) -> Self {
1611        if config.enabled {
1612            let total_periods = self.calculate_total_periods();
1613            self.drift_controller = Some(DriftController::new(config, seed, total_periods));
1614        }
1615        self
1616    }
1617
1618    /// Calculate total periods (months) in the date range.
1619    fn calculate_total_periods(&self) -> u32 {
1620        let start_year = self.start_date.year();
1621        let start_month = self.start_date.month();
1622        let end_year = self.end_date.year();
1623        let end_month = self.end_date.month();
1624
1625        ((end_year - start_year) * 12 + (end_month as i32 - start_month as i32) + 1).max(1) as u32
1626    }
1627
1628    /// Calculate the period number (0-indexed) for a given date.
1629    fn date_to_period(&self, date: NaiveDate) -> u32 {
1630        let start_year = self.start_date.year();
1631        let start_month = self.start_date.month() as i32;
1632        let date_year = date.year();
1633        let date_month = date.month() as i32;
1634
1635        ((date_year - start_year) * 12 + (date_month - start_month)).max(0) as u32
1636    }
1637
1638    /// Get drift adjustments for a given date.
1639    fn get_drift_adjustments(&self, date: NaiveDate) -> DriftAdjustments {
1640        if let Some(ref controller) = self.drift_controller {
1641            let period = self.date_to_period(date);
1642            controller.compute_adjustments(period)
1643        } else {
1644            DriftAdjustments::none()
1645        }
1646    }
1647
1648    /// Select a user from the pool or generate a generic user ID.
1649    #[inline]
1650    fn select_user(&mut self, is_automated: bool) -> (String, String) {
1651        if let Some(ref pool) = self.user_pool {
1652            let persona = if is_automated {
1653                UserPersona::AutomatedSystem
1654            } else {
1655                // Random distribution among human personas
1656                let roll: f64 = self.rng.random();
1657                if roll < 0.4 {
1658                    UserPersona::JuniorAccountant
1659                } else if roll < 0.7 {
1660                    UserPersona::SeniorAccountant
1661                } else if roll < 0.85 {
1662                    UserPersona::Controller
1663                } else {
1664                    UserPersona::Manager
1665                }
1666            };
1667
1668            if let Some(user) = pool.get_random_user(persona, &mut self.rng) {
1669                return (
1670                    user.user_id.clone(),
1671                    format!("{:?}", user.persona).to_lowercase(),
1672                );
1673            }
1674        }
1675
1676        // Fallback to generic format
1677        if is_automated {
1678            (
1679                format!("BATCH{:04}", self.rng.random_range(1..=20)),
1680                "automated_system".to_string(),
1681            )
1682        } else {
1683            (
1684                format!("USER{:04}", self.rng.random_range(1..=40)),
1685                "senior_accountant".to_string(),
1686            )
1687        }
1688    }
1689
1690    /// Select transaction source based on configuration weights.
1691    #[inline]
1692    fn select_source(&mut self) -> TransactionSource {
1693        let roll: f64 = self.rng.random();
1694        let dist = &self.config.source_distribution;
1695
1696        if roll < dist.manual {
1697            TransactionSource::Manual
1698        } else if roll < dist.manual + dist.automated {
1699            TransactionSource::Automated
1700        } else if roll < dist.manual + dist.automated + dist.recurring {
1701            TransactionSource::Recurring
1702        } else {
1703            TransactionSource::Adjustment
1704        }
1705    }
1706
1707    /// Select a business process based on configuration weights.
1708    #[inline]
1709    fn select_business_process(&mut self) -> BusinessProcess {
1710        let roll: f64 = self.rng.random();
1711
1712        // Default weights: O2C=35%, P2P=30%, R2R=20%, H2R=10%, A2R=5%
1713        if roll < 0.35 {
1714            BusinessProcess::O2C
1715        } else if roll < 0.65 {
1716            BusinessProcess::P2P
1717        } else if roll < 0.85 {
1718            BusinessProcess::R2R
1719        } else if roll < 0.95 {
1720            BusinessProcess::H2R
1721        } else {
1722            BusinessProcess::A2R
1723        }
1724    }
1725
1726    #[inline]
1727    fn select_debit_account(&mut self) -> &GLAccount {
1728        let accounts = self.coa.get_accounts_by_type(AccountType::Asset);
1729        let expense_accounts = self.coa.get_accounts_by_type(AccountType::Expense);
1730
1731        // 60% asset, 40% expense for debits
1732        let all: Vec<_> = if self.rng.random::<f64>() < 0.6 {
1733            accounts
1734        } else {
1735            expense_accounts
1736        };
1737
1738        all.choose(&mut self.rng).copied().unwrap_or_else(|| {
1739            tracing::warn!(
1740                "Account selection returned empty list, falling back to first COA account"
1741            );
1742            &self.coa.accounts[0]
1743        })
1744    }
1745
1746    #[inline]
1747    fn select_credit_account(&mut self) -> &GLAccount {
1748        let liability_accounts = self.coa.get_accounts_by_type(AccountType::Liability);
1749        let revenue_accounts = self.coa.get_accounts_by_type(AccountType::Revenue);
1750
1751        // 60% liability, 40% revenue for credits
1752        let all: Vec<_> = if self.rng.random::<f64>() < 0.6 {
1753            liability_accounts
1754        } else {
1755            revenue_accounts
1756        };
1757
1758        all.choose(&mut self.rng).copied().unwrap_or_else(|| {
1759            tracing::warn!(
1760                "Account selection returned empty list, falling back to first COA account"
1761            );
1762            &self.coa.accounts[0]
1763        })
1764    }
1765}
1766
1767impl Generator for JournalEntryGenerator {
1768    type Item = JournalEntry;
1769    type Config = (
1770        TransactionConfig,
1771        Arc<ChartOfAccounts>,
1772        Vec<String>,
1773        NaiveDate,
1774        NaiveDate,
1775    );
1776
1777    fn new(config: Self::Config, seed: u64) -> Self {
1778        Self::new_with_params(config.0, config.1, config.2, config.3, config.4, seed)
1779    }
1780
1781    fn generate_one(&mut self) -> Self::Item {
1782        self.generate()
1783    }
1784
1785    fn reset(&mut self) {
1786        self.rng = seeded_rng(self.seed, 0);
1787        self.line_sampler.reset(self.seed + 1);
1788        self.amount_sampler.reset(self.seed + 2);
1789        self.temporal_sampler.reset(self.seed + 3);
1790        self.count = 0;
1791        self.uuid_factory.reset();
1792
1793        // Reset reference generator by recreating it
1794        let mut ref_gen = ReferenceGenerator::new(
1795            self.start_date.year(),
1796            self.companies.first().map(|s| s.as_str()).unwrap_or("1000"),
1797        );
1798        ref_gen.set_prefix(
1799            ReferenceType::Invoice,
1800            &self.template_config.references.invoice_prefix,
1801        );
1802        ref_gen.set_prefix(
1803            ReferenceType::PurchaseOrder,
1804            &self.template_config.references.po_prefix,
1805        );
1806        ref_gen.set_prefix(
1807            ReferenceType::SalesOrder,
1808            &self.template_config.references.so_prefix,
1809        );
1810        self.reference_generator = ref_gen;
1811    }
1812
1813    fn count(&self) -> u64 {
1814        self.count
1815    }
1816
1817    fn seed(&self) -> u64 {
1818        self.seed
1819    }
1820}
1821
1822use datasynth_core::traits::ParallelGenerator;
1823
1824impl ParallelGenerator for JournalEntryGenerator {
1825    /// Split this generator into `parts` independent sub-generators.
1826    ///
1827    /// Each sub-generator gets a deterministic seed derived from the parent seed
1828    /// and its partition index, plus a partitioned UUID factory to avoid contention.
1829    /// The results are deterministic for a given partition count.
1830    fn split(self, parts: usize) -> Vec<Self> {
1831        let parts = parts.max(1);
1832        (0..parts)
1833            .map(|i| {
1834                // Derive a unique seed per partition using a golden-ratio constant
1835                let sub_seed = self
1836                    .seed
1837                    .wrapping_add((i as u64).wrapping_mul(0x9E3779B97F4A7C15));
1838
1839                let mut gen = JournalEntryGenerator::new_with_full_config(
1840                    self.config.clone(),
1841                    Arc::clone(&self.coa),
1842                    self.companies.clone(),
1843                    self.start_date,
1844                    self.end_date,
1845                    sub_seed,
1846                    self.template_config.clone(),
1847                    self.user_pool.clone(),
1848                );
1849
1850                // Copy over configuration state
1851                gen.company_selector = self.company_selector.clone();
1852                gen.vendor_pool = self.vendor_pool.clone();
1853                gen.customer_pool = self.customer_pool.clone();
1854                gen.material_pool = self.material_pool.clone();
1855                gen.using_real_master_data = self.using_real_master_data;
1856                gen.fraud_config = self.fraud_config.clone();
1857                gen.persona_errors_enabled = self.persona_errors_enabled;
1858                gen.approval_enabled = self.approval_enabled;
1859                gen.approval_threshold = self.approval_threshold;
1860
1861                // Use partitioned UUID factory to eliminate atomic contention
1862                gen.uuid_factory = DeterministicUuidFactory::for_partition(
1863                    sub_seed,
1864                    GeneratorType::JournalEntry,
1865                    i as u8,
1866                );
1867
1868                // Copy temporal patterns if configured
1869                if let Some(ref config) = self.temporal_patterns_config {
1870                    gen.temporal_patterns_config = Some(config.clone());
1871                    // Rebuild business day calculator from the stored config
1872                    if config.business_days.enabled {
1873                        if let Some(ref bdc) = self.business_day_calculator {
1874                            gen.business_day_calculator = Some(bdc.clone());
1875                        }
1876                    }
1877                    // Rebuild processing lag calculator with partition seed
1878                    if config.processing_lags.enabled {
1879                        let lag_config =
1880                            Self::convert_processing_lag_config(&config.processing_lags);
1881                        gen.processing_lag_calculator =
1882                            Some(ProcessingLagCalculator::with_config(sub_seed, lag_config));
1883                    }
1884                }
1885
1886                // Copy drift controller if present
1887                if let Some(ref dc) = self.drift_controller {
1888                    gen.drift_controller = Some(dc.clone());
1889                }
1890
1891                gen
1892            })
1893            .collect()
1894    }
1895}
1896
1897#[cfg(test)]
1898#[allow(clippy::unwrap_used)]
1899mod tests {
1900    use super::*;
1901    use crate::ChartOfAccountsGenerator;
1902
1903    #[test]
1904    fn test_generate_balanced_entries() {
1905        let mut coa_gen =
1906            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
1907        let coa = Arc::new(coa_gen.generate());
1908
1909        let mut je_gen = JournalEntryGenerator::new_with_params(
1910            TransactionConfig::default(),
1911            coa,
1912            vec!["1000".to_string()],
1913            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
1914            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
1915            42,
1916        );
1917
1918        let mut balanced_count = 0;
1919        for _ in 0..100 {
1920            let entry = je_gen.generate();
1921
1922            // Skip entries with human errors as they may be intentionally unbalanced
1923            let has_human_error = entry
1924                .header
1925                .header_text
1926                .as_ref()
1927                .map(|t| t.contains("[HUMAN_ERROR:"))
1928                .unwrap_or(false);
1929
1930            if !has_human_error {
1931                assert!(
1932                    entry.is_balanced(),
1933                    "Entry {:?} is not balanced",
1934                    entry.header.document_id
1935                );
1936                balanced_count += 1;
1937            }
1938            assert!(entry.line_count() >= 2, "Entry has fewer than 2 lines");
1939        }
1940
1941        // Ensure most entries are balanced (human errors are rare)
1942        assert!(
1943            balanced_count >= 80,
1944            "Expected at least 80 balanced entries, got {}",
1945            balanced_count
1946        );
1947    }
1948
1949    #[test]
1950    fn test_deterministic_generation() {
1951        let mut coa_gen =
1952            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
1953        let coa = Arc::new(coa_gen.generate());
1954
1955        let mut gen1 = JournalEntryGenerator::new_with_params(
1956            TransactionConfig::default(),
1957            Arc::clone(&coa),
1958            vec!["1000".to_string()],
1959            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
1960            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
1961            42,
1962        );
1963
1964        let mut gen2 = JournalEntryGenerator::new_with_params(
1965            TransactionConfig::default(),
1966            coa,
1967            vec!["1000".to_string()],
1968            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
1969            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
1970            42,
1971        );
1972
1973        for _ in 0..50 {
1974            let e1 = gen1.generate();
1975            let e2 = gen2.generate();
1976            assert_eq!(e1.header.document_id, e2.header.document_id);
1977            assert_eq!(e1.total_debit(), e2.total_debit());
1978        }
1979    }
1980
1981    #[test]
1982    fn test_templates_generate_descriptions() {
1983        let mut coa_gen =
1984            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
1985        let coa = Arc::new(coa_gen.generate());
1986
1987        // Enable all template features
1988        let template_config = TemplateConfig {
1989            names: datasynth_config::schema::NameTemplateConfig {
1990                generate_realistic_names: true,
1991                email_domain: "test.com".to_string(),
1992                culture_distribution: datasynth_config::schema::CultureDistribution::default(),
1993            },
1994            descriptions: datasynth_config::schema::DescriptionTemplateConfig {
1995                generate_header_text: true,
1996                generate_line_text: true,
1997            },
1998            references: datasynth_config::schema::ReferenceTemplateConfig {
1999                generate_references: true,
2000                invoice_prefix: "TEST-INV".to_string(),
2001                po_prefix: "TEST-PO".to_string(),
2002                so_prefix: "TEST-SO".to_string(),
2003            },
2004        };
2005
2006        let mut je_gen = JournalEntryGenerator::new_with_full_config(
2007            TransactionConfig::default(),
2008            coa,
2009            vec!["1000".to_string()],
2010            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2011            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2012            42,
2013            template_config,
2014            None,
2015        )
2016        .with_persona_errors(false); // Disable for template testing
2017
2018        for _ in 0..10 {
2019            let entry = je_gen.generate();
2020
2021            // Verify header text is populated
2022            assert!(
2023                entry.header.header_text.is_some(),
2024                "Header text should be populated"
2025            );
2026
2027            // Verify reference is populated
2028            assert!(
2029                entry.header.reference.is_some(),
2030                "Reference should be populated"
2031            );
2032
2033            // Verify business process is set
2034            assert!(
2035                entry.header.business_process.is_some(),
2036                "Business process should be set"
2037            );
2038
2039            // Verify line text is populated
2040            for line in &entry.lines {
2041                assert!(line.line_text.is_some(), "Line text should be populated");
2042            }
2043
2044            // Entry should still be balanced
2045            assert!(entry.is_balanced());
2046        }
2047    }
2048
2049    #[test]
2050    fn test_user_pool_integration() {
2051        let mut coa_gen =
2052            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2053        let coa = Arc::new(coa_gen.generate());
2054
2055        let companies = vec!["1000".to_string()];
2056
2057        // Generate user pool
2058        let mut user_gen = crate::UserGenerator::new(42);
2059        let user_pool = user_gen.generate_standard(&companies);
2060
2061        let mut je_gen = JournalEntryGenerator::new_with_full_config(
2062            TransactionConfig::default(),
2063            coa,
2064            companies,
2065            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2066            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2067            42,
2068            TemplateConfig::default(),
2069            Some(user_pool),
2070        );
2071
2072        // Generate entries and verify user IDs are from pool
2073        for _ in 0..20 {
2074            let entry = je_gen.generate();
2075
2076            // User ID should not be generic BATCH/USER format when pool is used
2077            // (though it may still fall back if random selection misses)
2078            assert!(!entry.header.created_by.is_empty());
2079        }
2080    }
2081
2082    #[test]
2083    fn test_master_data_connection() {
2084        let mut coa_gen =
2085            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2086        let coa = Arc::new(coa_gen.generate());
2087
2088        // Create test vendors
2089        let vendors = vec![
2090            Vendor::new("V-TEST-001", "Test Vendor Alpha", VendorType::Supplier),
2091            Vendor::new("V-TEST-002", "Test Vendor Beta", VendorType::Technology),
2092        ];
2093
2094        // Create test customers
2095        let customers = vec![
2096            Customer::new("C-TEST-001", "Test Customer One", CustomerType::Corporate),
2097            Customer::new(
2098                "C-TEST-002",
2099                "Test Customer Two",
2100                CustomerType::SmallBusiness,
2101            ),
2102        ];
2103
2104        // Create test materials
2105        let materials = vec![Material::new(
2106            "MAT-TEST-001",
2107            "Test Material A",
2108            MaterialType::RawMaterial,
2109        )];
2110
2111        // Create generator with master data
2112        let generator = JournalEntryGenerator::new_with_params(
2113            TransactionConfig::default(),
2114            coa,
2115            vec!["1000".to_string()],
2116            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2117            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2118            42,
2119        );
2120
2121        // Without master data
2122        assert!(!generator.is_using_real_master_data());
2123
2124        // Connect master data
2125        let generator_with_data = generator
2126            .with_vendors(&vendors)
2127            .with_customers(&customers)
2128            .with_materials(&materials);
2129
2130        // Should now be using real master data
2131        assert!(generator_with_data.is_using_real_master_data());
2132    }
2133
2134    #[test]
2135    fn test_with_master_data_convenience_method() {
2136        let mut coa_gen =
2137            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2138        let coa = Arc::new(coa_gen.generate());
2139
2140        let vendors = vec![Vendor::new("V-001", "Vendor One", VendorType::Supplier)];
2141        let customers = vec![Customer::new(
2142            "C-001",
2143            "Customer One",
2144            CustomerType::Corporate,
2145        )];
2146        let materials = vec![Material::new(
2147            "MAT-001",
2148            "Material One",
2149            MaterialType::RawMaterial,
2150        )];
2151
2152        let generator = JournalEntryGenerator::new_with_params(
2153            TransactionConfig::default(),
2154            coa,
2155            vec!["1000".to_string()],
2156            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2157            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2158            42,
2159        )
2160        .with_master_data(&vendors, &customers, &materials);
2161
2162        assert!(generator.is_using_real_master_data());
2163    }
2164
2165    #[test]
2166    fn test_stress_factors_increase_error_rate() {
2167        let mut coa_gen =
2168            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2169        let coa = Arc::new(coa_gen.generate());
2170
2171        let generator = JournalEntryGenerator::new_with_params(
2172            TransactionConfig::default(),
2173            coa,
2174            vec!["1000".to_string()],
2175            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2176            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2177            42,
2178        );
2179
2180        let base_rate = 0.1;
2181
2182        // Regular day - no stress factors
2183        let regular_day = NaiveDate::from_ymd_opt(2024, 6, 15).unwrap(); // Mid-June Wednesday
2184        let regular_rate = generator.apply_stress_factors(base_rate, regular_day);
2185        assert!(
2186            (regular_rate - base_rate).abs() < 0.01,
2187            "Regular day should have minimal stress factor adjustment"
2188        );
2189
2190        // Month end - 50% more errors
2191        let month_end = NaiveDate::from_ymd_opt(2024, 6, 29).unwrap(); // June 29 (Saturday)
2192        let month_end_rate = generator.apply_stress_factors(base_rate, month_end);
2193        assert!(
2194            month_end_rate > regular_rate,
2195            "Month end should have higher error rate than regular day"
2196        );
2197
2198        // Year end - double the error rate
2199        let year_end = NaiveDate::from_ymd_opt(2024, 12, 30).unwrap(); // December 30
2200        let year_end_rate = generator.apply_stress_factors(base_rate, year_end);
2201        assert!(
2202            year_end_rate > month_end_rate,
2203            "Year end should have highest error rate"
2204        );
2205
2206        // Friday stress
2207        let friday = NaiveDate::from_ymd_opt(2024, 6, 14).unwrap(); // Friday
2208        let friday_rate = generator.apply_stress_factors(base_rate, friday);
2209        assert!(
2210            friday_rate > regular_rate,
2211            "Friday should have higher error rate than mid-week"
2212        );
2213
2214        // Monday stress
2215        let monday = NaiveDate::from_ymd_opt(2024, 6, 17).unwrap(); // Monday
2216        let monday_rate = generator.apply_stress_factors(base_rate, monday);
2217        assert!(
2218            monday_rate > regular_rate,
2219            "Monday should have higher error rate than mid-week"
2220        );
2221    }
2222
2223    #[test]
2224    fn test_batching_produces_similar_entries() {
2225        let mut coa_gen =
2226            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2227        let coa = Arc::new(coa_gen.generate());
2228
2229        // Use seed 123 which is more likely to trigger batching
2230        let mut je_gen = JournalEntryGenerator::new_with_params(
2231            TransactionConfig::default(),
2232            coa,
2233            vec!["1000".to_string()],
2234            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2235            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2236            123,
2237        )
2238        .with_persona_errors(false); // Disable to ensure balanced entries
2239
2240        // Generate many entries - at 15% batch rate, should see some batches
2241        let entries: Vec<JournalEntry> = (0..200).map(|_| je_gen.generate()).collect();
2242
2243        // Check that all entries are balanced (batched or not)
2244        for entry in &entries {
2245            assert!(
2246                entry.is_balanced(),
2247                "All entries including batched should be balanced"
2248            );
2249        }
2250
2251        // Count entries with same-day posting dates (batch indicator)
2252        let mut date_counts: std::collections::HashMap<NaiveDate, usize> =
2253            std::collections::HashMap::new();
2254        for entry in &entries {
2255            *date_counts.entry(entry.header.posting_date).or_insert(0) += 1;
2256        }
2257
2258        // With batching, some dates should have multiple entries
2259        let dates_with_multiple = date_counts.values().filter(|&&c| c > 1).count();
2260        assert!(
2261            dates_with_multiple > 0,
2262            "With batching, should see some dates with multiple entries"
2263        );
2264    }
2265
2266    #[test]
2267    fn test_temporal_patterns_business_days() {
2268        use datasynth_config::schema::{
2269            BusinessDaySchemaConfig, CalendarSchemaConfig, TemporalPatternsConfig,
2270        };
2271
2272        let mut coa_gen =
2273            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2274        let coa = Arc::new(coa_gen.generate());
2275
2276        // Create temporal patterns config with business days enabled
2277        let temporal_config = TemporalPatternsConfig {
2278            enabled: true,
2279            business_days: BusinessDaySchemaConfig {
2280                enabled: true,
2281                ..Default::default()
2282            },
2283            calendars: CalendarSchemaConfig {
2284                regions: vec!["US".to_string()],
2285                custom_holidays: vec![],
2286            },
2287            ..Default::default()
2288        };
2289
2290        let mut je_gen = JournalEntryGenerator::new_with_params(
2291            TransactionConfig::default(),
2292            coa,
2293            vec!["1000".to_string()],
2294            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2295            NaiveDate::from_ymd_opt(2024, 3, 31).unwrap(), // Q1 2024
2296            42,
2297        )
2298        .with_temporal_patterns(temporal_config, 42)
2299        .with_persona_errors(false);
2300
2301        // Generate entries and verify none fall on weekends
2302        let entries: Vec<JournalEntry> = (0..100).map(|_| je_gen.generate()).collect();
2303
2304        for entry in &entries {
2305            let weekday = entry.header.posting_date.weekday();
2306            assert!(
2307                weekday != chrono::Weekday::Sat && weekday != chrono::Weekday::Sun,
2308                "Posting date {:?} should not be a weekend",
2309                entry.header.posting_date
2310            );
2311        }
2312    }
2313}