Skip to main content

datasynth_generators/
je_generator.rs

1//! Journal Entry generator with statistical distributions.
2
3use chrono::{Datelike, NaiveDate};
4use datasynth_core::utils::seeded_rng;
5use rand::prelude::*;
6use rand_chacha::ChaCha8Rng;
7use rust_decimal::prelude::*;
8use rust_decimal::Decimal;
9use std::sync::Arc;
10
11use tracing::debug;
12
13use datasynth_config::schema::{
14    FraudConfig, GeneratorConfig, TemplateConfig, TemporalPatternsConfig, TransactionConfig,
15};
16use datasynth_core::distributions::{
17    BusinessDayCalculator, CrossDayConfig, DriftAdjustments, DriftConfig, DriftController,
18    EventType, LagDistribution, PeriodEndConfig, PeriodEndDynamics, PeriodEndModel,
19    ProcessingLagCalculator, ProcessingLagConfig, *,
20};
21use datasynth_core::models::*;
22use datasynth_core::templates::{
23    descriptions::DescriptionContext, DescriptionGenerator, ReferenceGenerator, ReferenceType,
24};
25use datasynth_core::traits::Generator;
26use datasynth_core::uuid_factory::{DeterministicUuidFactory, GeneratorType};
27use datasynth_core::CountryPack;
28
29use crate::company_selector::WeightedCompanySelector;
30use crate::user_generator::{UserGenerator, UserGeneratorConfig};
31
/// Generator for realistic journal entries.
///
/// Holds the seeded RNG, the statistical samplers, the master-data pools and
/// the optional temporal-pattern components that `generate` reads when it
/// assembles an entry.
pub struct JournalEntryGenerator {
    /// Primary RNG (constructed with `seeded_rng(seed, 0)`); used for the
    /// fraud rolls and for weighted company selection.
    rng: ChaCha8Rng,
    /// Base seed passed to the constructor; sub-generators derive their own
    /// seeds from it by adding a fixed offset.
    seed: u64,
    /// Transaction configuration the samplers were built from.
    config: TransactionConfig,
    /// Shared chart of accounts.
    coa: Arc<ChartOfAccounts>,
    /// Company codes entries may be generated for.
    companies: Vec<String>,
    /// Picks the company for each entry; uniform unless replaced by a
    /// weighted selector.
    company_selector: WeightedCompanySelector,
    /// Samples the line-item specification of an entry.
    line_sampler: LineItemSampler,
    /// Amount sampler built from `config.amounts`.
    amount_sampler: AmountSampler,
    /// Samples posting dates and times; optionally carries period-end dynamics.
    temporal_sampler: TemporalSampler,
    /// Start of the date range passed to the temporal sampler.
    start_date: NaiveDate,
    /// End of the date range; business-day adjustment never moves a posting
    /// date past it.
    end_date: NaiveDate,
    /// Entry counter; starts at 0 and is incremented in `generate`.
    count: u64,
    /// Source of deterministic document UUIDs.
    uuid_factory: DeterministicUuidFactory,
    // Enhanced features
    /// Users that entries can be attributed to; `None` when no pool was
    /// supplied and realistic name generation is disabled.
    user_pool: Option<UserPool>,
    description_generator: DescriptionGenerator,
    /// Reference generator with the invoice / PO / SO prefixes from the
    /// template configuration applied.
    reference_generator: ReferenceGenerator,
    template_config: TemplateConfig,
    /// Vendor pool; `VendorPool::standard()` until `with_vendors` replaces it.
    vendor_pool: VendorPool,
    /// Customer pool; `CustomerPool::standard()` until `with_customers`
    /// replaces it.
    customer_pool: CustomerPool,
    // Material pool for realistic material references
    material_pool: Option<MaterialPool>,
    // Flag indicating whether we're using real master data vs defaults
    using_real_master_data: bool,
    // Fraud generation
    fraud_config: FraudConfig,
    // Persona-based error injection
    persona_errors_enabled: bool,
    // Approval threshold enforcement
    approval_enabled: bool,
    /// Approval threshold; the constructor sets it to 10,000.
    approval_threshold: rust_decimal::Decimal,
    // Batching behavior - humans often process similar items together
    /// Active batch, if any; `generate` continues the batch while its
    /// `remaining` counter is above zero.
    batch_state: Option<BatchState>,
    // Temporal drift controller for simulating distribution changes over time
    drift_controller: Option<DriftController>,
    // Temporal patterns components
    /// When set, `generate` moves non-business-day posting dates onto a
    /// business day.
    business_day_calculator: Option<BusinessDayCalculator>,
    processing_lag_calculator: Option<ProcessingLagCalculator>,
    /// The temporal-pattern configuration last applied, kept for reference.
    temporal_patterns_config: Option<TemporalPatternsConfig>,
}
74
/// State for tracking batch processing behavior.
///
/// When humans process transactions, they often batch similar items together
/// (e.g., processing all invoices from one vendor, entering similar expenses).
///
/// The generator stores at most one of these at a time; while `remaining` is
/// above zero, `generate` delegates to the batched-entry path instead of
/// building a fresh entry.
#[derive(Clone)]
struct BatchState {
    /// The base entry template to vary
    base_account_number: String,
    /// Amount of the base entry (presumably varied per batched entry —
    /// confirm against the batched-entry path).
    base_amount: rust_decimal::Decimal,
    /// Business process of the base entry, if one was recorded.
    base_business_process: Option<BusinessProcess>,
    /// Posting date of the base entry.
    base_posting_date: NaiveDate,
    /// Remaining entries in this batch
    remaining: u8,
}
89
90impl JournalEntryGenerator {
91    /// Create a new journal entry generator.
92    pub fn new_with_params(
93        config: TransactionConfig,
94        coa: Arc<ChartOfAccounts>,
95        companies: Vec<String>,
96        start_date: NaiveDate,
97        end_date: NaiveDate,
98        seed: u64,
99    ) -> Self {
100        Self::new_with_full_config(
101            config,
102            coa,
103            companies,
104            start_date,
105            end_date,
106            seed,
107            TemplateConfig::default(),
108            None,
109        )
110    }
111
112    /// Create a new journal entry generator with full configuration.
113    #[allow(clippy::too_many_arguments)]
114    pub fn new_with_full_config(
115        config: TransactionConfig,
116        coa: Arc<ChartOfAccounts>,
117        companies: Vec<String>,
118        start_date: NaiveDate,
119        end_date: NaiveDate,
120        seed: u64,
121        template_config: TemplateConfig,
122        user_pool: Option<UserPool>,
123    ) -> Self {
124        // Initialize user pool if not provided
125        let user_pool = user_pool.or_else(|| {
126            if template_config.names.generate_realistic_names {
127                let user_gen_config = UserGeneratorConfig {
128                    culture_distribution: vec![
129                        (
130                            datasynth_core::templates::NameCulture::WesternUs,
131                            template_config.names.culture_distribution.western_us,
132                        ),
133                        (
134                            datasynth_core::templates::NameCulture::Hispanic,
135                            template_config.names.culture_distribution.hispanic,
136                        ),
137                        (
138                            datasynth_core::templates::NameCulture::German,
139                            template_config.names.culture_distribution.german,
140                        ),
141                        (
142                            datasynth_core::templates::NameCulture::French,
143                            template_config.names.culture_distribution.french,
144                        ),
145                        (
146                            datasynth_core::templates::NameCulture::Chinese,
147                            template_config.names.culture_distribution.chinese,
148                        ),
149                        (
150                            datasynth_core::templates::NameCulture::Japanese,
151                            template_config.names.culture_distribution.japanese,
152                        ),
153                        (
154                            datasynth_core::templates::NameCulture::Indian,
155                            template_config.names.culture_distribution.indian,
156                        ),
157                    ],
158                    email_domain: template_config.names.email_domain.clone(),
159                    generate_realistic_names: true,
160                };
161                let mut user_gen = UserGenerator::with_config(seed + 100, user_gen_config);
162                Some(user_gen.generate_standard(&companies))
163            } else {
164                None
165            }
166        });
167
168        // Initialize reference generator
169        let mut ref_gen = ReferenceGenerator::new(
170            start_date.year(),
171            companies.first().map(|s| s.as_str()).unwrap_or("1000"),
172        );
173        ref_gen.set_prefix(
174            ReferenceType::Invoice,
175            &template_config.references.invoice_prefix,
176        );
177        ref_gen.set_prefix(
178            ReferenceType::PurchaseOrder,
179            &template_config.references.po_prefix,
180        );
181        ref_gen.set_prefix(
182            ReferenceType::SalesOrder,
183            &template_config.references.so_prefix,
184        );
185
186        // Create weighted company selector (uniform weights for this constructor)
187        let company_selector = WeightedCompanySelector::uniform(companies.clone());
188
189        Self {
190            rng: seeded_rng(seed, 0),
191            seed,
192            config: config.clone(),
193            coa,
194            companies,
195            company_selector,
196            line_sampler: LineItemSampler::with_config(
197                seed + 1,
198                config.line_item_distribution.clone(),
199                config.even_odd_distribution.clone(),
200                config.debit_credit_distribution.clone(),
201            ),
202            amount_sampler: AmountSampler::with_config(seed + 2, config.amounts.clone()),
203            temporal_sampler: TemporalSampler::with_config(
204                seed + 3,
205                config.seasonality.clone(),
206                WorkingHoursConfig::default(),
207                Vec::new(),
208            ),
209            start_date,
210            end_date,
211            count: 0,
212            uuid_factory: DeterministicUuidFactory::new(seed, GeneratorType::JournalEntry),
213            user_pool,
214            description_generator: DescriptionGenerator::new(),
215            reference_generator: ref_gen,
216            template_config,
217            vendor_pool: VendorPool::standard(),
218            customer_pool: CustomerPool::standard(),
219            material_pool: None,
220            using_real_master_data: false,
221            fraud_config: FraudConfig::default(),
222            persona_errors_enabled: true, // Enable by default for realism
223            approval_enabled: true,       // Enable by default for realism
224            approval_threshold: rust_decimal::Decimal::new(10000, 0), // $10,000 default threshold
225            batch_state: None,
226            drift_controller: None,
227            business_day_calculator: None,
228            processing_lag_calculator: None,
229            temporal_patterns_config: None,
230        }
231    }
232
233    /// Create from a full GeneratorConfig.
234    ///
235    /// This constructor uses the volume_weight from company configs
236    /// for weighted company selection, and fraud config from GeneratorConfig.
237    pub fn from_generator_config(
238        full_config: &GeneratorConfig,
239        coa: Arc<ChartOfAccounts>,
240        start_date: NaiveDate,
241        end_date: NaiveDate,
242        seed: u64,
243    ) -> Self {
244        let companies: Vec<String> = full_config
245            .companies
246            .iter()
247            .map(|c| c.code.clone())
248            .collect();
249
250        // Create weighted selector using volume_weight from company configs
251        let company_selector = WeightedCompanySelector::from_configs(&full_config.companies);
252
253        let mut generator = Self::new_with_full_config(
254            full_config.transactions.clone(),
255            coa,
256            companies,
257            start_date,
258            end_date,
259            seed,
260            full_config.templates.clone(),
261            None,
262        );
263
264        // Override the uniform selector with weighted selector
265        generator.company_selector = company_selector;
266
267        // Set fraud config
268        generator.fraud_config = full_config.fraud.clone();
269
270        // Configure temporal patterns if enabled
271        let temporal_config = &full_config.temporal_patterns;
272        if temporal_config.enabled {
273            generator = generator.with_temporal_patterns(temporal_config.clone(), seed);
274        }
275
276        generator
277    }
278
279    /// Configure temporal patterns including business day calculations and processing lags.
280    ///
281    /// This enables realistic temporal behavior including:
282    /// - Business day awareness (no postings on weekends/holidays)
283    /// - Processing lag modeling (event-to-posting delays)
284    /// - Period-end dynamics (volume spikes at month/quarter/year end)
285    pub fn with_temporal_patterns(mut self, config: TemporalPatternsConfig, seed: u64) -> Self {
286        // Create business day calculator if enabled
287        if config.business_days.enabled {
288            let region = config
289                .calendars
290                .regions
291                .first()
292                .map(|r| Self::parse_region(r))
293                .unwrap_or(Region::US);
294
295            let calendar = HolidayCalendar::new(region, self.start_date.year());
296            self.business_day_calculator = Some(BusinessDayCalculator::new(calendar));
297        }
298
299        // Create processing lag calculator if enabled
300        if config.processing_lags.enabled {
301            let lag_config = Self::convert_processing_lag_config(&config.processing_lags);
302            self.processing_lag_calculator =
303                Some(ProcessingLagCalculator::with_config(seed, lag_config));
304        }
305
306        // Create period-end dynamics if configured
307        let model = config.period_end.model.as_deref().unwrap_or("flat");
308        if model != "flat"
309            || config
310                .period_end
311                .month_end
312                .as_ref()
313                .is_some_and(|m| m.peak_multiplier.unwrap_or(1.0) != 1.0)
314        {
315            let dynamics = Self::convert_period_end_config(&config.period_end);
316            self.temporal_sampler.set_period_end_dynamics(dynamics);
317        }
318
319        self.temporal_patterns_config = Some(config);
320        self
321    }
322
323    /// Configure temporal patterns using a [`CountryPack`] for the holiday calendar.
324    ///
325    /// This is an alternative to [`with_temporal_patterns`] that derives the
326    /// holiday calendar from a country-pack definition rather than the built-in
327    /// region-based calendars.  All other temporal behaviour (business-day
328    /// adjustment, processing lags, period-end dynamics) is configured
329    /// identically.
330    pub fn with_country_pack_temporal(
331        mut self,
332        config: TemporalPatternsConfig,
333        seed: u64,
334        pack: &CountryPack,
335    ) -> Self {
336        // Create business day calculator using the country pack calendar
337        if config.business_days.enabled {
338            let calendar = HolidayCalendar::from_country_pack(pack, self.start_date.year());
339            self.business_day_calculator = Some(BusinessDayCalculator::new(calendar));
340        }
341
342        // Create processing lag calculator if enabled
343        if config.processing_lags.enabled {
344            let lag_config = Self::convert_processing_lag_config(&config.processing_lags);
345            self.processing_lag_calculator =
346                Some(ProcessingLagCalculator::with_config(seed, lag_config));
347        }
348
349        // Create period-end dynamics if configured
350        let model = config.period_end.model.as_deref().unwrap_or("flat");
351        if model != "flat"
352            || config
353                .period_end
354                .month_end
355                .as_ref()
356                .is_some_and(|m| m.peak_multiplier.unwrap_or(1.0) != 1.0)
357        {
358            let dynamics = Self::convert_period_end_config(&config.period_end);
359            self.temporal_sampler.set_period_end_dynamics(dynamics);
360        }
361
362        self.temporal_patterns_config = Some(config);
363        self
364    }
365
366    /// Convert schema processing lag config to core config.
367    fn convert_processing_lag_config(
368        schema: &datasynth_config::schema::ProcessingLagSchemaConfig,
369    ) -> ProcessingLagConfig {
370        let mut config = ProcessingLagConfig {
371            enabled: schema.enabled,
372            ..Default::default()
373        };
374
375        // Helper to convert lag schema to distribution
376        let convert_lag = |lag: &datasynth_config::schema::LagDistributionSchemaConfig| {
377            let mut dist = LagDistribution::log_normal(lag.mu, lag.sigma);
378            if let Some(min) = lag.min_hours {
379                dist.min_lag_hours = min;
380            }
381            if let Some(max) = lag.max_hours {
382                dist.max_lag_hours = max;
383            }
384            dist
385        };
386
387        // Apply event-specific lags
388        if let Some(ref lag) = schema.sales_order_lag {
389            config
390                .event_lags
391                .insert(EventType::SalesOrder, convert_lag(lag));
392        }
393        if let Some(ref lag) = schema.purchase_order_lag {
394            config
395                .event_lags
396                .insert(EventType::PurchaseOrder, convert_lag(lag));
397        }
398        if let Some(ref lag) = schema.goods_receipt_lag {
399            config
400                .event_lags
401                .insert(EventType::GoodsReceipt, convert_lag(lag));
402        }
403        if let Some(ref lag) = schema.invoice_receipt_lag {
404            config
405                .event_lags
406                .insert(EventType::InvoiceReceipt, convert_lag(lag));
407        }
408        if let Some(ref lag) = schema.invoice_issue_lag {
409            config
410                .event_lags
411                .insert(EventType::InvoiceIssue, convert_lag(lag));
412        }
413        if let Some(ref lag) = schema.payment_lag {
414            config
415                .event_lags
416                .insert(EventType::Payment, convert_lag(lag));
417        }
418        if let Some(ref lag) = schema.journal_entry_lag {
419            config
420                .event_lags
421                .insert(EventType::JournalEntry, convert_lag(lag));
422        }
423
424        // Apply cross-day posting config
425        if let Some(ref cross_day) = schema.cross_day_posting {
426            config.cross_day = CrossDayConfig {
427                enabled: cross_day.enabled,
428                probability_by_hour: cross_day.probability_by_hour.clone(),
429                ..Default::default()
430            };
431        }
432
433        config
434    }
435
436    /// Convert schema period-end config to core PeriodEndDynamics.
437    fn convert_period_end_config(
438        schema: &datasynth_config::schema::PeriodEndSchemaConfig,
439    ) -> PeriodEndDynamics {
440        let model_type = schema.model.as_deref().unwrap_or("exponential");
441
442        // Helper to convert period config
443        let convert_period =
444            |period: Option<&datasynth_config::schema::PeriodEndModelSchemaConfig>,
445             default_peak: f64|
446             -> PeriodEndConfig {
447                if let Some(p) = period {
448                    let model = match model_type {
449                        "flat" => PeriodEndModel::FlatMultiplier {
450                            multiplier: p.peak_multiplier.unwrap_or(default_peak),
451                        },
452                        "extended_crunch" => PeriodEndModel::ExtendedCrunch {
453                            start_day: p.start_day.unwrap_or(-10),
454                            sustained_high_days: p.sustained_high_days.unwrap_or(3),
455                            peak_multiplier: p.peak_multiplier.unwrap_or(default_peak),
456                            ramp_up_days: 3, // Default ramp-up period
457                        },
458                        _ => PeriodEndModel::ExponentialAcceleration {
459                            start_day: p.start_day.unwrap_or(-10),
460                            base_multiplier: p.base_multiplier.unwrap_or(1.0),
461                            peak_multiplier: p.peak_multiplier.unwrap_or(default_peak),
462                            decay_rate: p.decay_rate.unwrap_or(0.3),
463                        },
464                    };
465                    PeriodEndConfig {
466                        enabled: true,
467                        model,
468                        additional_multiplier: p.additional_multiplier.unwrap_or(1.0),
469                    }
470                } else {
471                    PeriodEndConfig {
472                        enabled: true,
473                        model: PeriodEndModel::ExponentialAcceleration {
474                            start_day: -10,
475                            base_multiplier: 1.0,
476                            peak_multiplier: default_peak,
477                            decay_rate: 0.3,
478                        },
479                        additional_multiplier: 1.0,
480                    }
481                }
482            };
483
484        PeriodEndDynamics::new(
485            convert_period(schema.month_end.as_ref(), 2.0),
486            convert_period(schema.quarter_end.as_ref(), 3.5),
487            convert_period(schema.year_end.as_ref(), 5.0),
488        )
489    }
490
491    /// Parse a region string into a Region enum.
492    fn parse_region(region_str: &str) -> Region {
493        match region_str.to_uppercase().as_str() {
494            "US" => Region::US,
495            "DE" => Region::DE,
496            "GB" => Region::GB,
497            "CN" => Region::CN,
498            "JP" => Region::JP,
499            "IN" => Region::IN,
500            "BR" => Region::BR,
501            "MX" => Region::MX,
502            "AU" => Region::AU,
503            "SG" => Region::SG,
504            "KR" => Region::KR,
505            _ => Region::US,
506        }
507    }
508
    /// Set a custom company selector.
    ///
    /// Replaces whichever selector is currently installed (the uniform one
    /// from the constructor, or a weighted one); later company selection in
    /// `generate` goes through the new selector.
    pub fn set_company_selector(&mut self, selector: WeightedCompanySelector) {
        self.company_selector = selector;
    }
513
    /// Get the current company selector.
    ///
    /// Returns a shared reference; use `set_company_selector` to replace it.
    pub fn company_selector(&self) -> &WeightedCompanySelector {
        &self.company_selector
    }
518
    /// Set fraud configuration.
    ///
    /// Overwrites the stored `FraudConfig`; the fraud roll performed for each
    /// generated entry reads its `enabled`, `fraud_rate` and
    /// `fraud_type_distribution` fields.
    pub fn set_fraud_config(&mut self, config: FraudConfig) {
        self.fraud_config = config;
    }
523
524    /// Set vendors from generated master data.
525    ///
526    /// This replaces the default vendor pool with actual generated vendors,
527    /// ensuring JEs reference real master data entities.
528    pub fn with_vendors(mut self, vendors: &[Vendor]) -> Self {
529        if !vendors.is_empty() {
530            self.vendor_pool = VendorPool::from_vendors(vendors.to_vec());
531            self.using_real_master_data = true;
532        }
533        self
534    }
535
536    /// Set customers from generated master data.
537    ///
538    /// This replaces the default customer pool with actual generated customers,
539    /// ensuring JEs reference real master data entities.
540    pub fn with_customers(mut self, customers: &[Customer]) -> Self {
541        if !customers.is_empty() {
542            self.customer_pool = CustomerPool::from_customers(customers.to_vec());
543            self.using_real_master_data = true;
544        }
545        self
546    }
547
548    /// Set materials from generated master data.
549    ///
550    /// This provides material references for JEs that involve inventory movements.
551    pub fn with_materials(mut self, materials: &[Material]) -> Self {
552        if !materials.is_empty() {
553            self.material_pool = Some(MaterialPool::from_materials(materials.to_vec()));
554            self.using_real_master_data = true;
555        }
556        self
557    }
558
559    /// Set all master data at once for convenience.
560    ///
561    /// This is the recommended way to configure the JE generator with
562    /// generated master data to ensure data coherence.
563    pub fn with_master_data(
564        self,
565        vendors: &[Vendor],
566        customers: &[Customer],
567        materials: &[Material],
568    ) -> Self {
569        self.with_vendors(vendors)
570            .with_customers(customers)
571            .with_materials(materials)
572    }
573
574    /// Replace the user pool with one generated from a [`CountryPack`].
575    ///
576    /// This is an alternative to the default name-culture distribution that
577    /// derives name pools and weights from the country-pack's `names` section.
578    /// The existing user pool (if any) is discarded and regenerated using
579    /// [`MultiCultureNameGenerator::from_country_pack`].
580    pub fn with_country_pack_names(mut self, pack: &CountryPack) -> Self {
581        let name_gen =
582            datasynth_core::templates::MultiCultureNameGenerator::from_country_pack(pack);
583        let config = UserGeneratorConfig {
584            // The culture distribution is embedded in the name generator
585            // itself, so we use an empty list here.
586            culture_distribution: Vec::new(),
587            email_domain: name_gen.email_domain().to_string(),
588            generate_realistic_names: true,
589        };
590        let mut user_gen = UserGenerator::with_name_generator(self.seed + 100, config, name_gen);
591        self.user_pool = Some(user_gen.generate_standard(&self.companies));
592        self
593    }
594
    /// Check if the generator is using real master data.
    ///
    /// Returns `true` once a non-empty vendor, customer or material list has
    /// been supplied through the `with_*` master-data methods; `false` while
    /// only the default pools are in use.
    pub fn is_using_real_master_data(&self) -> bool {
        self.using_real_master_data
    }
599
600    /// Determine if this transaction should be fraudulent.
601    fn determine_fraud(&mut self) -> Option<FraudType> {
602        if !self.fraud_config.enabled {
603            return None;
604        }
605
606        // Roll for fraud based on fraud rate
607        if self.rng.random::<f64>() >= self.fraud_config.fraud_rate {
608            return None;
609        }
610
611        // Select fraud type based on distribution
612        Some(self.select_fraud_type())
613    }
614
615    /// Select a fraud type based on the configured distribution.
616    fn select_fraud_type(&mut self) -> FraudType {
617        let dist = &self.fraud_config.fraud_type_distribution;
618        let roll: f64 = self.rng.random();
619
620        let mut cumulative = 0.0;
621
622        cumulative += dist.suspense_account_abuse;
623        if roll < cumulative {
624            return FraudType::SuspenseAccountAbuse;
625        }
626
627        cumulative += dist.fictitious_transaction;
628        if roll < cumulative {
629            return FraudType::FictitiousTransaction;
630        }
631
632        cumulative += dist.revenue_manipulation;
633        if roll < cumulative {
634            return FraudType::RevenueManipulation;
635        }
636
637        cumulative += dist.expense_capitalization;
638        if roll < cumulative {
639            return FraudType::ExpenseCapitalization;
640        }
641
642        cumulative += dist.split_transaction;
643        if roll < cumulative {
644            return FraudType::SplitTransaction;
645        }
646
647        cumulative += dist.timing_anomaly;
648        if roll < cumulative {
649            return FraudType::TimingAnomaly;
650        }
651
652        cumulative += dist.unauthorized_access;
653        if roll < cumulative {
654            return FraudType::UnauthorizedAccess;
655        }
656
657        // Default fallback
658        FraudType::DuplicatePayment
659    }
660
661    /// Map a fraud type to an amount pattern for suspicious amounts.
662    fn fraud_type_to_amount_pattern(&self, fraud_type: FraudType) -> FraudAmountPattern {
663        match fraud_type {
664            FraudType::SplitTransaction | FraudType::JustBelowThreshold => {
665                FraudAmountPattern::ThresholdAdjacent
666            }
667            FraudType::FictitiousTransaction
668            | FraudType::FictitiousEntry
669            | FraudType::SuspenseAccountAbuse
670            | FraudType::RoundDollarManipulation => FraudAmountPattern::ObviousRoundNumbers,
671            FraudType::RevenueManipulation
672            | FraudType::ExpenseCapitalization
673            | FraudType::ImproperCapitalization
674            | FraudType::ReserveManipulation
675            | FraudType::UnauthorizedAccess
676            | FraudType::PrematureRevenue
677            | FraudType::UnderstatedLiabilities
678            | FraudType::OverstatedAssets
679            | FraudType::ChannelStuffing => FraudAmountPattern::StatisticallyImprobable,
680            FraudType::DuplicatePayment
681            | FraudType::TimingAnomaly
682            | FraudType::SelfApproval
683            | FraudType::ExceededApprovalLimit
684            | FraudType::SegregationOfDutiesViolation
685            | FraudType::UnauthorizedApproval
686            | FraudType::CollusiveApproval
687            | FraudType::FictitiousVendor
688            | FraudType::ShellCompanyPayment
689            | FraudType::Kickback
690            | FraudType::KickbackScheme
691            | FraudType::InvoiceManipulation
692            | FraudType::AssetMisappropriation
693            | FraudType::InventoryTheft
694            | FraudType::GhostEmployee => FraudAmountPattern::Normal,
695            // Accounting Standards Fraud Types (ASC 606/IFRS 15 - Revenue)
696            FraudType::ImproperRevenueRecognition
697            | FraudType::ImproperPoAllocation
698            | FraudType::VariableConsiderationManipulation
699            | FraudType::ContractModificationMisstatement => {
700                FraudAmountPattern::StatisticallyImprobable
701            }
702            // Accounting Standards Fraud Types (ASC 842/IFRS 16 - Leases)
703            FraudType::LeaseClassificationManipulation
704            | FraudType::OffBalanceSheetLease
705            | FraudType::LeaseLiabilityUnderstatement
706            | FraudType::RouAssetMisstatement => FraudAmountPattern::StatisticallyImprobable,
707            // Accounting Standards Fraud Types (ASC 820/IFRS 13 - Fair Value)
708            FraudType::FairValueHierarchyManipulation
709            | FraudType::Level3InputManipulation
710            | FraudType::ValuationTechniqueManipulation => {
711                FraudAmountPattern::StatisticallyImprobable
712            }
713            // Accounting Standards Fraud Types (ASC 360/IAS 36 - Impairment)
714            FraudType::DelayedImpairment
715            | FraudType::ImpairmentTestAvoidance
716            | FraudType::CashFlowProjectionManipulation
717            | FraudType::ImproperImpairmentReversal => FraudAmountPattern::StatisticallyImprobable,
718            // Sourcing/Procurement Fraud
719            FraudType::BidRigging
720            | FraudType::PhantomVendorContract
721            | FraudType::ConflictOfInterestSourcing => FraudAmountPattern::Normal,
722            FraudType::SplitContractThreshold => FraudAmountPattern::ThresholdAdjacent,
723            // HR/Payroll Fraud
724            FraudType::GhostEmployeePayroll
725            | FraudType::PayrollInflation
726            | FraudType::DuplicateExpenseReport
727            | FraudType::FictitiousExpense => FraudAmountPattern::Normal,
728            FraudType::SplitExpenseToAvoidApproval => FraudAmountPattern::ThresholdAdjacent,
729            // O2C Fraud
730            FraudType::RevenueTimingManipulation => FraudAmountPattern::StatisticallyImprobable,
731            FraudType::QuotePriceOverride => FraudAmountPattern::Normal,
732        }
733    }
734
    /// Generate a deterministic UUID using the factory.
    ///
    /// Thin wrapper over `DeterministicUuidFactory::next` on the factory
    /// created from the base seed with `GeneratorType::JournalEntry`.
    /// NOTE(review): takes `&self`, so the factory presumably advances its
    /// state through interior mutability — confirm against the factory.
    #[inline]
    fn generate_deterministic_uuid(&self) -> uuid::Uuid {
        self.uuid_factory.next()
    }
740
741    /// Generate a single journal entry.
742    pub fn generate(&mut self) -> JournalEntry {
743        debug!(
744            count = self.count,
745            companies = self.companies.len(),
746            start_date = %self.start_date,
747            end_date = %self.end_date,
748            "Generating journal entry"
749        );
750
751        // Check if we're in a batch - if so, generate a batched entry
752        if let Some(ref state) = self.batch_state {
753            if state.remaining > 0 {
754                return self.generate_batched_entry();
755            }
756        }
757
758        self.count += 1;
759
760        // Generate deterministic document ID
761        let document_id = self.generate_deterministic_uuid();
762
763        // Sample posting date
764        let mut posting_date = self
765            .temporal_sampler
766            .sample_date(self.start_date, self.end_date);
767
768        // Adjust posting date to be a business day if business day calculator is configured
769        if let Some(ref calc) = self.business_day_calculator {
770            if !calc.is_business_day(posting_date) {
771                // Move to next business day
772                posting_date = calc.next_business_day(posting_date, false);
773                // Ensure we don't exceed end_date
774                if posting_date > self.end_date {
775                    posting_date = calc.prev_business_day(self.end_date, true);
776                }
777            }
778        }
779
780        // Select company using weighted selector
781        let company_code = self.company_selector.select(&mut self.rng).to_string();
782
783        // Sample line item specification
784        let line_spec = self.line_sampler.sample();
785
786        // Determine source type using full 4-way distribution
787        let source = self.select_source();
788        let is_automated = matches!(
789            source,
790            TransactionSource::Automated | TransactionSource::Recurring
791        );
792
793        // Select business process
794        let business_process = self.select_business_process();
795
796        // Determine if this is a fraudulent transaction
797        let fraud_type = self.determine_fraud();
798        let is_fraud = fraud_type.is_some();
799
800        // Sample time based on source
801        let time = self.temporal_sampler.sample_time(!is_automated);
802        let created_at = posting_date.and_time(time).and_utc();
803
804        // Select user from pool or generate generic
805        let (created_by, user_persona) = self.select_user(is_automated);
806
807        // Create header with deterministic UUID
808        let mut header =
809            JournalEntryHeader::with_deterministic_id(company_code, posting_date, document_id);
810        header.created_at = created_at;
811        header.source = source;
812        header.created_by = created_by;
813        header.user_persona = user_persona;
814        header.business_process = Some(business_process);
815        header.is_fraud = is_fraud;
816        header.fraud_type = fraud_type;
817
818        // Generate description context
819        let mut context =
820            DescriptionContext::with_period(posting_date.month(), posting_date.year());
821
822        // Add vendor/customer context based on business process
823        match business_process {
824            BusinessProcess::P2P => {
825                if let Some(vendor) = self.vendor_pool.random_vendor(&mut self.rng) {
826                    context.vendor_name = Some(vendor.name.clone());
827                }
828            }
829            BusinessProcess::O2C => {
830                if let Some(customer) = self.customer_pool.random_customer(&mut self.rng) {
831                    context.customer_name = Some(customer.name.clone());
832                }
833            }
834            _ => {}
835        }
836
837        // Generate header text if enabled
838        if self.template_config.descriptions.generate_header_text {
839            header.header_text = Some(self.description_generator.generate_header_text(
840                business_process,
841                &context,
842                &mut self.rng,
843            ));
844        }
845
846        // Generate reference if enabled
847        if self.template_config.references.generate_references {
848            header.reference = Some(
849                self.reference_generator
850                    .generate_for_process_year(business_process, posting_date.year()),
851            );
852        }
853
854        // Generate line items
855        let mut entry = JournalEntry::new(header);
856
857        // Generate amount - use fraud pattern if this is a fraudulent transaction
858        let base_amount = if let Some(ft) = fraud_type {
859            let pattern = self.fraud_type_to_amount_pattern(ft);
860            self.amount_sampler.sample_fraud(pattern)
861        } else {
862            self.amount_sampler.sample()
863        };
864
865        // Apply temporal drift if configured
866        let drift_adjusted_amount = {
867            let drift = self.get_drift_adjustments(posting_date);
868            if drift.amount_mean_multiplier != 1.0 {
869                // Apply drift multiplier (includes seasonal factor if enabled)
870                let multiplier = drift.amount_mean_multiplier * drift.seasonal_factor;
871                let adjusted = base_amount.to_f64().unwrap_or(1.0) * multiplier;
872                Decimal::from_f64_retain(adjusted).unwrap_or(base_amount)
873            } else {
874                base_amount
875            }
876        };
877
878        // Apply human variation to amounts for non-automated transactions
879        let total_amount = if is_automated {
880            drift_adjusted_amount // Automated systems use exact amounts
881        } else {
882            self.apply_human_variation(drift_adjusted_amount)
883        };
884
885        // Generate debit lines
886        let debit_amounts = self
887            .amount_sampler
888            .sample_summing_to(line_spec.debit_count, total_amount);
889        for (i, amount) in debit_amounts.into_iter().enumerate() {
890            let account_number = self.select_debit_account().account_number.clone();
891            let mut line = JournalEntryLine::debit(
892                entry.header.document_id,
893                (i + 1) as u32,
894                account_number.clone(),
895                amount,
896            );
897
898            // Generate line text if enabled
899            if self.template_config.descriptions.generate_line_text {
900                line.line_text = Some(self.description_generator.generate_line_text(
901                    &account_number,
902                    &context,
903                    &mut self.rng,
904                ));
905            }
906
907            entry.add_line(line);
908        }
909
910        // Generate credit lines - use the SAME amounts to ensure balance
911        let credit_amounts = self
912            .amount_sampler
913            .sample_summing_to(line_spec.credit_count, total_amount);
914        for (i, amount) in credit_amounts.into_iter().enumerate() {
915            let account_number = self.select_credit_account().account_number.clone();
916            let mut line = JournalEntryLine::credit(
917                entry.header.document_id,
918                (line_spec.debit_count + i + 1) as u32,
919                account_number.clone(),
920                amount,
921            );
922
923            // Generate line text if enabled
924            if self.template_config.descriptions.generate_line_text {
925                line.line_text = Some(self.description_generator.generate_line_text(
926                    &account_number,
927                    &context,
928                    &mut self.rng,
929                ));
930            }
931
932            entry.add_line(line);
933        }
934
935        // Apply persona-based errors if enabled and it's a human user
936        if self.persona_errors_enabled && !is_automated {
937            self.maybe_inject_persona_error(&mut entry);
938        }
939
940        // Apply approval workflow if enabled and amount exceeds threshold
941        if self.approval_enabled {
942            self.maybe_apply_approval_workflow(&mut entry, posting_date);
943        }
944
945        // Maybe start a batch of similar entries for realism
946        self.maybe_start_batch(&entry);
947
948        entry
949    }
950
951    /// Enable or disable persona-based error injection.
952    ///
953    /// When enabled, entries created by human personas have a chance
954    /// to contain realistic human errors based on their experience level.
955    pub fn with_persona_errors(mut self, enabled: bool) -> Self {
956        self.persona_errors_enabled = enabled;
957        self
958    }
959
960    /// Set fraud configuration for fraud injection.
961    ///
962    /// When fraud is enabled in the config, transactions have a chance
963    /// to be marked as fraudulent based on the configured fraud rate.
964    pub fn with_fraud_config(mut self, config: FraudConfig) -> Self {
965        self.fraud_config = config;
966        self
967    }
968
969    /// Check if persona errors are enabled.
970    pub fn persona_errors_enabled(&self) -> bool {
971        self.persona_errors_enabled
972    }
973
974    /// Enable or disable batch processing behavior.
975    ///
976    /// When enabled (default), the generator will occasionally produce batches
977    /// of similar entries, simulating how humans batch similar work together.
978    pub fn with_batching(mut self, enabled: bool) -> Self {
979        if !enabled {
980            self.batch_state = None;
981        }
982        self
983    }
984
985    /// Check if batch processing is enabled.
986    pub fn batching_enabled(&self) -> bool {
987        // Batching is implicitly enabled when not explicitly disabled
988        true
989    }
990
991    /// Maybe start a batch based on the current entry.
992    ///
993    /// Humans often batch similar work: processing invoices from one vendor,
994    /// entering expense reports for a trip, reconciling similar items.
995    fn maybe_start_batch(&mut self, entry: &JournalEntry) {
996        // Only start batch for non-automated, non-fraud entries
997        if entry.header.source == TransactionSource::Automated || entry.header.is_fraud {
998            return;
999        }
1000
1001        // 15% chance to start a batch (most work is not batched)
1002        if self.rng.random::<f64>() > 0.15 {
1003            return;
1004        }
1005
1006        // Extract key attributes for batching
1007        let base_account = entry
1008            .lines
1009            .first()
1010            .map(|l| l.gl_account.clone())
1011            .unwrap_or_default();
1012
1013        let base_amount = entry.total_debit();
1014
1015        self.batch_state = Some(BatchState {
1016            base_account_number: base_account,
1017            base_amount,
1018            base_business_process: entry.header.business_process,
1019            base_posting_date: entry.header.posting_date,
1020            remaining: self.rng.random_range(2..7), // 2-6 more similar entries
1021        });
1022    }
1023
1024    /// Generate an entry that's part of the current batch.
1025    ///
1026    /// Batched entries have:
1027    /// - Same or very similar business process
1028    /// - Same posting date (batched work done together)
1029    /// - Similar amounts (within ±15%)
1030    /// - Same debit account (processing similar items)
1031    fn generate_batched_entry(&mut self) -> JournalEntry {
1032        use rust_decimal::Decimal;
1033
1034        // Decrement batch counter
1035        if let Some(ref mut state) = self.batch_state {
1036            state.remaining = state.remaining.saturating_sub(1);
1037        }
1038
1039        let batch = self
1040            .batch_state
1041            .clone()
1042            .expect("batch_state set before calling generate_batched_entry");
1043
1044        // Use the batch's posting date (work done on same day)
1045        let posting_date = batch.base_posting_date;
1046
1047        self.count += 1;
1048        let document_id = self.generate_deterministic_uuid();
1049
1050        // Select same company (batched work is usually same company)
1051        let company_code = self.company_selector.select(&mut self.rng).to_string();
1052
1053        // Use simplified line spec for batched entries (usually 2-line)
1054        let _line_spec = LineItemSpec {
1055            total_count: 2,
1056            debit_count: 1,
1057            credit_count: 1,
1058            split_type: DebitCreditSplit::Equal,
1059        };
1060
1061        // Batched entries are always manual
1062        let source = TransactionSource::Manual;
1063
1064        // Use the batch's business process
1065        let business_process = batch.base_business_process.unwrap_or(BusinessProcess::R2R);
1066
1067        // Sample time
1068        let time = self.temporal_sampler.sample_time(true);
1069        let created_at = posting_date.and_time(time).and_utc();
1070
1071        // Same user for batched work
1072        let (created_by, user_persona) = self.select_user(false);
1073
1074        // Create header
1075        let mut header =
1076            JournalEntryHeader::with_deterministic_id(company_code, posting_date, document_id);
1077        header.created_at = created_at;
1078        header.source = source;
1079        header.created_by = created_by;
1080        header.user_persona = user_persona;
1081        header.business_process = Some(business_process);
1082
1083        // Generate similar amount (within ±15% of base)
1084        let variation = self.rng.random_range(-0.15..0.15);
1085        let varied_amount =
1086            batch.base_amount * (Decimal::ONE + Decimal::try_from(variation).unwrap_or_default());
1087        let total_amount = varied_amount.round_dp(2).max(Decimal::from(1));
1088
1089        // Create the entry
1090        let mut entry = JournalEntry::new(header);
1091
1092        // Use same debit account as batch base
1093        let debit_line = JournalEntryLine::debit(
1094            entry.header.document_id,
1095            1,
1096            batch.base_account_number.clone(),
1097            total_amount,
1098        );
1099        entry.add_line(debit_line);
1100
1101        // Select a credit account
1102        let credit_account = self.select_credit_account().account_number.clone();
1103        let credit_line =
1104            JournalEntryLine::credit(entry.header.document_id, 2, credit_account, total_amount);
1105        entry.add_line(credit_line);
1106
1107        // Apply persona-based errors if enabled
1108        if self.persona_errors_enabled {
1109            self.maybe_inject_persona_error(&mut entry);
1110        }
1111
1112        // Apply approval workflow if enabled
1113        if self.approval_enabled {
1114            self.maybe_apply_approval_workflow(&mut entry, posting_date);
1115        }
1116
1117        // Clear batch state if no more entries remaining
1118        if batch.remaining <= 1 {
1119            self.batch_state = None;
1120        }
1121
1122        entry
1123    }
1124
1125    /// Maybe inject a persona-appropriate error based on the persona's error rate.
1126    fn maybe_inject_persona_error(&mut self, entry: &mut JournalEntry) {
1127        // Parse persona from the entry header
1128        let persona_str = &entry.header.user_persona;
1129        let persona = match persona_str.to_lowercase().as_str() {
1130            s if s.contains("junior") => UserPersona::JuniorAccountant,
1131            s if s.contains("senior") => UserPersona::SeniorAccountant,
1132            s if s.contains("controller") => UserPersona::Controller,
1133            s if s.contains("manager") => UserPersona::Manager,
1134            s if s.contains("executive") => UserPersona::Executive,
1135            _ => return, // Don't inject errors for unknown personas
1136        };
1137
1138        // Get base error rate from persona
1139        let base_error_rate = persona.error_rate();
1140
1141        // Apply stress factors based on posting date
1142        let adjusted_rate = self.apply_stress_factors(base_error_rate, entry.header.posting_date);
1143
1144        // Check if error should occur based on adjusted rate
1145        if self.rng.random::<f64>() >= adjusted_rate {
1146            return; // No error this time
1147        }
1148
1149        // Select and inject persona-appropriate error
1150        self.inject_human_error(entry, persona);
1151    }
1152
1153    /// Apply contextual stress factors to the base error rate.
1154    ///
1155    /// Stress factors increase error likelihood during:
1156    /// - Month-end (day >= 28): 1.5x more errors due to deadline pressure
1157    /// - Quarter-end (Mar, Jun, Sep, Dec): additional 25% boost
1158    /// - Year-end (December 28-31): 2.0x more errors due to audit pressure
1159    /// - Monday morning (catch-up work): 20% more errors
1160    /// - Friday afternoon (rushing to leave): 30% more errors
1161    fn apply_stress_factors(&self, base_rate: f64, posting_date: chrono::NaiveDate) -> f64 {
1162        use chrono::Datelike;
1163
1164        let mut rate = base_rate;
1165        let day = posting_date.day();
1166        let month = posting_date.month();
1167
1168        // Year-end stress (December 28-31): double the error rate
1169        if month == 12 && day >= 28 {
1170            rate *= 2.0;
1171            return rate.min(0.5); // Cap at 50% to keep it realistic
1172        }
1173
1174        // Quarter-end stress (last days of Mar, Jun, Sep, Dec)
1175        if matches!(month, 3 | 6 | 9 | 12) && day >= 28 {
1176            rate *= 1.75; // 75% more errors at quarter end
1177            return rate.min(0.4);
1178        }
1179
1180        // Month-end stress (last 3 days of month)
1181        if day >= 28 {
1182            rate *= 1.5; // 50% more errors at month end
1183        }
1184
1185        // Day-of-week stress effects
1186        let weekday = posting_date.weekday();
1187        match weekday {
1188            chrono::Weekday::Mon => {
1189                // Monday: catching up, often rushed
1190                rate *= 1.2;
1191            }
1192            chrono::Weekday::Fri => {
1193                // Friday: rushing to finish before weekend
1194                rate *= 1.3;
1195            }
1196            _ => {}
1197        }
1198
1199        // Cap at 40% to keep it realistic
1200        rate.min(0.4)
1201    }
1202
1203    /// Apply human-like variation to an amount.
1204    ///
1205    /// Humans don't enter perfectly calculated amounts - they:
1206    /// - Round amounts differently
1207    /// - Estimate instead of calculating exactly
1208    /// - Make small input variations
1209    ///
1210    /// This applies small variations (typically ±2%) to make amounts more realistic.
1211    fn apply_human_variation(&mut self, amount: rust_decimal::Decimal) -> rust_decimal::Decimal {
1212        use rust_decimal::Decimal;
1213
1214        // Automated transactions or very small amounts don't get variation
1215        if amount < Decimal::from(10) {
1216            return amount;
1217        }
1218
1219        // 70% chance of human variation being applied
1220        if self.rng.random::<f64>() > 0.70 {
1221            return amount;
1222        }
1223
1224        // Decide which type of human variation to apply
1225        let variation_type: u8 = self.rng.random_range(0..4);
1226
1227        match variation_type {
1228            0 => {
1229                // ±2% variation (common for estimated amounts)
1230                let variation_pct = self.rng.random_range(-0.02..0.02);
1231                let variation = amount * Decimal::try_from(variation_pct).unwrap_or_default();
1232                (amount + variation).round_dp(2)
1233            }
1234            1 => {
1235                // Round to nearest $10
1236                let ten = Decimal::from(10);
1237                (amount / ten).round() * ten
1238            }
1239            2 => {
1240                // Round to nearest $100 (for larger amounts)
1241                if amount >= Decimal::from(500) {
1242                    let hundred = Decimal::from(100);
1243                    (amount / hundred).round() * hundred
1244                } else {
1245                    amount
1246                }
1247            }
1248            3 => {
1249                // Slight under/over payment (±$0.01 to ±$1.00)
1250                let cents = Decimal::new(self.rng.random_range(-100..100), 2);
1251                (amount + cents).max(Decimal::ZERO).round_dp(2)
1252            }
1253            _ => amount,
1254        }
1255    }
1256
1257    /// Rebalance an entry after a one-sided amount modification.
1258    ///
1259    /// When an error modifies one line's amount, this finds a line on the opposite
1260    /// side (credit if modified was debit, or vice versa) and adjusts it by the
1261    /// same impact to maintain balance.
1262    fn rebalance_entry(entry: &mut JournalEntry, modified_was_debit: bool, impact: Decimal) {
1263        // Find a line on the opposite side to adjust
1264        let balancing_idx = entry.lines.iter().position(|l| {
1265            if modified_was_debit {
1266                l.credit_amount > Decimal::ZERO
1267            } else {
1268                l.debit_amount > Decimal::ZERO
1269            }
1270        });
1271
1272        if let Some(idx) = balancing_idx {
1273            if modified_was_debit {
1274                entry.lines[idx].credit_amount += impact;
1275            } else {
1276                entry.lines[idx].debit_amount += impact;
1277            }
1278        }
1279    }
1280
1281    /// Inject a human-like error based on the persona.
1282    ///
1283    /// All error types maintain balance - amount modifications are applied to both sides.
1284    /// Entries are marked with [HUMAN_ERROR:*] tags in header_text for ML detection.
1285    fn inject_human_error(&mut self, entry: &mut JournalEntry, persona: UserPersona) {
1286        use rust_decimal::Decimal;
1287
1288        // Different personas make different types of errors
1289        let error_type: u8 = match persona {
1290            UserPersona::JuniorAccountant => {
1291                // Junior accountants make more varied errors
1292                self.rng.random_range(0..5)
1293            }
1294            UserPersona::SeniorAccountant => {
1295                // Senior accountants mainly make transposition errors
1296                self.rng.random_range(0..3)
1297            }
1298            UserPersona::Controller | UserPersona::Manager => {
1299                // Controllers/managers mainly make rounding or cutoff errors
1300                self.rng.random_range(3..5)
1301            }
1302            _ => return,
1303        };
1304
1305        match error_type {
1306            0 => {
1307                // Transposed digits in an amount
1308                if let Some(line) = entry.lines.get_mut(0) {
1309                    let is_debit = line.debit_amount > Decimal::ZERO;
1310                    let original_amount = if is_debit {
1311                        line.debit_amount
1312                    } else {
1313                        line.credit_amount
1314                    };
1315
1316                    // Simple digit swap in the string representation
1317                    let s = original_amount.to_string();
1318                    if s.len() >= 2 {
1319                        let chars: Vec<char> = s.chars().collect();
1320                        let pos = self.rng.random_range(0..chars.len().saturating_sub(1));
1321                        if chars[pos].is_ascii_digit()
1322                            && chars.get(pos + 1).is_some_and(|c| c.is_ascii_digit())
1323                        {
1324                            let mut new_chars = chars;
1325                            new_chars.swap(pos, pos + 1);
1326                            if let Ok(new_amount) =
1327                                new_chars.into_iter().collect::<String>().parse::<Decimal>()
1328                            {
1329                                let impact = new_amount - original_amount;
1330
1331                                // Apply to the modified line
1332                                if is_debit {
1333                                    entry.lines[0].debit_amount = new_amount;
1334                                } else {
1335                                    entry.lines[0].credit_amount = new_amount;
1336                                }
1337
1338                                // Rebalance the entry
1339                                Self::rebalance_entry(entry, is_debit, impact);
1340
1341                                entry.header.header_text = Some(
1342                                    entry.header.header_text.clone().unwrap_or_default()
1343                                        + " [HUMAN_ERROR:TRANSPOSITION]",
1344                                );
1345                            }
1346                        }
1347                    }
1348                }
1349            }
1350            1 => {
1351                // Wrong decimal place (off by factor of 10)
1352                if let Some(line) = entry.lines.get_mut(0) {
1353                    let is_debit = line.debit_amount > Decimal::ZERO;
1354                    let original_amount = if is_debit {
1355                        line.debit_amount
1356                    } else {
1357                        line.credit_amount
1358                    };
1359
1360                    let new_amount = original_amount * Decimal::new(10, 0);
1361                    let impact = new_amount - original_amount;
1362
1363                    // Apply to the modified line
1364                    if is_debit {
1365                        entry.lines[0].debit_amount = new_amount;
1366                    } else {
1367                        entry.lines[0].credit_amount = new_amount;
1368                    }
1369
1370                    // Rebalance the entry
1371                    Self::rebalance_entry(entry, is_debit, impact);
1372
1373                    entry.header.header_text = Some(
1374                        entry.header.header_text.clone().unwrap_or_default()
1375                            + " [HUMAN_ERROR:DECIMAL_SHIFT]",
1376                    );
1377                }
1378            }
1379            2 => {
1380                // Typo in description (doesn't affect balance)
1381                if let Some(ref mut text) = entry.header.header_text {
1382                    let typos = ["teh", "adn", "wiht", "taht", "recieve"];
1383                    let correct = ["the", "and", "with", "that", "receive"];
1384                    let idx = self.rng.random_range(0..typos.len());
1385                    if text.to_lowercase().contains(correct[idx]) {
1386                        *text = text.replace(correct[idx], typos[idx]);
1387                        *text = format!("{} [HUMAN_ERROR:TYPO]", text);
1388                    }
1389                }
1390            }
1391            3 => {
1392                // Rounding to round number
1393                if let Some(line) = entry.lines.get_mut(0) {
1394                    let is_debit = line.debit_amount > Decimal::ZERO;
1395                    let original_amount = if is_debit {
1396                        line.debit_amount
1397                    } else {
1398                        line.credit_amount
1399                    };
1400
1401                    let new_amount =
1402                        (original_amount / Decimal::new(100, 0)).round() * Decimal::new(100, 0);
1403                    let impact = new_amount - original_amount;
1404
1405                    // Apply to the modified line
1406                    if is_debit {
1407                        entry.lines[0].debit_amount = new_amount;
1408                    } else {
1409                        entry.lines[0].credit_amount = new_amount;
1410                    }
1411
1412                    // Rebalance the entry
1413                    Self::rebalance_entry(entry, is_debit, impact);
1414
1415                    entry.header.header_text = Some(
1416                        entry.header.header_text.clone().unwrap_or_default()
1417                            + " [HUMAN_ERROR:ROUNDED]",
1418                    );
1419                }
1420            }
1421            4 => {
1422                // Late posting marker (document date much earlier than posting date)
1423                // This doesn't create an imbalance
1424                if entry.header.document_date == entry.header.posting_date {
1425                    let days_late = self.rng.random_range(5..15);
1426                    entry.header.document_date =
1427                        entry.header.posting_date - chrono::Duration::days(days_late);
1428                    entry.header.header_text = Some(
1429                        entry.header.header_text.clone().unwrap_or_default()
1430                            + " [HUMAN_ERROR:LATE_POSTING]",
1431                    );
1432                }
1433            }
1434            _ => {}
1435        }
1436    }
1437
1438    /// Apply approval workflow for high-value transactions.
1439    ///
1440    /// If the entry amount exceeds the approval threshold, simulate an
1441    /// approval workflow with appropriate approvers based on amount.
1442    fn maybe_apply_approval_workflow(
1443        &mut self,
1444        entry: &mut JournalEntry,
1445        _posting_date: NaiveDate,
1446    ) {
1447        use rust_decimal::Decimal;
1448
1449        let amount = entry.total_debit();
1450
1451        // Skip if amount is below threshold
1452        if amount <= self.approval_threshold {
1453            // Auto-approved below threshold
1454            let workflow = ApprovalWorkflow::auto_approved(
1455                entry.header.created_by.clone(),
1456                entry.header.user_persona.clone(),
1457                amount,
1458                entry.header.created_at,
1459            );
1460            entry.header.approval_workflow = Some(workflow);
1461            return;
1462        }
1463
1464        // Mark as SOX relevant for high-value transactions
1465        entry.header.sox_relevant = true;
1466
1467        // Determine required approval levels based on amount
1468        let required_levels = if amount > Decimal::new(100000, 0) {
1469            3 // Executive approval required
1470        } else if amount > Decimal::new(50000, 0) {
1471            2 // Senior management approval
1472        } else {
1473            1 // Manager approval
1474        };
1475
1476        // Create the approval workflow
1477        let mut workflow = ApprovalWorkflow::new(
1478            entry.header.created_by.clone(),
1479            entry.header.user_persona.clone(),
1480            amount,
1481        );
1482        workflow.required_levels = required_levels;
1483
1484        // Simulate submission
1485        let submit_time = entry.header.created_at;
1486        let submit_action = ApprovalAction::new(
1487            entry.header.created_by.clone(),
1488            entry.header.user_persona.clone(),
1489            self.parse_persona(&entry.header.user_persona),
1490            ApprovalActionType::Submit,
1491            0,
1492        )
1493        .with_timestamp(submit_time);
1494
1495        workflow.actions.push(submit_action);
1496        workflow.status = ApprovalStatus::Pending;
1497        workflow.submitted_at = Some(submit_time);
1498
1499        // Simulate approvals with realistic delays
1500        let mut current_time = submit_time;
1501        for level in 1..=required_levels {
1502            // Add delay for approval (1-3 business hours per level)
1503            let delay_hours = self.rng.random_range(1..4);
1504            current_time += chrono::Duration::hours(delay_hours);
1505
1506            // Skip weekends
1507            while current_time.weekday() == chrono::Weekday::Sat
1508                || current_time.weekday() == chrono::Weekday::Sun
1509            {
1510                current_time += chrono::Duration::days(1);
1511            }
1512
1513            // Generate approver based on level
1514            let (approver_id, approver_role) = self.select_approver(level);
1515
1516            let approve_action = ApprovalAction::new(
1517                approver_id.clone(),
1518                format!("{:?}", approver_role),
1519                approver_role,
1520                ApprovalActionType::Approve,
1521                level,
1522            )
1523            .with_timestamp(current_time);
1524
1525            workflow.actions.push(approve_action);
1526            workflow.current_level = level;
1527        }
1528
1529        // Mark as approved
1530        workflow.status = ApprovalStatus::Approved;
1531        workflow.approved_at = Some(current_time);
1532
1533        entry.header.approval_workflow = Some(workflow);
1534    }
1535
1536    /// Select an approver based on the required level.
1537    fn select_approver(&mut self, level: u8) -> (String, UserPersona) {
1538        let persona = match level {
1539            1 => UserPersona::Manager,
1540            2 => UserPersona::Controller,
1541            _ => UserPersona::Executive,
1542        };
1543
1544        // Try to get from user pool first
1545        if let Some(ref pool) = self.user_pool {
1546            if let Some(user) = pool.get_random_user(persona, &mut self.rng) {
1547                return (user.user_id.clone(), persona);
1548            }
1549        }
1550
1551        // Fallback to generated approver
1552        let approver_id = match persona {
1553            UserPersona::Manager => format!("MGR{:04}", self.rng.random_range(1..100)),
1554            UserPersona::Controller => format!("CTRL{:04}", self.rng.random_range(1..20)),
1555            UserPersona::Executive => format!("EXEC{:04}", self.rng.random_range(1..10)),
1556            _ => format!("USR{:04}", self.rng.random_range(1..1000)),
1557        };
1558
1559        (approver_id, persona)
1560    }
1561
1562    /// Parse user persona from string.
1563    fn parse_persona(&self, persona_str: &str) -> UserPersona {
1564        match persona_str.to_lowercase().as_str() {
1565            s if s.contains("junior") => UserPersona::JuniorAccountant,
1566            s if s.contains("senior") => UserPersona::SeniorAccountant,
1567            s if s.contains("controller") => UserPersona::Controller,
1568            s if s.contains("manager") => UserPersona::Manager,
1569            s if s.contains("executive") => UserPersona::Executive,
1570            s if s.contains("automated") || s.contains("system") => UserPersona::AutomatedSystem,
1571            _ => UserPersona::JuniorAccountant, // Default
1572        }
1573    }
1574
1575    /// Enable or disable approval workflow.
1576    pub fn with_approval(mut self, enabled: bool) -> Self {
1577        self.approval_enabled = enabled;
1578        self
1579    }
1580
1581    /// Set the approval threshold amount.
1582    pub fn with_approval_threshold(mut self, threshold: rust_decimal::Decimal) -> Self {
1583        self.approval_threshold = threshold;
1584        self
1585    }
1586
1587    /// Set the temporal drift controller for simulating distribution changes over time.
1588    ///
1589    /// When drift is enabled, amounts and other distributions will shift based on
1590    /// the period (month) to simulate realistic temporal evolution like inflation
1591    /// or increasing fraud rates.
1592    pub fn with_drift_controller(mut self, controller: DriftController) -> Self {
1593        self.drift_controller = Some(controller);
1594        self
1595    }
1596
1597    /// Set drift configuration directly.
1598    ///
1599    /// Creates a drift controller from the config. Total periods is calculated
1600    /// from the date range.
1601    pub fn with_drift_config(mut self, config: DriftConfig, seed: u64) -> Self {
1602        if config.enabled {
1603            let total_periods = self.calculate_total_periods();
1604            self.drift_controller = Some(DriftController::new(config, seed, total_periods));
1605        }
1606        self
1607    }
1608
1609    /// Calculate total periods (months) in the date range.
1610    fn calculate_total_periods(&self) -> u32 {
1611        let start_year = self.start_date.year();
1612        let start_month = self.start_date.month();
1613        let end_year = self.end_date.year();
1614        let end_month = self.end_date.month();
1615
1616        ((end_year - start_year) * 12 + (end_month as i32 - start_month as i32) + 1).max(1) as u32
1617    }
1618
1619    /// Calculate the period number (0-indexed) for a given date.
1620    fn date_to_period(&self, date: NaiveDate) -> u32 {
1621        let start_year = self.start_date.year();
1622        let start_month = self.start_date.month() as i32;
1623        let date_year = date.year();
1624        let date_month = date.month() as i32;
1625
1626        ((date_year - start_year) * 12 + (date_month - start_month)).max(0) as u32
1627    }
1628
1629    /// Get drift adjustments for a given date.
1630    fn get_drift_adjustments(&self, date: NaiveDate) -> DriftAdjustments {
1631        if let Some(ref controller) = self.drift_controller {
1632            let period = self.date_to_period(date);
1633            controller.compute_adjustments(period)
1634        } else {
1635            DriftAdjustments::none()
1636        }
1637    }
1638
1639    /// Select a user from the pool or generate a generic user ID.
1640    #[inline]
1641    fn select_user(&mut self, is_automated: bool) -> (String, String) {
1642        if let Some(ref pool) = self.user_pool {
1643            let persona = if is_automated {
1644                UserPersona::AutomatedSystem
1645            } else {
1646                // Random distribution among human personas
1647                let roll: f64 = self.rng.random();
1648                if roll < 0.4 {
1649                    UserPersona::JuniorAccountant
1650                } else if roll < 0.7 {
1651                    UserPersona::SeniorAccountant
1652                } else if roll < 0.85 {
1653                    UserPersona::Controller
1654                } else {
1655                    UserPersona::Manager
1656                }
1657            };
1658
1659            if let Some(user) = pool.get_random_user(persona, &mut self.rng) {
1660                return (
1661                    user.user_id.clone(),
1662                    format!("{:?}", user.persona).to_lowercase(),
1663                );
1664            }
1665        }
1666
1667        // Fallback to generic format
1668        if is_automated {
1669            (
1670                format!("BATCH{:04}", self.rng.random_range(1..=20)),
1671                "automated_system".to_string(),
1672            )
1673        } else {
1674            (
1675                format!("USER{:04}", self.rng.random_range(1..=40)),
1676                "senior_accountant".to_string(),
1677            )
1678        }
1679    }
1680
1681    /// Select transaction source based on configuration weights.
1682    #[inline]
1683    fn select_source(&mut self) -> TransactionSource {
1684        let roll: f64 = self.rng.random();
1685        let dist = &self.config.source_distribution;
1686
1687        if roll < dist.manual {
1688            TransactionSource::Manual
1689        } else if roll < dist.manual + dist.automated {
1690            TransactionSource::Automated
1691        } else if roll < dist.manual + dist.automated + dist.recurring {
1692            TransactionSource::Recurring
1693        } else {
1694            TransactionSource::Adjustment
1695        }
1696    }
1697
1698    /// Select a business process based on configuration weights.
1699    #[inline]
1700    fn select_business_process(&mut self) -> BusinessProcess {
1701        let roll: f64 = self.rng.random();
1702
1703        // Default weights: O2C=35%, P2P=30%, R2R=20%, H2R=10%, A2R=5%
1704        if roll < 0.35 {
1705            BusinessProcess::O2C
1706        } else if roll < 0.65 {
1707            BusinessProcess::P2P
1708        } else if roll < 0.85 {
1709            BusinessProcess::R2R
1710        } else if roll < 0.95 {
1711            BusinessProcess::H2R
1712        } else {
1713            BusinessProcess::A2R
1714        }
1715    }
1716
1717    #[inline]
1718    fn select_debit_account(&mut self) -> &GLAccount {
1719        let accounts = self.coa.get_accounts_by_type(AccountType::Asset);
1720        let expense_accounts = self.coa.get_accounts_by_type(AccountType::Expense);
1721
1722        // 60% asset, 40% expense for debits
1723        let all: Vec<_> = if self.rng.random::<f64>() < 0.6 {
1724            accounts
1725        } else {
1726            expense_accounts
1727        };
1728
1729        all.choose(&mut self.rng)
1730            .copied()
1731            .unwrap_or_else(|| &self.coa.accounts[0])
1732    }
1733
1734    #[inline]
1735    fn select_credit_account(&mut self) -> &GLAccount {
1736        let liability_accounts = self.coa.get_accounts_by_type(AccountType::Liability);
1737        let revenue_accounts = self.coa.get_accounts_by_type(AccountType::Revenue);
1738
1739        // 60% liability, 40% revenue for credits
1740        let all: Vec<_> = if self.rng.random::<f64>() < 0.6 {
1741            liability_accounts
1742        } else {
1743            revenue_accounts
1744        };
1745
1746        all.choose(&mut self.rng)
1747            .copied()
1748            .unwrap_or_else(|| &self.coa.accounts[0])
1749    }
1750}
1751
1752impl Generator for JournalEntryGenerator {
1753    type Item = JournalEntry;
1754    type Config = (
1755        TransactionConfig,
1756        Arc<ChartOfAccounts>,
1757        Vec<String>,
1758        NaiveDate,
1759        NaiveDate,
1760    );
1761
1762    fn new(config: Self::Config, seed: u64) -> Self {
1763        Self::new_with_params(config.0, config.1, config.2, config.3, config.4, seed)
1764    }
1765
1766    fn generate_one(&mut self) -> Self::Item {
1767        self.generate()
1768    }
1769
1770    fn reset(&mut self) {
1771        self.rng = seeded_rng(self.seed, 0);
1772        self.line_sampler.reset(self.seed + 1);
1773        self.amount_sampler.reset(self.seed + 2);
1774        self.temporal_sampler.reset(self.seed + 3);
1775        self.count = 0;
1776        self.uuid_factory.reset();
1777
1778        // Reset reference generator by recreating it
1779        let mut ref_gen = ReferenceGenerator::new(
1780            self.start_date.year(),
1781            self.companies.first().map(|s| s.as_str()).unwrap_or("1000"),
1782        );
1783        ref_gen.set_prefix(
1784            ReferenceType::Invoice,
1785            &self.template_config.references.invoice_prefix,
1786        );
1787        ref_gen.set_prefix(
1788            ReferenceType::PurchaseOrder,
1789            &self.template_config.references.po_prefix,
1790        );
1791        ref_gen.set_prefix(
1792            ReferenceType::SalesOrder,
1793            &self.template_config.references.so_prefix,
1794        );
1795        self.reference_generator = ref_gen;
1796    }
1797
1798    fn count(&self) -> u64 {
1799        self.count
1800    }
1801
1802    fn seed(&self) -> u64 {
1803        self.seed
1804    }
1805}
1806
1807use datasynth_core::traits::ParallelGenerator;
1808
1809impl ParallelGenerator for JournalEntryGenerator {
1810    /// Split this generator into `parts` independent sub-generators.
1811    ///
1812    /// Each sub-generator gets a deterministic seed derived from the parent seed
1813    /// and its partition index, plus a partitioned UUID factory to avoid contention.
1814    /// The results are deterministic for a given partition count.
1815    fn split(self, parts: usize) -> Vec<Self> {
1816        let parts = parts.max(1);
1817        (0..parts)
1818            .map(|i| {
1819                // Derive a unique seed per partition using a golden-ratio constant
1820                let sub_seed = self
1821                    .seed
1822                    .wrapping_add((i as u64).wrapping_mul(0x9E3779B97F4A7C15));
1823
1824                let mut gen = JournalEntryGenerator::new_with_full_config(
1825                    self.config.clone(),
1826                    Arc::clone(&self.coa),
1827                    self.companies.clone(),
1828                    self.start_date,
1829                    self.end_date,
1830                    sub_seed,
1831                    self.template_config.clone(),
1832                    self.user_pool.clone(),
1833                );
1834
1835                // Copy over configuration state
1836                gen.company_selector = self.company_selector.clone();
1837                gen.vendor_pool = self.vendor_pool.clone();
1838                gen.customer_pool = self.customer_pool.clone();
1839                gen.material_pool = self.material_pool.clone();
1840                gen.using_real_master_data = self.using_real_master_data;
1841                gen.fraud_config = self.fraud_config.clone();
1842                gen.persona_errors_enabled = self.persona_errors_enabled;
1843                gen.approval_enabled = self.approval_enabled;
1844                gen.approval_threshold = self.approval_threshold;
1845
1846                // Use partitioned UUID factory to eliminate atomic contention
1847                gen.uuid_factory = DeterministicUuidFactory::for_partition(
1848                    sub_seed,
1849                    GeneratorType::JournalEntry,
1850                    i as u8,
1851                );
1852
1853                // Copy temporal patterns if configured
1854                if let Some(ref config) = self.temporal_patterns_config {
1855                    gen.temporal_patterns_config = Some(config.clone());
1856                    // Rebuild business day calculator from the stored config
1857                    if config.business_days.enabled {
1858                        if let Some(ref bdc) = self.business_day_calculator {
1859                            gen.business_day_calculator = Some(bdc.clone());
1860                        }
1861                    }
1862                    // Rebuild processing lag calculator with partition seed
1863                    if config.processing_lags.enabled {
1864                        let lag_config =
1865                            Self::convert_processing_lag_config(&config.processing_lags);
1866                        gen.processing_lag_calculator =
1867                            Some(ProcessingLagCalculator::with_config(sub_seed, lag_config));
1868                    }
1869                }
1870
1871                // Copy drift controller if present
1872                if let Some(ref dc) = self.drift_controller {
1873                    gen.drift_controller = Some(dc.clone());
1874                }
1875
1876                gen
1877            })
1878            .collect()
1879    }
1880}
1881
#[cfg(test)]
#[allow(clippy::unwrap_used)]
mod tests {
    use super::*;
    use crate::ChartOfAccountsGenerator;

    /// Build the small manufacturing chart of accounts (seed 42) used by the tests.
    fn small_coa() -> Arc<ChartOfAccounts> {
        let mut coa_gen =
            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
        Arc::new(coa_gen.generate())
    }

    /// Shorthand for a calendar date that is known to be valid.
    fn ymd(year: i32, month: u32, day: u32) -> NaiveDate {
        NaiveDate::from_ymd_opt(year, month, day).unwrap()
    }

    /// Generator with default transaction config for company "1000",
    /// covering 2024-01-01 through `end_date`.
    fn generator_until(
        coa: Arc<ChartOfAccounts>,
        end_date: NaiveDate,
        seed: u64,
    ) -> JournalEntryGenerator {
        JournalEntryGenerator::new_with_params(
            TransactionConfig::default(),
            coa,
            vec!["1000".to_string()],
            ymd(2024, 1, 1),
            end_date,
            seed,
        )
    }

    /// Generator with default transaction config covering calendar year 2024.
    fn fy2024_generator(coa: Arc<ChartOfAccounts>, seed: u64) -> JournalEntryGenerator {
        generator_until(coa, ymd(2024, 12, 31), seed)
    }

    #[test]
    fn test_generate_balanced_entries() {
        let mut je_gen = fy2024_generator(small_coa(), 42);

        let mut balanced_count = 0;
        for _ in 0..100 {
            let entry = je_gen.generate();

            // Skip entries with human errors as they may be intentionally unbalanced
            let has_human_error = entry
                .header
                .header_text
                .as_ref()
                .map(|t| t.contains("[HUMAN_ERROR:"))
                .unwrap_or(false);

            if !has_human_error {
                assert!(
                    entry.is_balanced(),
                    "Entry {:?} is not balanced",
                    entry.header.document_id
                );
                balanced_count += 1;
            }
            assert!(entry.line_count() >= 2, "Entry has fewer than 2 lines");
        }

        // Ensure most entries are balanced (human errors are rare)
        assert!(
            balanced_count >= 80,
            "Expected at least 80 balanced entries, got {}",
            balanced_count
        );
    }

    #[test]
    fn test_deterministic_generation() {
        // Both generators share the same chart of accounts and seed.
        let coa = small_coa();
        let mut gen1 = fy2024_generator(Arc::clone(&coa), 42);
        let mut gen2 = fy2024_generator(coa, 42);

        for _ in 0..50 {
            let e1 = gen1.generate();
            let e2 = gen2.generate();
            assert_eq!(e1.header.document_id, e2.header.document_id);
            assert_eq!(e1.total_debit(), e2.total_debit());
        }
    }

    #[test]
    fn test_templates_generate_descriptions() {
        // Enable all template features
        let template_config = TemplateConfig {
            names: datasynth_config::schema::NameTemplateConfig {
                generate_realistic_names: true,
                email_domain: "test.com".to_string(),
                culture_distribution: datasynth_config::schema::CultureDistribution::default(),
            },
            descriptions: datasynth_config::schema::DescriptionTemplateConfig {
                generate_header_text: true,
                generate_line_text: true,
            },
            references: datasynth_config::schema::ReferenceTemplateConfig {
                generate_references: true,
                invoice_prefix: "TEST-INV".to_string(),
                po_prefix: "TEST-PO".to_string(),
                so_prefix: "TEST-SO".to_string(),
            },
        };

        let mut je_gen = JournalEntryGenerator::new_with_full_config(
            TransactionConfig::default(),
            small_coa(),
            vec!["1000".to_string()],
            ymd(2024, 1, 1),
            ymd(2024, 12, 31),
            42,
            template_config,
            None,
        )
        .with_persona_errors(false); // Disable for template testing

        for _ in 0..10 {
            let entry = je_gen.generate();

            // Verify header text is populated
            assert!(
                entry.header.header_text.is_some(),
                "Header text should be populated"
            );

            // Verify reference is populated
            assert!(
                entry.header.reference.is_some(),
                "Reference should be populated"
            );

            // Verify business process is set
            assert!(
                entry.header.business_process.is_some(),
                "Business process should be set"
            );

            // Verify line text is populated
            for line in &entry.lines {
                assert!(line.line_text.is_some(), "Line text should be populated");
            }

            // Entry should still be balanced
            assert!(entry.is_balanced());
        }
    }

    #[test]
    fn test_user_pool_integration() {
        let companies = vec!["1000".to_string()];

        // Generate user pool
        let mut user_gen = crate::UserGenerator::new(42);
        let user_pool = user_gen.generate_standard(&companies);

        let mut je_gen = JournalEntryGenerator::new_with_full_config(
            TransactionConfig::default(),
            small_coa(),
            companies,
            ymd(2024, 1, 1),
            ymd(2024, 12, 31),
            42,
            TemplateConfig::default(),
            Some(user_pool),
        );

        // Every generated entry must carry a creator ID. The ID may come from
        // the pool or from the generic fallback, so only presence is checked.
        for _ in 0..20 {
            let entry = je_gen.generate();
            assert!(!entry.header.created_by.is_empty());
        }
    }

    #[test]
    fn test_master_data_connection() {
        // Create test vendors
        let vendors = vec![
            Vendor::new("V-TEST-001", "Test Vendor Alpha", VendorType::Supplier),
            Vendor::new("V-TEST-002", "Test Vendor Beta", VendorType::Technology),
        ];

        // Create test customers
        let customers = vec![
            Customer::new("C-TEST-001", "Test Customer One", CustomerType::Corporate),
            Customer::new(
                "C-TEST-002",
                "Test Customer Two",
                CustomerType::SmallBusiness,
            ),
        ];

        // Create test materials
        let materials = vec![Material::new(
            "MAT-TEST-001",
            "Test Material A",
            MaterialType::RawMaterial,
        )];

        let generator = fy2024_generator(small_coa(), 42);

        // Without master data
        assert!(!generator.is_using_real_master_data());

        // Connect master data
        let generator_with_data = generator
            .with_vendors(&vendors)
            .with_customers(&customers)
            .with_materials(&materials);

        // Should now be using real master data
        assert!(generator_with_data.is_using_real_master_data());
    }

    #[test]
    fn test_with_master_data_convenience_method() {
        let vendors = vec![Vendor::new("V-001", "Vendor One", VendorType::Supplier)];
        let customers = vec![Customer::new(
            "C-001",
            "Customer One",
            CustomerType::Corporate,
        )];
        let materials = vec![Material::new(
            "MAT-001",
            "Material One",
            MaterialType::RawMaterial,
        )];

        let generator =
            fy2024_generator(small_coa(), 42).with_master_data(&vendors, &customers, &materials);

        assert!(generator.is_using_real_master_data());
    }

    #[test]
    fn test_stress_factors_increase_error_rate() {
        let generator = fy2024_generator(small_coa(), 42);

        let base_rate = 0.1;

        // Regular day - no stress factors expected.
        // 2024-06-12 is a mid-month Wednesday (2024-06-15 is a Saturday, so it
        // is not a valid mid-week baseline).
        let regular_day = ymd(2024, 6, 12);
        let regular_rate = generator.apply_stress_factors(base_rate, regular_day);
        assert!(
            (regular_rate - base_rate).abs() < 0.01,
            "Regular day should have minimal stress factor adjustment"
        );

        // Month end (2024-06-29, a Saturday)
        let month_end = ymd(2024, 6, 29);
        let month_end_rate = generator.apply_stress_factors(base_rate, month_end);
        assert!(
            month_end_rate > regular_rate,
            "Month end should have higher error rate than regular day"
        );

        // Year end (2024-12-30, a Monday)
        let year_end = ymd(2024, 12, 30);
        let year_end_rate = generator.apply_stress_factors(base_rate, year_end);
        assert!(
            year_end_rate > month_end_rate,
            "Year end should have highest error rate"
        );

        // Friday stress (2024-06-14)
        let friday = ymd(2024, 6, 14);
        let friday_rate = generator.apply_stress_factors(base_rate, friday);
        assert!(
            friday_rate > regular_rate,
            "Friday should have higher error rate than mid-week"
        );

        // Monday stress (2024-06-17)
        let monday = ymd(2024, 6, 17);
        let monday_rate = generator.apply_stress_factors(base_rate, monday);
        assert!(
            monday_rate > regular_rate,
            "Monday should have higher error rate than mid-week"
        );
    }

    #[test]
    fn test_batching_produces_similar_entries() {
        // Use seed 123 which is more likely to trigger batching
        let mut je_gen = fy2024_generator(small_coa(), 123).with_persona_errors(false); // Disable to ensure balanced entries

        // Generate many entries so that some batches are likely to occur
        let entries: Vec<JournalEntry> = (0..200).map(|_| je_gen.generate()).collect();

        // Check that all entries are balanced (batched or not)
        for entry in &entries {
            assert!(
                entry.is_balanced(),
                "All entries including batched should be balanced"
            );
        }

        // Count entries with same-day posting dates (batch indicator)
        let mut date_counts: std::collections::HashMap<NaiveDate, usize> =
            std::collections::HashMap::new();
        for entry in &entries {
            *date_counts.entry(entry.header.posting_date).or_insert(0) += 1;
        }

        // With batching, some dates should have multiple entries
        let dates_with_multiple = date_counts.values().filter(|&&c| c > 1).count();
        assert!(
            dates_with_multiple > 0,
            "With batching, should see some dates with multiple entries"
        );
    }

    #[test]
    fn test_temporal_patterns_business_days() {
        use datasynth_config::schema::{
            BusinessDaySchemaConfig, CalendarSchemaConfig, TemporalPatternsConfig,
        };

        // Create temporal patterns config with business days enabled
        let temporal_config = TemporalPatternsConfig {
            enabled: true,
            business_days: BusinessDaySchemaConfig {
                enabled: true,
                ..Default::default()
            },
            calendars: CalendarSchemaConfig {
                regions: vec!["US".to_string()],
                custom_holidays: vec![],
            },
            ..Default::default()
        };

        // Q1 2024 only
        let mut je_gen = generator_until(small_coa(), ymd(2024, 3, 31), 42)
            .with_temporal_patterns(temporal_config, 42)
            .with_persona_errors(false);

        // Generate entries and verify none fall on weekends
        let entries: Vec<JournalEntry> = (0..100).map(|_| je_gen.generate()).collect();

        for entry in &entries {
            let weekday = entry.header.posting_date.weekday();
            assert!(
                weekday != chrono::Weekday::Sat && weekday != chrono::Weekday::Sun,
                "Posting date {:?} should not be a weekend",
                entry.header.posting_date
            );
        }
    }
}