Skip to main content

datasynth_generators/
je_generator.rs

1//! Journal Entry generator with statistical distributions.
2
3use chrono::{Datelike, NaiveDate};
4use datasynth_core::utils::seeded_rng;
5use rand::prelude::*;
6use rand_chacha::ChaCha8Rng;
7use rust_decimal::prelude::*;
8use rust_decimal::Decimal;
9use std::sync::Arc;
10
11use tracing::debug;
12
13use datasynth_config::schema::{
14    FraudConfig, GeneratorConfig, TemplateConfig, TemporalPatternsConfig, TransactionConfig,
15};
16use datasynth_core::distributions::{
17    BusinessDayCalculator, CrossDayConfig, DriftAdjustments, DriftConfig, DriftController,
18    EventType, LagDistribution, PeriodEndConfig, PeriodEndDynamics, PeriodEndModel,
19    ProcessingLagCalculator, ProcessingLagConfig, *,
20};
21use datasynth_core::models::*;
22use datasynth_core::templates::{
23    descriptions::DescriptionContext, DescriptionGenerator, ReferenceGenerator, ReferenceType,
24};
25use datasynth_core::traits::Generator;
26use datasynth_core::uuid_factory::{DeterministicUuidFactory, GeneratorType};
27use datasynth_core::CountryPack;
28
29use crate::company_selector::WeightedCompanySelector;
30use crate::user_generator::{UserGenerator, UserGeneratorConfig};
31
32/// Generator for realistic journal entries.
33pub struct JournalEntryGenerator {
34    rng: ChaCha8Rng,
35    seed: u64,
36    config: TransactionConfig,
37    coa: Arc<ChartOfAccounts>,
38    companies: Vec<String>,
39    company_selector: WeightedCompanySelector,
40    line_sampler: LineItemSampler,
41    amount_sampler: AmountSampler,
42    temporal_sampler: TemporalSampler,
43    start_date: NaiveDate,
44    end_date: NaiveDate,
45    count: u64,
46    uuid_factory: DeterministicUuidFactory,
47    // Enhanced features
48    user_pool: Option<UserPool>,
49    description_generator: DescriptionGenerator,
50    reference_generator: ReferenceGenerator,
51    template_config: TemplateConfig,
52    vendor_pool: VendorPool,
53    customer_pool: CustomerPool,
54    // Material pool for realistic material references
55    material_pool: Option<MaterialPool>,
56    // Flag indicating whether we're using real master data vs defaults
57    using_real_master_data: bool,
58    // Fraud generation
59    fraud_config: FraudConfig,
60    // Persona-based error injection
61    persona_errors_enabled: bool,
62    // Approval threshold enforcement
63    approval_enabled: bool,
64    approval_threshold: rust_decimal::Decimal,
65    // Batching behavior - humans often process similar items together
66    batch_state: Option<BatchState>,
67    // Temporal drift controller for simulating distribution changes over time
68    drift_controller: Option<DriftController>,
69    // Temporal patterns components
70    business_day_calculator: Option<BusinessDayCalculator>,
71    processing_lag_calculator: Option<ProcessingLagCalculator>,
72    temporal_patterns_config: Option<TemporalPatternsConfig>,
73}
74
75/// State for tracking batch processing behavior.
76///
77/// When humans process transactions, they often batch similar items together
78/// (e.g., processing all invoices from one vendor, entering similar expenses).
79#[derive(Clone)]
80struct BatchState {
81    /// The base entry template to vary
82    base_account_number: String,
83    base_amount: rust_decimal::Decimal,
84    base_business_process: Option<BusinessProcess>,
85    base_posting_date: NaiveDate,
86    /// Remaining entries in this batch
87    remaining: u8,
88}
89
90impl JournalEntryGenerator {
91    /// Create a new journal entry generator.
92    pub fn new_with_params(
93        config: TransactionConfig,
94        coa: Arc<ChartOfAccounts>,
95        companies: Vec<String>,
96        start_date: NaiveDate,
97        end_date: NaiveDate,
98        seed: u64,
99    ) -> Self {
100        Self::new_with_full_config(
101            config,
102            coa,
103            companies,
104            start_date,
105            end_date,
106            seed,
107            TemplateConfig::default(),
108            None,
109        )
110    }
111
112    /// Create a new journal entry generator with full configuration.
113    #[allow(clippy::too_many_arguments)]
114    pub fn new_with_full_config(
115        config: TransactionConfig,
116        coa: Arc<ChartOfAccounts>,
117        companies: Vec<String>,
118        start_date: NaiveDate,
119        end_date: NaiveDate,
120        seed: u64,
121        template_config: TemplateConfig,
122        user_pool: Option<UserPool>,
123    ) -> Self {
124        // Initialize user pool if not provided
125        let user_pool = user_pool.or_else(|| {
126            if template_config.names.generate_realistic_names {
127                let user_gen_config = UserGeneratorConfig {
128                    culture_distribution: vec![
129                        (
130                            datasynth_core::templates::NameCulture::WesternUs,
131                            template_config.names.culture_distribution.western_us,
132                        ),
133                        (
134                            datasynth_core::templates::NameCulture::Hispanic,
135                            template_config.names.culture_distribution.hispanic,
136                        ),
137                        (
138                            datasynth_core::templates::NameCulture::German,
139                            template_config.names.culture_distribution.german,
140                        ),
141                        (
142                            datasynth_core::templates::NameCulture::French,
143                            template_config.names.culture_distribution.french,
144                        ),
145                        (
146                            datasynth_core::templates::NameCulture::Chinese,
147                            template_config.names.culture_distribution.chinese,
148                        ),
149                        (
150                            datasynth_core::templates::NameCulture::Japanese,
151                            template_config.names.culture_distribution.japanese,
152                        ),
153                        (
154                            datasynth_core::templates::NameCulture::Indian,
155                            template_config.names.culture_distribution.indian,
156                        ),
157                    ],
158                    email_domain: template_config.names.email_domain.clone(),
159                    generate_realistic_names: true,
160                };
161                let mut user_gen = UserGenerator::with_config(seed + 100, user_gen_config);
162                Some(user_gen.generate_standard(&companies))
163            } else {
164                None
165            }
166        });
167
168        // Initialize reference generator
169        let mut ref_gen = ReferenceGenerator::new(
170            start_date.year(),
171            companies.first().map(|s| s.as_str()).unwrap_or("1000"),
172        );
173        ref_gen.set_prefix(
174            ReferenceType::Invoice,
175            &template_config.references.invoice_prefix,
176        );
177        ref_gen.set_prefix(
178            ReferenceType::PurchaseOrder,
179            &template_config.references.po_prefix,
180        );
181        ref_gen.set_prefix(
182            ReferenceType::SalesOrder,
183            &template_config.references.so_prefix,
184        );
185
186        // Create weighted company selector (uniform weights for this constructor)
187        let company_selector = WeightedCompanySelector::uniform(companies.clone());
188
189        Self {
190            rng: seeded_rng(seed, 0),
191            seed,
192            config: config.clone(),
193            coa,
194            companies,
195            company_selector,
196            line_sampler: LineItemSampler::with_config(
197                seed + 1,
198                config.line_item_distribution.clone(),
199                config.even_odd_distribution.clone(),
200                config.debit_credit_distribution.clone(),
201            ),
202            amount_sampler: AmountSampler::with_config(seed + 2, config.amounts.clone()),
203            temporal_sampler: TemporalSampler::with_config(
204                seed + 3,
205                config.seasonality.clone(),
206                WorkingHoursConfig::default(),
207                Vec::new(),
208            ),
209            start_date,
210            end_date,
211            count: 0,
212            uuid_factory: DeterministicUuidFactory::new(seed, GeneratorType::JournalEntry),
213            user_pool,
214            description_generator: DescriptionGenerator::new(),
215            reference_generator: ref_gen,
216            template_config,
217            vendor_pool: VendorPool::standard(),
218            customer_pool: CustomerPool::standard(),
219            material_pool: None,
220            using_real_master_data: false,
221            fraud_config: FraudConfig::default(),
222            persona_errors_enabled: true, // Enable by default for realism
223            approval_enabled: true,       // Enable by default for realism
224            approval_threshold: rust_decimal::Decimal::new(10000, 0), // $10,000 default threshold
225            batch_state: None,
226            drift_controller: None,
227            // Always provide a basic BusinessDayCalculator so that weekend/holiday
228            // filtering is active even when temporal_patterns is not explicitly enabled.
229            business_day_calculator: Some(BusinessDayCalculator::new(HolidayCalendar::new(
230                Region::US,
231                start_date.year(),
232            ))),
233            processing_lag_calculator: None,
234            temporal_patterns_config: None,
235        }
236    }
237
238    /// Create from a full GeneratorConfig.
239    ///
240    /// This constructor uses the volume_weight from company configs
241    /// for weighted company selection, and fraud config from GeneratorConfig.
242    pub fn from_generator_config(
243        full_config: &GeneratorConfig,
244        coa: Arc<ChartOfAccounts>,
245        start_date: NaiveDate,
246        end_date: NaiveDate,
247        seed: u64,
248    ) -> Self {
249        let companies: Vec<String> = full_config
250            .companies
251            .iter()
252            .map(|c| c.code.clone())
253            .collect();
254
255        // Create weighted selector using volume_weight from company configs
256        let company_selector = WeightedCompanySelector::from_configs(&full_config.companies);
257
258        let mut generator = Self::new_with_full_config(
259            full_config.transactions.clone(),
260            coa,
261            companies,
262            start_date,
263            end_date,
264            seed,
265            full_config.templates.clone(),
266            None,
267        );
268
269        // Override the uniform selector with weighted selector
270        generator.company_selector = company_selector;
271
272        // Set fraud config
273        generator.fraud_config = full_config.fraud.clone();
274
275        // Configure temporal patterns if enabled
276        let temporal_config = &full_config.temporal_patterns;
277        if temporal_config.enabled {
278            generator = generator.with_temporal_patterns(temporal_config.clone(), seed);
279        }
280
281        generator
282    }
283
284    /// Configure temporal patterns including business day calculations and processing lags.
285    ///
286    /// This enables realistic temporal behavior including:
287    /// - Business day awareness (no postings on weekends/holidays)
288    /// - Processing lag modeling (event-to-posting delays)
289    /// - Period-end dynamics (volume spikes at month/quarter/year end)
290    pub fn with_temporal_patterns(mut self, config: TemporalPatternsConfig, seed: u64) -> Self {
291        // Create business day calculator if enabled
292        if config.business_days.enabled {
293            let region = config
294                .calendars
295                .regions
296                .first()
297                .map(|r| Self::parse_region(r))
298                .unwrap_or(Region::US);
299
300            let calendar = HolidayCalendar::new(region, self.start_date.year());
301            self.business_day_calculator = Some(BusinessDayCalculator::new(calendar));
302        }
303
304        // Create processing lag calculator if enabled
305        if config.processing_lags.enabled {
306            let lag_config = Self::convert_processing_lag_config(&config.processing_lags);
307            self.processing_lag_calculator =
308                Some(ProcessingLagCalculator::with_config(seed, lag_config));
309        }
310
311        // Create period-end dynamics if configured
312        let model = config.period_end.model.as_deref().unwrap_or("flat");
313        if model != "flat"
314            || config
315                .period_end
316                .month_end
317                .as_ref()
318                .is_some_and(|m| m.peak_multiplier.unwrap_or(1.0) != 1.0)
319        {
320            let dynamics = Self::convert_period_end_config(&config.period_end);
321            self.temporal_sampler.set_period_end_dynamics(dynamics);
322        }
323
324        self.temporal_patterns_config = Some(config);
325        self
326    }
327
328    /// Configure temporal patterns using a [`CountryPack`] for the holiday calendar.
329    ///
330    /// This is an alternative to [`with_temporal_patterns`] that derives the
331    /// holiday calendar from a country-pack definition rather than the built-in
332    /// region-based calendars.  All other temporal behaviour (business-day
333    /// adjustment, processing lags, period-end dynamics) is configured
334    /// identically.
335    pub fn with_country_pack_temporal(
336        mut self,
337        config: TemporalPatternsConfig,
338        seed: u64,
339        pack: &CountryPack,
340    ) -> Self {
341        // Create business day calculator using the country pack calendar
342        if config.business_days.enabled {
343            let calendar = HolidayCalendar::from_country_pack(pack, self.start_date.year());
344            self.business_day_calculator = Some(BusinessDayCalculator::new(calendar));
345        }
346
347        // Create processing lag calculator if enabled
348        if config.processing_lags.enabled {
349            let lag_config = Self::convert_processing_lag_config(&config.processing_lags);
350            self.processing_lag_calculator =
351                Some(ProcessingLagCalculator::with_config(seed, lag_config));
352        }
353
354        // Create period-end dynamics if configured
355        let model = config.period_end.model.as_deref().unwrap_or("flat");
356        if model != "flat"
357            || config
358                .period_end
359                .month_end
360                .as_ref()
361                .is_some_and(|m| m.peak_multiplier.unwrap_or(1.0) != 1.0)
362        {
363            let dynamics = Self::convert_period_end_config(&config.period_end);
364            self.temporal_sampler.set_period_end_dynamics(dynamics);
365        }
366
367        self.temporal_patterns_config = Some(config);
368        self
369    }
370
371    /// Convert schema processing lag config to core config.
372    fn convert_processing_lag_config(
373        schema: &datasynth_config::schema::ProcessingLagSchemaConfig,
374    ) -> ProcessingLagConfig {
375        let mut config = ProcessingLagConfig {
376            enabled: schema.enabled,
377            ..Default::default()
378        };
379
380        // Helper to convert lag schema to distribution
381        let convert_lag = |lag: &datasynth_config::schema::LagDistributionSchemaConfig| {
382            let mut dist = LagDistribution::log_normal(lag.mu, lag.sigma);
383            if let Some(min) = lag.min_hours {
384                dist.min_lag_hours = min;
385            }
386            if let Some(max) = lag.max_hours {
387                dist.max_lag_hours = max;
388            }
389            dist
390        };
391
392        // Apply event-specific lags
393        if let Some(ref lag) = schema.sales_order_lag {
394            config
395                .event_lags
396                .insert(EventType::SalesOrder, convert_lag(lag));
397        }
398        if let Some(ref lag) = schema.purchase_order_lag {
399            config
400                .event_lags
401                .insert(EventType::PurchaseOrder, convert_lag(lag));
402        }
403        if let Some(ref lag) = schema.goods_receipt_lag {
404            config
405                .event_lags
406                .insert(EventType::GoodsReceipt, convert_lag(lag));
407        }
408        if let Some(ref lag) = schema.invoice_receipt_lag {
409            config
410                .event_lags
411                .insert(EventType::InvoiceReceipt, convert_lag(lag));
412        }
413        if let Some(ref lag) = schema.invoice_issue_lag {
414            config
415                .event_lags
416                .insert(EventType::InvoiceIssue, convert_lag(lag));
417        }
418        if let Some(ref lag) = schema.payment_lag {
419            config
420                .event_lags
421                .insert(EventType::Payment, convert_lag(lag));
422        }
423        if let Some(ref lag) = schema.journal_entry_lag {
424            config
425                .event_lags
426                .insert(EventType::JournalEntry, convert_lag(lag));
427        }
428
429        // Apply cross-day posting config
430        if let Some(ref cross_day) = schema.cross_day_posting {
431            config.cross_day = CrossDayConfig {
432                enabled: cross_day.enabled,
433                probability_by_hour: cross_day.probability_by_hour.clone(),
434                ..Default::default()
435            };
436        }
437
438        config
439    }
440
441    /// Convert schema period-end config to core PeriodEndDynamics.
442    fn convert_period_end_config(
443        schema: &datasynth_config::schema::PeriodEndSchemaConfig,
444    ) -> PeriodEndDynamics {
445        let model_type = schema.model.as_deref().unwrap_or("exponential");
446
447        // Helper to convert period config
448        let convert_period =
449            |period: Option<&datasynth_config::schema::PeriodEndModelSchemaConfig>,
450             default_peak: f64|
451             -> PeriodEndConfig {
452                if let Some(p) = period {
453                    let model = match model_type {
454                        "flat" => PeriodEndModel::FlatMultiplier {
455                            multiplier: p.peak_multiplier.unwrap_or(default_peak),
456                        },
457                        "extended_crunch" => PeriodEndModel::ExtendedCrunch {
458                            start_day: p.start_day.unwrap_or(-10),
459                            sustained_high_days: p.sustained_high_days.unwrap_or(3),
460                            peak_multiplier: p.peak_multiplier.unwrap_or(default_peak),
461                            ramp_up_days: 3, // Default ramp-up period
462                        },
463                        _ => PeriodEndModel::ExponentialAcceleration {
464                            start_day: p.start_day.unwrap_or(-10),
465                            base_multiplier: p.base_multiplier.unwrap_or(1.0),
466                            peak_multiplier: p.peak_multiplier.unwrap_or(default_peak),
467                            decay_rate: p.decay_rate.unwrap_or(0.3),
468                        },
469                    };
470                    PeriodEndConfig {
471                        enabled: true,
472                        model,
473                        additional_multiplier: p.additional_multiplier.unwrap_or(1.0),
474                    }
475                } else {
476                    PeriodEndConfig {
477                        enabled: true,
478                        model: PeriodEndModel::ExponentialAcceleration {
479                            start_day: -10,
480                            base_multiplier: 1.0,
481                            peak_multiplier: default_peak,
482                            decay_rate: 0.3,
483                        },
484                        additional_multiplier: 1.0,
485                    }
486                }
487            };
488
489        PeriodEndDynamics::new(
490            convert_period(schema.month_end.as_ref(), 2.0),
491            convert_period(schema.quarter_end.as_ref(), 3.5),
492            convert_period(schema.year_end.as_ref(), 5.0),
493        )
494    }
495
496    /// Parse a region string into a Region enum.
497    fn parse_region(region_str: &str) -> Region {
498        match region_str.to_uppercase().as_str() {
499            "US" => Region::US,
500            "DE" => Region::DE,
501            "GB" => Region::GB,
502            "CN" => Region::CN,
503            "JP" => Region::JP,
504            "IN" => Region::IN,
505            "BR" => Region::BR,
506            "MX" => Region::MX,
507            "AU" => Region::AU,
508            "SG" => Region::SG,
509            "KR" => Region::KR,
510            "FR" => Region::FR,
511            "IT" => Region::IT,
512            "ES" => Region::ES,
513            "CA" => Region::CA,
514            _ => Region::US,
515        }
516    }
517
518    /// Set a custom company selector.
519    pub fn set_company_selector(&mut self, selector: WeightedCompanySelector) {
520        self.company_selector = selector;
521    }
522
523    /// Get the current company selector.
524    pub fn company_selector(&self) -> &WeightedCompanySelector {
525        &self.company_selector
526    }
527
528    /// Set fraud configuration.
529    pub fn set_fraud_config(&mut self, config: FraudConfig) {
530        self.fraud_config = config;
531    }
532
533    /// Set vendors from generated master data.
534    ///
535    /// This replaces the default vendor pool with actual generated vendors,
536    /// ensuring JEs reference real master data entities.
537    pub fn with_vendors(mut self, vendors: &[Vendor]) -> Self {
538        if !vendors.is_empty() {
539            self.vendor_pool = VendorPool::from_vendors(vendors.to_vec());
540            self.using_real_master_data = true;
541        }
542        self
543    }
544
545    /// Set customers from generated master data.
546    ///
547    /// This replaces the default customer pool with actual generated customers,
548    /// ensuring JEs reference real master data entities.
549    pub fn with_customers(mut self, customers: &[Customer]) -> Self {
550        if !customers.is_empty() {
551            self.customer_pool = CustomerPool::from_customers(customers.to_vec());
552            self.using_real_master_data = true;
553        }
554        self
555    }
556
557    /// Set materials from generated master data.
558    ///
559    /// This provides material references for JEs that involve inventory movements.
560    pub fn with_materials(mut self, materials: &[Material]) -> Self {
561        if !materials.is_empty() {
562            self.material_pool = Some(MaterialPool::from_materials(materials.to_vec()));
563            self.using_real_master_data = true;
564        }
565        self
566    }
567
568    /// Set all master data at once for convenience.
569    ///
570    /// This is the recommended way to configure the JE generator with
571    /// generated master data to ensure data coherence.
572    pub fn with_master_data(
573        self,
574        vendors: &[Vendor],
575        customers: &[Customer],
576        materials: &[Material],
577    ) -> Self {
578        self.with_vendors(vendors)
579            .with_customers(customers)
580            .with_materials(materials)
581    }
582
583    /// Replace the user pool with one generated from a [`CountryPack`].
584    ///
585    /// This is an alternative to the default name-culture distribution that
586    /// derives name pools and weights from the country-pack's `names` section.
587    /// The existing user pool (if any) is discarded and regenerated using
588    /// [`MultiCultureNameGenerator::from_country_pack`].
589    pub fn with_country_pack_names(mut self, pack: &CountryPack) -> Self {
590        let name_gen =
591            datasynth_core::templates::MultiCultureNameGenerator::from_country_pack(pack);
592        let config = UserGeneratorConfig {
593            // The culture distribution is embedded in the name generator
594            // itself, so we use an empty list here.
595            culture_distribution: Vec::new(),
596            email_domain: name_gen.email_domain().to_string(),
597            generate_realistic_names: true,
598        };
599        let mut user_gen = UserGenerator::with_name_generator(self.seed + 100, config, name_gen);
600        self.user_pool = Some(user_gen.generate_standard(&self.companies));
601        self
602    }
603
604    /// Check if the generator is using real master data.
605    pub fn is_using_real_master_data(&self) -> bool {
606        self.using_real_master_data
607    }
608
609    /// Determine if this transaction should be fraudulent.
610    fn determine_fraud(&mut self) -> Option<FraudType> {
611        if !self.fraud_config.enabled {
612            return None;
613        }
614
615        // Roll for fraud based on fraud rate
616        if self.rng.random::<f64>() >= self.fraud_config.fraud_rate {
617            return None;
618        }
619
620        // Select fraud type based on distribution
621        Some(self.select_fraud_type())
622    }
623
624    /// Select a fraud type based on the configured distribution.
625    fn select_fraud_type(&mut self) -> FraudType {
626        let dist = &self.fraud_config.fraud_type_distribution;
627        let roll: f64 = self.rng.random();
628
629        let mut cumulative = 0.0;
630
631        cumulative += dist.suspense_account_abuse;
632        if roll < cumulative {
633            return FraudType::SuspenseAccountAbuse;
634        }
635
636        cumulative += dist.fictitious_transaction;
637        if roll < cumulative {
638            return FraudType::FictitiousTransaction;
639        }
640
641        cumulative += dist.revenue_manipulation;
642        if roll < cumulative {
643            return FraudType::RevenueManipulation;
644        }
645
646        cumulative += dist.expense_capitalization;
647        if roll < cumulative {
648            return FraudType::ExpenseCapitalization;
649        }
650
651        cumulative += dist.split_transaction;
652        if roll < cumulative {
653            return FraudType::SplitTransaction;
654        }
655
656        cumulative += dist.timing_anomaly;
657        if roll < cumulative {
658            return FraudType::TimingAnomaly;
659        }
660
661        cumulative += dist.unauthorized_access;
662        if roll < cumulative {
663            return FraudType::UnauthorizedAccess;
664        }
665
666        // Default fallback
667        FraudType::DuplicatePayment
668    }
669
670    /// Map a fraud type to an amount pattern for suspicious amounts.
671    fn fraud_type_to_amount_pattern(&self, fraud_type: FraudType) -> FraudAmountPattern {
672        match fraud_type {
673            FraudType::SplitTransaction | FraudType::JustBelowThreshold => {
674                FraudAmountPattern::ThresholdAdjacent
675            }
676            FraudType::FictitiousTransaction
677            | FraudType::FictitiousEntry
678            | FraudType::SuspenseAccountAbuse
679            | FraudType::RoundDollarManipulation => FraudAmountPattern::ObviousRoundNumbers,
680            FraudType::RevenueManipulation
681            | FraudType::ExpenseCapitalization
682            | FraudType::ImproperCapitalization
683            | FraudType::ReserveManipulation
684            | FraudType::UnauthorizedAccess
685            | FraudType::PrematureRevenue
686            | FraudType::UnderstatedLiabilities
687            | FraudType::OverstatedAssets
688            | FraudType::ChannelStuffing => FraudAmountPattern::StatisticallyImprobable,
689            FraudType::DuplicatePayment
690            | FraudType::TimingAnomaly
691            | FraudType::SelfApproval
692            | FraudType::ExceededApprovalLimit
693            | FraudType::SegregationOfDutiesViolation
694            | FraudType::UnauthorizedApproval
695            | FraudType::CollusiveApproval
696            | FraudType::FictitiousVendor
697            | FraudType::ShellCompanyPayment
698            | FraudType::Kickback
699            | FraudType::KickbackScheme
700            | FraudType::InvoiceManipulation
701            | FraudType::AssetMisappropriation
702            | FraudType::InventoryTheft
703            | FraudType::GhostEmployee => FraudAmountPattern::Normal,
704            // Accounting Standards Fraud Types (ASC 606/IFRS 15 - Revenue)
705            FraudType::ImproperRevenueRecognition
706            | FraudType::ImproperPoAllocation
707            | FraudType::VariableConsiderationManipulation
708            | FraudType::ContractModificationMisstatement => {
709                FraudAmountPattern::StatisticallyImprobable
710            }
711            // Accounting Standards Fraud Types (ASC 842/IFRS 16 - Leases)
712            FraudType::LeaseClassificationManipulation
713            | FraudType::OffBalanceSheetLease
714            | FraudType::LeaseLiabilityUnderstatement
715            | FraudType::RouAssetMisstatement => FraudAmountPattern::StatisticallyImprobable,
716            // Accounting Standards Fraud Types (ASC 820/IFRS 13 - Fair Value)
717            FraudType::FairValueHierarchyManipulation
718            | FraudType::Level3InputManipulation
719            | FraudType::ValuationTechniqueManipulation => {
720                FraudAmountPattern::StatisticallyImprobable
721            }
722            // Accounting Standards Fraud Types (ASC 360/IAS 36 - Impairment)
723            FraudType::DelayedImpairment
724            | FraudType::ImpairmentTestAvoidance
725            | FraudType::CashFlowProjectionManipulation
726            | FraudType::ImproperImpairmentReversal => FraudAmountPattern::StatisticallyImprobable,
727            // Sourcing/Procurement Fraud
728            FraudType::BidRigging
729            | FraudType::PhantomVendorContract
730            | FraudType::ConflictOfInterestSourcing => FraudAmountPattern::Normal,
731            FraudType::SplitContractThreshold => FraudAmountPattern::ThresholdAdjacent,
732            // HR/Payroll Fraud
733            FraudType::GhostEmployeePayroll
734            | FraudType::PayrollInflation
735            | FraudType::DuplicateExpenseReport
736            | FraudType::FictitiousExpense => FraudAmountPattern::Normal,
737            FraudType::SplitExpenseToAvoidApproval => FraudAmountPattern::ThresholdAdjacent,
738            // O2C Fraud
739            FraudType::RevenueTimingManipulation => FraudAmountPattern::StatisticallyImprobable,
740            FraudType::QuotePriceOverride => FraudAmountPattern::Normal,
741        }
742    }
743
744    /// Generate a deterministic UUID using the factory.
745    #[inline]
746    fn generate_deterministic_uuid(&self) -> uuid::Uuid {
747        self.uuid_factory.next()
748    }
749
750    /// Cost center pool used for expense account enrichment.
751    const COST_CENTER_POOL: &'static [&'static str] =
752        &["CC1000", "CC2000", "CC3000", "CC4000", "CC5000"];
753
754    /// Enrich journal entry line items with account descriptions, cost centers,
755    /// profit centers, value dates, line text, and assignment fields.
756    ///
757    /// This populates the sparse optional fields that `JournalEntryLine::debit()`
758    /// and `::credit()` leave as `None`.
759    fn enrich_line_items(&self, entry: &mut JournalEntry) {
760        let posting_date = entry.header.posting_date;
761        let company_code = &entry.header.company_code;
762        let header_text = entry.header.header_text.clone();
763        let business_process = entry.header.business_process;
764
765        // Derive a deterministic index from the document_id for cost center selection
766        let doc_id_bytes = entry.header.document_id.as_bytes();
767        let mut cc_seed: usize = 0;
768        for &b in doc_id_bytes {
769            cc_seed = cc_seed.wrapping_add(b as usize);
770        }
771
772        for (i, line) in entry.lines.iter_mut().enumerate() {
773            // 1. account_description: look up from CoA
774            if line.account_description.is_none() {
775                line.account_description = self
776                    .coa
777                    .get_account(&line.gl_account)
778                    .map(|a| a.short_description.clone());
779            }
780
781            // 2. cost_center: assign to expense accounts (5xxx/6xxx)
782            if line.cost_center.is_none() {
783                let first_char = line.gl_account.chars().next().unwrap_or('0');
784                if first_char == '5' || first_char == '6' {
785                    let idx = cc_seed.wrapping_add(i) % Self::COST_CENTER_POOL.len();
786                    line.cost_center = Some(Self::COST_CENTER_POOL[idx].to_string());
787                }
788            }
789
790            // 3. profit_center: derive from company code + business process
791            if line.profit_center.is_none() {
792                let suffix = match business_process {
793                    Some(BusinessProcess::P2P) => "-P2P",
794                    Some(BusinessProcess::O2C) => "-O2C",
795                    Some(BusinessProcess::R2R) => "-R2R",
796                    Some(BusinessProcess::H2R) => "-H2R",
797                    _ => "",
798                };
799                line.profit_center = Some(format!("PC-{}{}", company_code, suffix));
800            }
801
802            // 4. line_text: fall back to header_text if not already set
803            if line.line_text.is_none() {
804                line.line_text = header_text.clone();
805            }
806
807            // 5. value_date: set to posting_date for AR/AP accounts
808            if line.value_date.is_none()
809                && (line.gl_account.starts_with("1100") || line.gl_account.starts_with("2000"))
810            {
811                line.value_date = Some(posting_date);
812            }
813
814            // 6. assignment: set to vendor/customer reference for AP/AR lines
815            if line.assignment.is_none() {
816                if line.gl_account.starts_with("2000") {
817                    // AP line - use vendor reference from header
818                    if let Some(ref ht) = header_text {
819                        // Try to extract vendor ID from header text patterns like "... - V-001"
820                        if let Some(vendor_part) = ht.rsplit(" - ").next() {
821                            if vendor_part.starts_with("V-")
822                                || vendor_part.starts_with("VENDOR")
823                                || vendor_part.starts_with("Vendor")
824                            {
825                                line.assignment = Some(vendor_part.to_string());
826                            }
827                        }
828                    }
829                } else if line.gl_account.starts_with("1100") {
830                    // AR line - use customer reference from header
831                    if let Some(ref ht) = header_text {
832                        if let Some(customer_part) = ht.rsplit(" - ").next() {
833                            if customer_part.starts_with("C-")
834                                || customer_part.starts_with("CUST")
835                                || customer_part.starts_with("Customer")
836                            {
837                                line.assignment = Some(customer_part.to_string());
838                            }
839                        }
840                    }
841                }
842            }
843        }
844    }
845
    /// Generate a single journal entry.
    ///
    /// If a batch is in progress (`batch_state` is set with `remaining > 0`) the
    /// call is delegated to `generate_batched_entry`. Otherwise a fresh entry is
    /// built in this order:
    ///
    /// 1. bump `count` and obtain a document ID
    /// 2. sample a posting date, moved onto a business day when a business day
    ///    calculator is configured
    /// 3. pick company, line item spec, source, business process and fraud type
    /// 4. sample the creation time and the creating user
    /// 5. build the header, optionally with header text and reference
    /// 6. sample the total amount (fraud pattern, drift, human variation)
    /// 7. create debit and credit lines that each sum to the total amount
    /// 8. enrich the lines, optionally inject a persona error, run the approval
    ///    workflow hook, and possibly start a batch for subsequent calls
    ///
    /// NOTE(review): the sampling calls share `self.rng`, so reordering them
    /// presumably changes the output produced for a given seed — keep the
    /// statement order when editing.
    pub fn generate(&mut self) -> JournalEntry {
        debug!(
            count = self.count,
            companies = self.companies.len(),
            start_date = %self.start_date,
            end_date = %self.end_date,
            "Generating journal entry"
        );

        // Check if we're in a batch - if so, generate a batched entry
        if let Some(ref state) = self.batch_state {
            if state.remaining > 0 {
                return self.generate_batched_entry();
            }
        }

        self.count += 1;

        // Generate deterministic document ID
        let document_id = self.generate_deterministic_uuid();

        // Sample posting date
        let mut posting_date = self
            .temporal_sampler
            .sample_date(self.start_date, self.end_date);

        // Adjust posting date to be a business day if business day calculator is configured
        if let Some(ref calc) = self.business_day_calculator {
            if !calc.is_business_day(posting_date) {
                // Move to next business day
                posting_date = calc.next_business_day(posting_date, false);
                // Ensure we don't exceed end_date: if rolling forward left the
                // configured range, fall back to a business day derived from end_date
                if posting_date > self.end_date {
                    posting_date = calc.prev_business_day(self.end_date, true);
                }
            }
        }

        // Select company using weighted selector
        let company_code = self.company_selector.select(&mut self.rng).to_string();

        // Sample line item specification
        let line_spec = self.line_sampler.sample();

        // Determine source type using full 4-way distribution
        let source = self.select_source();
        // Recurring entries are treated like automated ones for the rest of this method
        let is_automated = matches!(
            source,
            TransactionSource::Automated | TransactionSource::Recurring
        );

        // Select business process
        let business_process = self.select_business_process();

        // Determine if this is a fraudulent transaction
        let fraud_type = self.determine_fraud();
        let is_fraud = fraud_type.is_some();

        // Sample time based on source (the flag passed is "is human", i.e. not automated)
        let time = self.temporal_sampler.sample_time(!is_automated);
        let created_at = posting_date.and_time(time).and_utc();

        // Select user from pool or generate generic
        let (created_by, user_persona) = self.select_user(is_automated);

        // Create header with deterministic UUID
        let mut header =
            JournalEntryHeader::with_deterministic_id(company_code, posting_date, document_id);
        header.created_at = created_at;
        header.source = source;
        header.created_by = created_by;
        header.user_persona = user_persona;
        header.business_process = Some(business_process);
        header.document_type = Self::document_type_for_process(business_process).to_string();
        header.is_fraud = is_fraud;
        header.fraud_type = fraud_type;

        // Generate description context
        let mut context =
            DescriptionContext::with_period(posting_date.month(), posting_date.year());

        // Add vendor/customer context based on business process
        match business_process {
            BusinessProcess::P2P => {
                if let Some(vendor) = self.vendor_pool.random_vendor(&mut self.rng) {
                    context.vendor_name = Some(vendor.name.clone());
                }
            }
            BusinessProcess::O2C => {
                if let Some(customer) = self.customer_pool.random_customer(&mut self.rng) {
                    context.customer_name = Some(customer.name.clone());
                }
            }
            _ => {}
        }

        // Generate header text if enabled
        if self.template_config.descriptions.generate_header_text {
            header.header_text = Some(self.description_generator.generate_header_text(
                business_process,
                &context,
                &mut self.rng,
            ));
        }

        // Generate reference if enabled
        if self.template_config.references.generate_references {
            header.reference = Some(
                self.reference_generator
                    .generate_for_process_year(business_process, posting_date.year()),
            );
        }

        // Generate line items
        let mut entry = JournalEntry::new(header);

        // Generate amount - use fraud pattern if this is a fraudulent transaction
        let base_amount = if let Some(ft) = fraud_type {
            let pattern = self.fraud_type_to_amount_pattern(ft);
            self.amount_sampler.sample_fraud(pattern)
        } else {
            self.amount_sampler.sample()
        };

        // Apply temporal drift if configured
        // NOTE(review): `seasonal_factor` is only read inside the branch below, so
        // it has no effect whenever `amount_mean_multiplier` is exactly 1.0 —
        // confirm that this gating is intended.
        let drift_adjusted_amount = {
            let drift = self.get_drift_adjustments(posting_date);
            if drift.amount_mean_multiplier != 1.0 {
                // Apply drift multiplier (includes seasonal factor if enabled)
                let multiplier = drift.amount_mean_multiplier * drift.seasonal_factor;
                let adjusted = base_amount.to_f64().unwrap_or(1.0) * multiplier;
                // Falls back to the unadjusted amount if the float cannot be converted
                Decimal::from_f64_retain(adjusted).unwrap_or(base_amount)
            } else {
                base_amount
            }
        };

        // Apply human variation to amounts for non-automated transactions
        let total_amount = if is_automated {
            drift_adjusted_amount // Automated systems use exact amounts
        } else {
            self.apply_human_variation(drift_adjusted_amount)
        };

        // Generate debit lines (line numbers 1..=debit_count)
        let debit_amounts = self
            .amount_sampler
            .sample_summing_to(line_spec.debit_count, total_amount);
        for (i, amount) in debit_amounts.into_iter().enumerate() {
            let account_number = self.select_debit_account().account_number.clone();
            let mut line = JournalEntryLine::debit(
                entry.header.document_id,
                (i + 1) as u32,
                account_number.clone(),
                amount,
            );

            // Generate line text if enabled
            if self.template_config.descriptions.generate_line_text {
                line.line_text = Some(self.description_generator.generate_line_text(
                    &account_number,
                    &context,
                    &mut self.rng,
                ));
            }

            entry.add_line(line);
        }

        // Generate credit lines - a separate split is sampled against the same
        // `total_amount` as the debit side; the individual amounts need not
        // match the debit amounts. Line numbers continue after the debit lines.
        let credit_amounts = self
            .amount_sampler
            .sample_summing_to(line_spec.credit_count, total_amount);
        for (i, amount) in credit_amounts.into_iter().enumerate() {
            let account_number = self.select_credit_account().account_number.clone();
            let mut line = JournalEntryLine::credit(
                entry.header.document_id,
                (line_spec.debit_count + i + 1) as u32,
                account_number.clone(),
                amount,
            );

            // Generate line text if enabled
            if self.template_config.descriptions.generate_line_text {
                line.line_text = Some(self.description_generator.generate_line_text(
                    &account_number,
                    &context,
                    &mut self.rng,
                ));
            }

            entry.add_line(line);
        }

        // Enrich line items with account descriptions, cost centers, etc.
        self.enrich_line_items(&mut entry);

        // Apply persona-based errors if enabled and it's a human user
        if self.persona_errors_enabled && !is_automated {
            self.maybe_inject_persona_error(&mut entry);
        }

        // Run the approval workflow hook when approvals are enabled
        if self.approval_enabled {
            self.maybe_apply_approval_workflow(&mut entry, posting_date);
        }

        // Maybe start a batch of similar entries for realism
        self.maybe_start_batch(&entry);

        entry
    }
1059
    /// Enable or disable persona-based error injection.
    ///
    /// When enabled, entries created by human personas have a chance
    /// to contain realistic human errors based on their experience level.
    ///
    /// Builder-style: takes the generator by value, stores `enabled` in
    /// `persona_errors_enabled`, and returns the generator.
    pub fn with_persona_errors(mut self, enabled: bool) -> Self {
        self.persona_errors_enabled = enabled;
        self
    }
1068
    /// Set fraud configuration for fraud injection.
    ///
    /// When fraud is enabled in the config, transactions have a chance
    /// to be marked as fraudulent based on the configured fraud rate.
    ///
    /// Builder-style: takes the generator by value, replaces `fraud_config`
    /// with `config`, and returns the generator.
    pub fn with_fraud_config(mut self, config: FraudConfig) -> Self {
        self.fraud_config = config;
        self
    }
1077
    /// Check if persona errors are enabled.
    ///
    /// Returns the current value of the `persona_errors_enabled` flag.
    pub fn persona_errors_enabled(&self) -> bool {
        self.persona_errors_enabled
    }
1082
    /// Enable or disable batch processing behavior.
    ///
    /// When enabled (default), the generator will occasionally produce batches
    /// of similar entries, simulating how humans batch similar work together.
    ///
    /// Passing `true` leaves the generator untouched; passing `false` drops any
    /// batch currently in progress by resetting `batch_state` to `None`.
    ///
    /// NOTE(review): no flag is stored here and `maybe_start_batch` does not
    /// consult one, so later `generate` calls can still start new batches after
    /// `with_batching(false)` — confirm whether a persistent switch is intended.
    pub fn with_batching(mut self, enabled: bool) -> Self {
        if !enabled {
            self.batch_state = None;
        }
        self
    }
1093
    /// Check if batch processing is enabled.
    ///
    /// Currently returns `true` unconditionally.
    ///
    /// NOTE(review): the result does not reflect an earlier
    /// `with_batching(false)` call — confirm whether callers rely on this.
    pub fn batching_enabled(&self) -> bool {
        // Batching is implicitly enabled when not explicitly disabled
        true
    }
1099
1100    /// Maybe start a batch based on the current entry.
1101    ///
1102    /// Humans often batch similar work: processing invoices from one vendor,
1103    /// entering expense reports for a trip, reconciling similar items.
1104    fn maybe_start_batch(&mut self, entry: &JournalEntry) {
1105        // Only start batch for non-automated, non-fraud entries
1106        if entry.header.source == TransactionSource::Automated || entry.header.is_fraud {
1107            return;
1108        }
1109
1110        // 15% chance to start a batch (most work is not batched)
1111        if self.rng.random::<f64>() > 0.15 {
1112            return;
1113        }
1114
1115        // Extract key attributes for batching
1116        let base_account = entry
1117            .lines
1118            .first()
1119            .map(|l| l.gl_account.clone())
1120            .unwrap_or_default();
1121
1122        let base_amount = entry.total_debit();
1123
1124        self.batch_state = Some(BatchState {
1125            base_account_number: base_account,
1126            base_amount,
1127            base_business_process: entry.header.business_process,
1128            base_posting_date: entry.header.posting_date,
1129            remaining: self.rng.random_range(2..7), // 2-6 more similar entries
1130        });
1131    }
1132
1133    /// Generate an entry that's part of the current batch.
1134    ///
1135    /// Batched entries have:
1136    /// - Same or very similar business process
1137    /// - Same posting date (batched work done together)
1138    /// - Similar amounts (within ±15%)
1139    /// - Same debit account (processing similar items)
1140    fn generate_batched_entry(&mut self) -> JournalEntry {
1141        use rust_decimal::Decimal;
1142
1143        // Decrement batch counter
1144        if let Some(ref mut state) = self.batch_state {
1145            state.remaining = state.remaining.saturating_sub(1);
1146        }
1147
1148        let Some(batch) = self.batch_state.clone() else {
1149            // This is a programming error - batch_state should be set before calling this method.
1150            // Clear state and fall back to generating a standard entry instead of panicking.
1151            tracing::warn!(
1152                "generate_batched_entry called without batch_state; generating standard entry"
1153            );
1154            self.batch_state = None;
1155            return self.generate();
1156        };
1157
1158        // Use the batch's posting date (work done on same day)
1159        let posting_date = batch.base_posting_date;
1160
1161        self.count += 1;
1162        let document_id = self.generate_deterministic_uuid();
1163
1164        // Select same company (batched work is usually same company)
1165        let company_code = self.company_selector.select(&mut self.rng).to_string();
1166
1167        // Use simplified line spec for batched entries (usually 2-line)
1168        let _line_spec = LineItemSpec {
1169            total_count: 2,
1170            debit_count: 1,
1171            credit_count: 1,
1172            split_type: DebitCreditSplit::Equal,
1173        };
1174
1175        // Batched entries are always manual
1176        let source = TransactionSource::Manual;
1177
1178        // Use the batch's business process
1179        let business_process = batch.base_business_process.unwrap_or(BusinessProcess::R2R);
1180
1181        // Sample time
1182        let time = self.temporal_sampler.sample_time(true);
1183        let created_at = posting_date.and_time(time).and_utc();
1184
1185        // Same user for batched work
1186        let (created_by, user_persona) = self.select_user(false);
1187
1188        // Create header
1189        let mut header =
1190            JournalEntryHeader::with_deterministic_id(company_code, posting_date, document_id);
1191        header.created_at = created_at;
1192        header.source = source;
1193        header.created_by = created_by;
1194        header.user_persona = user_persona;
1195        header.business_process = Some(business_process);
1196        header.document_type = Self::document_type_for_process(business_process).to_string();
1197
1198        // Generate similar amount (within ±15% of base)
1199        let variation = self.rng.random_range(-0.15..0.15);
1200        let varied_amount =
1201            batch.base_amount * (Decimal::ONE + Decimal::try_from(variation).unwrap_or_default());
1202        let total_amount = varied_amount.round_dp(2).max(Decimal::from(1));
1203
1204        // Create the entry
1205        let mut entry = JournalEntry::new(header);
1206
1207        // Use same debit account as batch base
1208        let debit_line = JournalEntryLine::debit(
1209            entry.header.document_id,
1210            1,
1211            batch.base_account_number.clone(),
1212            total_amount,
1213        );
1214        entry.add_line(debit_line);
1215
1216        // Select a credit account
1217        let credit_account = self.select_credit_account().account_number.clone();
1218        let credit_line =
1219            JournalEntryLine::credit(entry.header.document_id, 2, credit_account, total_amount);
1220        entry.add_line(credit_line);
1221
1222        // Enrich line items with account descriptions, cost centers, etc.
1223        self.enrich_line_items(&mut entry);
1224
1225        // Apply persona-based errors if enabled
1226        if self.persona_errors_enabled {
1227            self.maybe_inject_persona_error(&mut entry);
1228        }
1229
1230        // Apply approval workflow if enabled
1231        if self.approval_enabled {
1232            self.maybe_apply_approval_workflow(&mut entry, posting_date);
1233        }
1234
1235        // Clear batch state if no more entries remaining
1236        if batch.remaining <= 1 {
1237            self.batch_state = None;
1238        }
1239
1240        entry
1241    }
1242
1243    /// Maybe inject a persona-appropriate error based on the persona's error rate.
1244    fn maybe_inject_persona_error(&mut self, entry: &mut JournalEntry) {
1245        // Parse persona from the entry header
1246        let persona_str = &entry.header.user_persona;
1247        let persona = match persona_str.to_lowercase().as_str() {
1248            s if s.contains("junior") => UserPersona::JuniorAccountant,
1249            s if s.contains("senior") => UserPersona::SeniorAccountant,
1250            s if s.contains("controller") => UserPersona::Controller,
1251            s if s.contains("manager") => UserPersona::Manager,
1252            s if s.contains("executive") => UserPersona::Executive,
1253            _ => return, // Don't inject errors for unknown personas
1254        };
1255
1256        // Get base error rate from persona
1257        let base_error_rate = persona.error_rate();
1258
1259        // Apply stress factors based on posting date
1260        let adjusted_rate = self.apply_stress_factors(base_error_rate, entry.header.posting_date);
1261
1262        // Check if error should occur based on adjusted rate
1263        if self.rng.random::<f64>() >= adjusted_rate {
1264            return; // No error this time
1265        }
1266
1267        // Select and inject persona-appropriate error
1268        self.inject_human_error(entry, persona);
1269    }
1270
1271    /// Apply contextual stress factors to the base error rate.
1272    ///
1273    /// Stress factors increase error likelihood during:
1274    /// - Month-end (day >= 28): 1.5x more errors due to deadline pressure
1275    /// - Quarter-end (Mar, Jun, Sep, Dec): additional 25% boost
1276    /// - Year-end (December 28-31): 2.0x more errors due to audit pressure
1277    /// - Monday morning (catch-up work): 20% more errors
1278    /// - Friday afternoon (rushing to leave): 30% more errors
1279    fn apply_stress_factors(&self, base_rate: f64, posting_date: chrono::NaiveDate) -> f64 {
1280        use chrono::Datelike;
1281
1282        let mut rate = base_rate;
1283        let day = posting_date.day();
1284        let month = posting_date.month();
1285
1286        // Year-end stress (December 28-31): double the error rate
1287        if month == 12 && day >= 28 {
1288            rate *= 2.0;
1289            return rate.min(0.5); // Cap at 50% to keep it realistic
1290        }
1291
1292        // Quarter-end stress (last days of Mar, Jun, Sep, Dec)
1293        if matches!(month, 3 | 6 | 9 | 12) && day >= 28 {
1294            rate *= 1.75; // 75% more errors at quarter end
1295            return rate.min(0.4);
1296        }
1297
1298        // Month-end stress (last 3 days of month)
1299        if day >= 28 {
1300            rate *= 1.5; // 50% more errors at month end
1301        }
1302
1303        // Day-of-week stress effects
1304        let weekday = posting_date.weekday();
1305        match weekday {
1306            chrono::Weekday::Mon => {
1307                // Monday: catching up, often rushed
1308                rate *= 1.2;
1309            }
1310            chrono::Weekday::Fri => {
1311                // Friday: rushing to finish before weekend
1312                rate *= 1.3;
1313            }
1314            _ => {}
1315        }
1316
1317        // Cap at 40% to keep it realistic
1318        rate.min(0.4)
1319    }
1320
1321    /// Apply human-like variation to an amount.
1322    ///
1323    /// Humans don't enter perfectly calculated amounts - they:
1324    /// - Round amounts differently
1325    /// - Estimate instead of calculating exactly
1326    /// - Make small input variations
1327    ///
1328    /// This applies small variations (typically ±2%) to make amounts more realistic.
1329    fn apply_human_variation(&mut self, amount: rust_decimal::Decimal) -> rust_decimal::Decimal {
1330        use rust_decimal::Decimal;
1331
1332        // Automated transactions or very small amounts don't get variation
1333        if amount < Decimal::from(10) {
1334            return amount;
1335        }
1336
1337        // 70% chance of human variation being applied
1338        if self.rng.random::<f64>() > 0.70 {
1339            return amount;
1340        }
1341
1342        // Decide which type of human variation to apply
1343        let variation_type: u8 = self.rng.random_range(0..4);
1344
1345        match variation_type {
1346            0 => {
1347                // ±2% variation (common for estimated amounts)
1348                let variation_pct = self.rng.random_range(-0.02..0.02);
1349                let variation = amount * Decimal::try_from(variation_pct).unwrap_or_default();
1350                (amount + variation).round_dp(2)
1351            }
1352            1 => {
1353                // Round to nearest $10
1354                let ten = Decimal::from(10);
1355                (amount / ten).round() * ten
1356            }
1357            2 => {
1358                // Round to nearest $100 (for larger amounts)
1359                if amount >= Decimal::from(500) {
1360                    let hundred = Decimal::from(100);
1361                    (amount / hundred).round() * hundred
1362                } else {
1363                    amount
1364                }
1365            }
1366            3 => {
1367                // Slight under/over payment (±$0.01 to ±$1.00)
1368                let cents = Decimal::new(self.rng.random_range(-100..100), 2);
1369                (amount + cents).max(Decimal::ZERO).round_dp(2)
1370            }
1371            _ => amount,
1372        }
1373    }
1374
1375    /// Rebalance an entry after a one-sided amount modification.
1376    ///
1377    /// When an error modifies one line's amount, this finds a line on the opposite
1378    /// side (credit if modified was debit, or vice versa) and adjusts it by the
1379    /// same impact to maintain balance.
1380    fn rebalance_entry(entry: &mut JournalEntry, modified_was_debit: bool, impact: Decimal) {
1381        // Find a line on the opposite side to adjust
1382        let balancing_idx = entry.lines.iter().position(|l| {
1383            if modified_was_debit {
1384                l.credit_amount > Decimal::ZERO
1385            } else {
1386                l.debit_amount > Decimal::ZERO
1387            }
1388        });
1389
1390        if let Some(idx) = balancing_idx {
1391            if modified_was_debit {
1392                entry.lines[idx].credit_amount += impact;
1393            } else {
1394                entry.lines[idx].debit_amount += impact;
1395            }
1396        }
1397    }
1398
1399    /// Inject a human-like error based on the persona.
1400    ///
1401    /// All error types maintain balance - amount modifications are applied to both sides.
1402    /// Entries are marked with [HUMAN_ERROR:*] tags in header_text for ML detection.
1403    fn inject_human_error(&mut self, entry: &mut JournalEntry, persona: UserPersona) {
1404        use rust_decimal::Decimal;
1405
1406        // Different personas make different types of errors
1407        let error_type: u8 = match persona {
1408            UserPersona::JuniorAccountant => {
1409                // Junior accountants make more varied errors
1410                self.rng.random_range(0..5)
1411            }
1412            UserPersona::SeniorAccountant => {
1413                // Senior accountants mainly make transposition errors
1414                self.rng.random_range(0..3)
1415            }
1416            UserPersona::Controller | UserPersona::Manager => {
1417                // Controllers/managers mainly make rounding or cutoff errors
1418                self.rng.random_range(3..5)
1419            }
1420            _ => return,
1421        };
1422
1423        match error_type {
1424            0 => {
1425                // Transposed digits in an amount
1426                if let Some(line) = entry.lines.get_mut(0) {
1427                    let is_debit = line.debit_amount > Decimal::ZERO;
1428                    let original_amount = if is_debit {
1429                        line.debit_amount
1430                    } else {
1431                        line.credit_amount
1432                    };
1433
1434                    // Simple digit swap in the string representation
1435                    let s = original_amount.to_string();
1436                    if s.len() >= 2 {
1437                        let chars: Vec<char> = s.chars().collect();
1438                        let pos = self.rng.random_range(0..chars.len().saturating_sub(1));
1439                        if chars[pos].is_ascii_digit()
1440                            && chars.get(pos + 1).is_some_and(|c| c.is_ascii_digit())
1441                        {
1442                            let mut new_chars = chars;
1443                            new_chars.swap(pos, pos + 1);
1444                            if let Ok(new_amount) =
1445                                new_chars.into_iter().collect::<String>().parse::<Decimal>()
1446                            {
1447                                let impact = new_amount - original_amount;
1448
1449                                // Apply to the modified line
1450                                if is_debit {
1451                                    entry.lines[0].debit_amount = new_amount;
1452                                } else {
1453                                    entry.lines[0].credit_amount = new_amount;
1454                                }
1455
1456                                // Rebalance the entry
1457                                Self::rebalance_entry(entry, is_debit, impact);
1458
1459                                entry.header.header_text = Some(
1460                                    entry.header.header_text.clone().unwrap_or_default()
1461                                        + " [HUMAN_ERROR:TRANSPOSITION]",
1462                                );
1463                            }
1464                        }
1465                    }
1466                }
1467            }
1468            1 => {
1469                // Wrong decimal place (off by factor of 10)
1470                if let Some(line) = entry.lines.get_mut(0) {
1471                    let is_debit = line.debit_amount > Decimal::ZERO;
1472                    let original_amount = if is_debit {
1473                        line.debit_amount
1474                    } else {
1475                        line.credit_amount
1476                    };
1477
1478                    let new_amount = original_amount * Decimal::new(10, 0);
1479                    let impact = new_amount - original_amount;
1480
1481                    // Apply to the modified line
1482                    if is_debit {
1483                        entry.lines[0].debit_amount = new_amount;
1484                    } else {
1485                        entry.lines[0].credit_amount = new_amount;
1486                    }
1487
1488                    // Rebalance the entry
1489                    Self::rebalance_entry(entry, is_debit, impact);
1490
1491                    entry.header.header_text = Some(
1492                        entry.header.header_text.clone().unwrap_or_default()
1493                            + " [HUMAN_ERROR:DECIMAL_SHIFT]",
1494                    );
1495                }
1496            }
1497            2 => {
1498                // Typo in description (doesn't affect balance)
1499                if let Some(ref mut text) = entry.header.header_text {
1500                    let typos = ["teh", "adn", "wiht", "taht", "recieve"];
1501                    let correct = ["the", "and", "with", "that", "receive"];
1502                    let idx = self.rng.random_range(0..typos.len());
1503                    if text.to_lowercase().contains(correct[idx]) {
1504                        *text = text.replace(correct[idx], typos[idx]);
1505                        *text = format!("{} [HUMAN_ERROR:TYPO]", text);
1506                    }
1507                }
1508            }
1509            3 => {
1510                // Rounding to round number
1511                if let Some(line) = entry.lines.get_mut(0) {
1512                    let is_debit = line.debit_amount > Decimal::ZERO;
1513                    let original_amount = if is_debit {
1514                        line.debit_amount
1515                    } else {
1516                        line.credit_amount
1517                    };
1518
1519                    let new_amount =
1520                        (original_amount / Decimal::new(100, 0)).round() * Decimal::new(100, 0);
1521                    let impact = new_amount - original_amount;
1522
1523                    // Apply to the modified line
1524                    if is_debit {
1525                        entry.lines[0].debit_amount = new_amount;
1526                    } else {
1527                        entry.lines[0].credit_amount = new_amount;
1528                    }
1529
1530                    // Rebalance the entry
1531                    Self::rebalance_entry(entry, is_debit, impact);
1532
1533                    entry.header.header_text = Some(
1534                        entry.header.header_text.clone().unwrap_or_default()
1535                            + " [HUMAN_ERROR:ROUNDED]",
1536                    );
1537                }
1538            }
1539            4 => {
1540                // Late posting marker (document date much earlier than posting date)
1541                // This doesn't create an imbalance
1542                if entry.header.document_date == entry.header.posting_date {
1543                    let days_late = self.rng.random_range(5..15);
1544                    entry.header.document_date =
1545                        entry.header.posting_date - chrono::Duration::days(days_late);
1546                    entry.header.header_text = Some(
1547                        entry.header.header_text.clone().unwrap_or_default()
1548                            + " [HUMAN_ERROR:LATE_POSTING]",
1549                    );
1550                }
1551            }
1552            _ => {}
1553        }
1554    }
1555
1556    /// Apply approval workflow for high-value transactions.
1557    ///
1558    /// If the entry amount exceeds the approval threshold, simulate an
1559    /// approval workflow with appropriate approvers based on amount.
1560    fn maybe_apply_approval_workflow(
1561        &mut self,
1562        entry: &mut JournalEntry,
1563        _posting_date: NaiveDate,
1564    ) {
1565        use rust_decimal::Decimal;
1566
1567        let amount = entry.total_debit();
1568
1569        // Skip if amount is below threshold
1570        if amount <= self.approval_threshold {
1571            // Auto-approved below threshold
1572            let workflow = ApprovalWorkflow::auto_approved(
1573                entry.header.created_by.clone(),
1574                entry.header.user_persona.clone(),
1575                amount,
1576                entry.header.created_at,
1577            );
1578            entry.header.approval_workflow = Some(workflow);
1579            return;
1580        }
1581
1582        // Mark as SOX relevant for high-value transactions
1583        entry.header.sox_relevant = true;
1584
1585        // Determine required approval levels based on amount
1586        let required_levels = if amount > Decimal::new(100000, 0) {
1587            3 // Executive approval required
1588        } else if amount > Decimal::new(50000, 0) {
1589            2 // Senior management approval
1590        } else {
1591            1 // Manager approval
1592        };
1593
1594        // Create the approval workflow
1595        let mut workflow = ApprovalWorkflow::new(
1596            entry.header.created_by.clone(),
1597            entry.header.user_persona.clone(),
1598            amount,
1599        );
1600        workflow.required_levels = required_levels;
1601
1602        // Simulate submission
1603        let submit_time = entry.header.created_at;
1604        let submit_action = ApprovalAction::new(
1605            entry.header.created_by.clone(),
1606            entry.header.user_persona.clone(),
1607            self.parse_persona(&entry.header.user_persona),
1608            ApprovalActionType::Submit,
1609            0,
1610        )
1611        .with_timestamp(submit_time);
1612
1613        workflow.actions.push(submit_action);
1614        workflow.status = ApprovalStatus::Pending;
1615        workflow.submitted_at = Some(submit_time);
1616
1617        // Simulate approvals with realistic delays
1618        let mut current_time = submit_time;
1619        for level in 1..=required_levels {
1620            // Add delay for approval (1-3 business hours per level)
1621            let delay_hours = self.rng.random_range(1..4);
1622            current_time += chrono::Duration::hours(delay_hours);
1623
1624            // Skip weekends
1625            while current_time.weekday() == chrono::Weekday::Sat
1626                || current_time.weekday() == chrono::Weekday::Sun
1627            {
1628                current_time += chrono::Duration::days(1);
1629            }
1630
1631            // Generate approver based on level
1632            let (approver_id, approver_role) = self.select_approver(level);
1633
1634            let approve_action = ApprovalAction::new(
1635                approver_id.clone(),
1636                approver_role.to_string(),
1637                approver_role,
1638                ApprovalActionType::Approve,
1639                level,
1640            )
1641            .with_timestamp(current_time);
1642
1643            workflow.actions.push(approve_action);
1644            workflow.current_level = level;
1645        }
1646
1647        // Mark as approved
1648        workflow.status = ApprovalStatus::Approved;
1649        workflow.approved_at = Some(current_time);
1650
1651        entry.header.approval_workflow = Some(workflow);
1652    }
1653
1654    /// Select an approver based on the required level.
1655    fn select_approver(&mut self, level: u8) -> (String, UserPersona) {
1656        let persona = match level {
1657            1 => UserPersona::Manager,
1658            2 => UserPersona::Controller,
1659            _ => UserPersona::Executive,
1660        };
1661
1662        // Try to get from user pool first
1663        if let Some(ref pool) = self.user_pool {
1664            if let Some(user) = pool.get_random_user(persona, &mut self.rng) {
1665                return (user.user_id.clone(), persona);
1666            }
1667        }
1668
1669        // Fallback to generated approver
1670        let approver_id = match persona {
1671            UserPersona::Manager => format!("MGR{:04}", self.rng.random_range(1..100)),
1672            UserPersona::Controller => format!("CTRL{:04}", self.rng.random_range(1..20)),
1673            UserPersona::Executive => format!("EXEC{:04}", self.rng.random_range(1..10)),
1674            _ => format!("USR{:04}", self.rng.random_range(1..1000)),
1675        };
1676
1677        (approver_id, persona)
1678    }
1679
1680    /// Parse user persona from string.
1681    fn parse_persona(&self, persona_str: &str) -> UserPersona {
1682        match persona_str.to_lowercase().as_str() {
1683            s if s.contains("junior") => UserPersona::JuniorAccountant,
1684            s if s.contains("senior") => UserPersona::SeniorAccountant,
1685            s if s.contains("controller") => UserPersona::Controller,
1686            s if s.contains("manager") => UserPersona::Manager,
1687            s if s.contains("executive") => UserPersona::Executive,
1688            s if s.contains("automated") || s.contains("system") => UserPersona::AutomatedSystem,
1689            _ => UserPersona::JuniorAccountant, // Default
1690        }
1691    }
1692
1693    /// Enable or disable approval workflow.
1694    pub fn with_approval(mut self, enabled: bool) -> Self {
1695        self.approval_enabled = enabled;
1696        self
1697    }
1698
1699    /// Set the approval threshold amount.
1700    pub fn with_approval_threshold(mut self, threshold: rust_decimal::Decimal) -> Self {
1701        self.approval_threshold = threshold;
1702        self
1703    }
1704
1705    /// Set the temporal drift controller for simulating distribution changes over time.
1706    ///
1707    /// When drift is enabled, amounts and other distributions will shift based on
1708    /// the period (month) to simulate realistic temporal evolution like inflation
1709    /// or increasing fraud rates.
1710    pub fn with_drift_controller(mut self, controller: DriftController) -> Self {
1711        self.drift_controller = Some(controller);
1712        self
1713    }
1714
1715    /// Set drift configuration directly.
1716    ///
1717    /// Creates a drift controller from the config. Total periods is calculated
1718    /// from the date range.
1719    pub fn with_drift_config(mut self, config: DriftConfig, seed: u64) -> Self {
1720        if config.enabled {
1721            let total_periods = self.calculate_total_periods();
1722            self.drift_controller = Some(DriftController::new(config, seed, total_periods));
1723        }
1724        self
1725    }
1726
1727    /// Calculate total periods (months) in the date range.
1728    fn calculate_total_periods(&self) -> u32 {
1729        let start_year = self.start_date.year();
1730        let start_month = self.start_date.month();
1731        let end_year = self.end_date.year();
1732        let end_month = self.end_date.month();
1733
1734        ((end_year - start_year) * 12 + (end_month as i32 - start_month as i32) + 1).max(1) as u32
1735    }
1736
1737    /// Calculate the period number (0-indexed) for a given date.
1738    fn date_to_period(&self, date: NaiveDate) -> u32 {
1739        let start_year = self.start_date.year();
1740        let start_month = self.start_date.month() as i32;
1741        let date_year = date.year();
1742        let date_month = date.month() as i32;
1743
1744        ((date_year - start_year) * 12 + (date_month - start_month)).max(0) as u32
1745    }
1746
1747    /// Get drift adjustments for a given date.
1748    fn get_drift_adjustments(&self, date: NaiveDate) -> DriftAdjustments {
1749        if let Some(ref controller) = self.drift_controller {
1750            let period = self.date_to_period(date);
1751            controller.compute_adjustments(period)
1752        } else {
1753            DriftAdjustments::none()
1754        }
1755    }
1756
1757    /// Select a user from the pool or generate a generic user ID.
1758    #[inline]
1759    fn select_user(&mut self, is_automated: bool) -> (String, String) {
1760        if let Some(ref pool) = self.user_pool {
1761            let persona = if is_automated {
1762                UserPersona::AutomatedSystem
1763            } else {
1764                // Random distribution among human personas
1765                let roll: f64 = self.rng.random();
1766                if roll < 0.4 {
1767                    UserPersona::JuniorAccountant
1768                } else if roll < 0.7 {
1769                    UserPersona::SeniorAccountant
1770                } else if roll < 0.85 {
1771                    UserPersona::Controller
1772                } else {
1773                    UserPersona::Manager
1774                }
1775            };
1776
1777            if let Some(user) = pool.get_random_user(persona, &mut self.rng) {
1778                return (user.user_id.clone(), user.persona.to_string());
1779            }
1780        }
1781
1782        // Fallback to generic format
1783        if is_automated {
1784            (
1785                format!("BATCH{:04}", self.rng.random_range(1..=20)),
1786                "automated_system".to_string(),
1787            )
1788        } else {
1789            (
1790                format!("USER{:04}", self.rng.random_range(1..=40)),
1791                "senior_accountant".to_string(),
1792            )
1793        }
1794    }
1795
1796    /// Select transaction source based on configuration weights.
1797    #[inline]
1798    fn select_source(&mut self) -> TransactionSource {
1799        let roll: f64 = self.rng.random();
1800        let dist = &self.config.source_distribution;
1801
1802        if roll < dist.manual {
1803            TransactionSource::Manual
1804        } else if roll < dist.manual + dist.automated {
1805            TransactionSource::Automated
1806        } else if roll < dist.manual + dist.automated + dist.recurring {
1807            TransactionSource::Recurring
1808        } else {
1809            TransactionSource::Adjustment
1810        }
1811    }
1812
1813    /// Select a business process based on configuration weights.
1814    #[inline]
1815    /// Map a business process to a SAP-style document type code.
1816    ///
1817    /// - P2P → "KR" (vendor invoice)
1818    /// - O2C → "DR" (customer invoice)
1819    /// - R2R → "SA" (general journal)
1820    /// - H2R → "HR" (HR posting)
1821    /// - A2R → "AA" (asset posting)
1822    /// - others → "SA"
1823    fn document_type_for_process(process: BusinessProcess) -> &'static str {
1824        match process {
1825            BusinessProcess::P2P => "KR",
1826            BusinessProcess::O2C => "DR",
1827            BusinessProcess::R2R => "SA",
1828            BusinessProcess::H2R => "HR",
1829            BusinessProcess::A2R => "AA",
1830            _ => "SA",
1831        }
1832    }
1833
1834    fn select_business_process(&mut self) -> BusinessProcess {
1835        let roll: f64 = self.rng.random();
1836
1837        // Default weights: O2C=35%, P2P=30%, R2R=20%, H2R=10%, A2R=5%
1838        if roll < 0.35 {
1839            BusinessProcess::O2C
1840        } else if roll < 0.65 {
1841            BusinessProcess::P2P
1842        } else if roll < 0.85 {
1843            BusinessProcess::R2R
1844        } else if roll < 0.95 {
1845            BusinessProcess::H2R
1846        } else {
1847            BusinessProcess::A2R
1848        }
1849    }
1850
1851    #[inline]
1852    fn select_debit_account(&mut self) -> &GLAccount {
1853        let accounts = self.coa.get_accounts_by_type(AccountType::Asset);
1854        let expense_accounts = self.coa.get_accounts_by_type(AccountType::Expense);
1855
1856        // 60% asset, 40% expense for debits
1857        let all: Vec<_> = if self.rng.random::<f64>() < 0.6 {
1858            accounts
1859        } else {
1860            expense_accounts
1861        };
1862
1863        all.choose(&mut self.rng).copied().unwrap_or_else(|| {
1864            tracing::warn!(
1865                "Account selection returned empty list, falling back to first COA account"
1866            );
1867            &self.coa.accounts[0]
1868        })
1869    }
1870
1871    #[inline]
1872    fn select_credit_account(&mut self) -> &GLAccount {
1873        let liability_accounts = self.coa.get_accounts_by_type(AccountType::Liability);
1874        let revenue_accounts = self.coa.get_accounts_by_type(AccountType::Revenue);
1875
1876        // 60% liability, 40% revenue for credits
1877        let all: Vec<_> = if self.rng.random::<f64>() < 0.6 {
1878            liability_accounts
1879        } else {
1880            revenue_accounts
1881        };
1882
1883        all.choose(&mut self.rng).copied().unwrap_or_else(|| {
1884            tracing::warn!(
1885                "Account selection returned empty list, falling back to first COA account"
1886            );
1887            &self.coa.accounts[0]
1888        })
1889    }
1890}
1891
1892impl Generator for JournalEntryGenerator {
1893    type Item = JournalEntry;
1894    type Config = (
1895        TransactionConfig,
1896        Arc<ChartOfAccounts>,
1897        Vec<String>,
1898        NaiveDate,
1899        NaiveDate,
1900    );
1901
1902    fn new(config: Self::Config, seed: u64) -> Self {
1903        Self::new_with_params(config.0, config.1, config.2, config.3, config.4, seed)
1904    }
1905
1906    fn generate_one(&mut self) -> Self::Item {
1907        self.generate()
1908    }
1909
1910    fn reset(&mut self) {
1911        self.rng = seeded_rng(self.seed, 0);
1912        self.line_sampler.reset(self.seed + 1);
1913        self.amount_sampler.reset(self.seed + 2);
1914        self.temporal_sampler.reset(self.seed + 3);
1915        self.count = 0;
1916        self.uuid_factory.reset();
1917
1918        // Reset reference generator by recreating it
1919        let mut ref_gen = ReferenceGenerator::new(
1920            self.start_date.year(),
1921            self.companies.first().map(|s| s.as_str()).unwrap_or("1000"),
1922        );
1923        ref_gen.set_prefix(
1924            ReferenceType::Invoice,
1925            &self.template_config.references.invoice_prefix,
1926        );
1927        ref_gen.set_prefix(
1928            ReferenceType::PurchaseOrder,
1929            &self.template_config.references.po_prefix,
1930        );
1931        ref_gen.set_prefix(
1932            ReferenceType::SalesOrder,
1933            &self.template_config.references.so_prefix,
1934        );
1935        self.reference_generator = ref_gen;
1936    }
1937
1938    fn count(&self) -> u64 {
1939        self.count
1940    }
1941
1942    fn seed(&self) -> u64 {
1943        self.seed
1944    }
1945}
1946
1947use datasynth_core::traits::ParallelGenerator;
1948
1949impl ParallelGenerator for JournalEntryGenerator {
1950    /// Split this generator into `parts` independent sub-generators.
1951    ///
1952    /// Each sub-generator gets a deterministic seed derived from the parent seed
1953    /// and its partition index, plus a partitioned UUID factory to avoid contention.
1954    /// The results are deterministic for a given partition count.
1955    fn split(self, parts: usize) -> Vec<Self> {
1956        let parts = parts.max(1);
1957        (0..parts)
1958            .map(|i| {
1959                // Derive a unique seed per partition using a golden-ratio constant
1960                let sub_seed = self
1961                    .seed
1962                    .wrapping_add((i as u64).wrapping_mul(0x9E3779B97F4A7C15));
1963
1964                let mut gen = JournalEntryGenerator::new_with_full_config(
1965                    self.config.clone(),
1966                    Arc::clone(&self.coa),
1967                    self.companies.clone(),
1968                    self.start_date,
1969                    self.end_date,
1970                    sub_seed,
1971                    self.template_config.clone(),
1972                    self.user_pool.clone(),
1973                );
1974
1975                // Copy over configuration state
1976                gen.company_selector = self.company_selector.clone();
1977                gen.vendor_pool = self.vendor_pool.clone();
1978                gen.customer_pool = self.customer_pool.clone();
1979                gen.material_pool = self.material_pool.clone();
1980                gen.using_real_master_data = self.using_real_master_data;
1981                gen.fraud_config = self.fraud_config.clone();
1982                gen.persona_errors_enabled = self.persona_errors_enabled;
1983                gen.approval_enabled = self.approval_enabled;
1984                gen.approval_threshold = self.approval_threshold;
1985
1986                // Use partitioned UUID factory to eliminate atomic contention
1987                gen.uuid_factory = DeterministicUuidFactory::for_partition(
1988                    sub_seed,
1989                    GeneratorType::JournalEntry,
1990                    i as u8,
1991                );
1992
1993                // Copy temporal patterns if configured
1994                if let Some(ref config) = self.temporal_patterns_config {
1995                    gen.temporal_patterns_config = Some(config.clone());
1996                    // Rebuild business day calculator from the stored config
1997                    if config.business_days.enabled {
1998                        if let Some(ref bdc) = self.business_day_calculator {
1999                            gen.business_day_calculator = Some(bdc.clone());
2000                        }
2001                    }
2002                    // Rebuild processing lag calculator with partition seed
2003                    if config.processing_lags.enabled {
2004                        let lag_config =
2005                            Self::convert_processing_lag_config(&config.processing_lags);
2006                        gen.processing_lag_calculator =
2007                            Some(ProcessingLagCalculator::with_config(sub_seed, lag_config));
2008                    }
2009                }
2010
2011                // Copy drift controller if present
2012                if let Some(ref dc) = self.drift_controller {
2013                    gen.drift_controller = Some(dc.clone());
2014                }
2015
2016                gen
2017            })
2018            .collect()
2019    }
2020}
2021
2022#[cfg(test)]
2023#[allow(clippy::unwrap_used)]
2024mod tests {
2025    use super::*;
2026    use crate::ChartOfAccountsGenerator;
2027
2028    #[test]
2029    fn test_generate_balanced_entries() {
2030        let mut coa_gen =
2031            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2032        let coa = Arc::new(coa_gen.generate());
2033
2034        let mut je_gen = JournalEntryGenerator::new_with_params(
2035            TransactionConfig::default(),
2036            coa,
2037            vec!["1000".to_string()],
2038            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2039            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2040            42,
2041        );
2042
2043        let mut balanced_count = 0;
2044        for _ in 0..100 {
2045            let entry = je_gen.generate();
2046
2047            // Skip entries with human errors as they may be intentionally unbalanced
2048            let has_human_error = entry
2049                .header
2050                .header_text
2051                .as_ref()
2052                .map(|t| t.contains("[HUMAN_ERROR:"))
2053                .unwrap_or(false);
2054
2055            if !has_human_error {
2056                assert!(
2057                    entry.is_balanced(),
2058                    "Entry {:?} is not balanced",
2059                    entry.header.document_id
2060                );
2061                balanced_count += 1;
2062            }
2063            assert!(entry.line_count() >= 2, "Entry has fewer than 2 lines");
2064        }
2065
2066        // Ensure most entries are balanced (human errors are rare)
2067        assert!(
2068            balanced_count >= 80,
2069            "Expected at least 80 balanced entries, got {}",
2070            balanced_count
2071        );
2072    }
2073
2074    #[test]
2075    fn test_deterministic_generation() {
2076        let mut coa_gen =
2077            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2078        let coa = Arc::new(coa_gen.generate());
2079
2080        let mut gen1 = JournalEntryGenerator::new_with_params(
2081            TransactionConfig::default(),
2082            Arc::clone(&coa),
2083            vec!["1000".to_string()],
2084            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2085            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2086            42,
2087        );
2088
2089        let mut gen2 = JournalEntryGenerator::new_with_params(
2090            TransactionConfig::default(),
2091            coa,
2092            vec!["1000".to_string()],
2093            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2094            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2095            42,
2096        );
2097
2098        for _ in 0..50 {
2099            let e1 = gen1.generate();
2100            let e2 = gen2.generate();
2101            assert_eq!(e1.header.document_id, e2.header.document_id);
2102            assert_eq!(e1.total_debit(), e2.total_debit());
2103        }
2104    }
2105
2106    #[test]
2107    fn test_templates_generate_descriptions() {
2108        let mut coa_gen =
2109            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2110        let coa = Arc::new(coa_gen.generate());
2111
2112        // Enable all template features
2113        let template_config = TemplateConfig {
2114            names: datasynth_config::schema::NameTemplateConfig {
2115                generate_realistic_names: true,
2116                email_domain: "test.com".to_string(),
2117                culture_distribution: datasynth_config::schema::CultureDistribution::default(),
2118            },
2119            descriptions: datasynth_config::schema::DescriptionTemplateConfig {
2120                generate_header_text: true,
2121                generate_line_text: true,
2122            },
2123            references: datasynth_config::schema::ReferenceTemplateConfig {
2124                generate_references: true,
2125                invoice_prefix: "TEST-INV".to_string(),
2126                po_prefix: "TEST-PO".to_string(),
2127                so_prefix: "TEST-SO".to_string(),
2128            },
2129        };
2130
2131        let mut je_gen = JournalEntryGenerator::new_with_full_config(
2132            TransactionConfig::default(),
2133            coa,
2134            vec!["1000".to_string()],
2135            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2136            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2137            42,
2138            template_config,
2139            None,
2140        )
2141        .with_persona_errors(false); // Disable for template testing
2142
2143        for _ in 0..10 {
2144            let entry = je_gen.generate();
2145
2146            // Verify header text is populated
2147            assert!(
2148                entry.header.header_text.is_some(),
2149                "Header text should be populated"
2150            );
2151
2152            // Verify reference is populated
2153            assert!(
2154                entry.header.reference.is_some(),
2155                "Reference should be populated"
2156            );
2157
2158            // Verify business process is set
2159            assert!(
2160                entry.header.business_process.is_some(),
2161                "Business process should be set"
2162            );
2163
2164            // Verify line text is populated
2165            for line in &entry.lines {
2166                assert!(line.line_text.is_some(), "Line text should be populated");
2167            }
2168
2169            // Entry should still be balanced
2170            assert!(entry.is_balanced());
2171        }
2172    }
2173
2174    #[test]
2175    fn test_user_pool_integration() {
2176        let mut coa_gen =
2177            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2178        let coa = Arc::new(coa_gen.generate());
2179
2180        let companies = vec!["1000".to_string()];
2181
2182        // Generate user pool
2183        let mut user_gen = crate::UserGenerator::new(42);
2184        let user_pool = user_gen.generate_standard(&companies);
2185
2186        let mut je_gen = JournalEntryGenerator::new_with_full_config(
2187            TransactionConfig::default(),
2188            coa,
2189            companies,
2190            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2191            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2192            42,
2193            TemplateConfig::default(),
2194            Some(user_pool),
2195        );
2196
2197        // Generate entries and verify user IDs are from pool
2198        for _ in 0..20 {
2199            let entry = je_gen.generate();
2200
2201            // User ID should not be generic BATCH/USER format when pool is used
2202            // (though it may still fall back if random selection misses)
2203            assert!(!entry.header.created_by.is_empty());
2204        }
2205    }
2206
2207    #[test]
2208    fn test_master_data_connection() {
2209        let mut coa_gen =
2210            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2211        let coa = Arc::new(coa_gen.generate());
2212
2213        // Create test vendors
2214        let vendors = vec![
2215            Vendor::new("V-TEST-001", "Test Vendor Alpha", VendorType::Supplier),
2216            Vendor::new("V-TEST-002", "Test Vendor Beta", VendorType::Technology),
2217        ];
2218
2219        // Create test customers
2220        let customers = vec![
2221            Customer::new("C-TEST-001", "Test Customer One", CustomerType::Corporate),
2222            Customer::new(
2223                "C-TEST-002",
2224                "Test Customer Two",
2225                CustomerType::SmallBusiness,
2226            ),
2227        ];
2228
2229        // Create test materials
2230        let materials = vec![Material::new(
2231            "MAT-TEST-001",
2232            "Test Material A",
2233            MaterialType::RawMaterial,
2234        )];
2235
2236        // Create generator with master data
2237        let generator = JournalEntryGenerator::new_with_params(
2238            TransactionConfig::default(),
2239            coa,
2240            vec!["1000".to_string()],
2241            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2242            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2243            42,
2244        );
2245
2246        // Without master data
2247        assert!(!generator.is_using_real_master_data());
2248
2249        // Connect master data
2250        let generator_with_data = generator
2251            .with_vendors(&vendors)
2252            .with_customers(&customers)
2253            .with_materials(&materials);
2254
2255        // Should now be using real master data
2256        assert!(generator_with_data.is_using_real_master_data());
2257    }
2258
2259    #[test]
2260    fn test_with_master_data_convenience_method() {
2261        let mut coa_gen =
2262            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2263        let coa = Arc::new(coa_gen.generate());
2264
2265        let vendors = vec![Vendor::new("V-001", "Vendor One", VendorType::Supplier)];
2266        let customers = vec![Customer::new(
2267            "C-001",
2268            "Customer One",
2269            CustomerType::Corporate,
2270        )];
2271        let materials = vec![Material::new(
2272            "MAT-001",
2273            "Material One",
2274            MaterialType::RawMaterial,
2275        )];
2276
2277        let generator = JournalEntryGenerator::new_with_params(
2278            TransactionConfig::default(),
2279            coa,
2280            vec!["1000".to_string()],
2281            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2282            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2283            42,
2284        )
2285        .with_master_data(&vendors, &customers, &materials);
2286
2287        assert!(generator.is_using_real_master_data());
2288    }
2289
/// Checks that `apply_stress_factors` leaves a base rate of 0.1 essentially
/// unchanged on the baseline date and raises it on month-end, year-end,
/// Friday and Monday dates.
#[test]
fn test_stress_factors_increase_error_rate() {
    let mut chart_builder =
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
    let chart = Arc::new(chart_builder.generate());

    let generator = JournalEntryGenerator::new_with_params(
        TransactionConfig::default(),
        chart,
        vec!["1000".to_string()],
        NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
        NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
        42,
    );

    let base_rate = 0.1;

    // Small helpers so each scenario below reads as "date -> adjusted rate".
    let on = |year: i32, month: u32, day: u32| NaiveDate::from_ymd_opt(year, month, day).unwrap();
    let stressed = |date: NaiveDate| generator.apply_stress_factors(base_rate, date);

    // Baseline: expected to carry no stress adjustment.
    // NOTE(review): 2024-06-15 is a Saturday (the day after the Friday used
    // below), not a mid-week day as the assertion messages imply. Consider a
    // genuine mid-week date once weekend handling in `apply_stress_factors`
    // has been confirmed.
    let regular_rate = stressed(on(2024, 6, 15));
    let drift_from_base = (regular_rate - base_rate).abs();
    assert!(
        drift_from_base < 0.01,
        "Regular day should have minimal stress factor adjustment"
    );

    // Month end: 2024-06-29 (a Saturday) must exceed the baseline.
    let month_end_rate = stressed(on(2024, 6, 29));
    assert!(
        month_end_rate > regular_rate,
        "Month end should have higher error rate than regular day"
    );

    // Year end: 2024-12-30 must exceed the month-end rate.
    // NOTE(review): this date is also a Monday, so any Monday adjustment may
    // contribute to the result - confirm against `apply_stress_factors`.
    let year_end_rate = stressed(on(2024, 12, 30));
    assert!(
        year_end_rate > month_end_rate,
        "Year end should have highest error rate"
    );

    // Friday: 2024-06-14.
    let friday_rate = stressed(on(2024, 6, 14));
    assert!(
        friday_rate > regular_rate,
        "Friday should have higher error rate than mid-week"
    );

    // Monday: 2024-06-17.
    let monday_rate = stressed(on(2024, 6, 17));
    assert!(
        monday_rate > regular_rate,
        "Monday should have higher error rate than mid-week"
    );
}
2347
/// Generates 200 entries with persona errors disabled and checks that every
/// entry balances and that at least one posting date is shared by several
/// entries.
#[test]
fn test_batching_produces_similar_entries() {
    let mut chart_builder =
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
    let chart = Arc::new(chart_builder.generate());

    // Seed 123 was chosen by the original author as more likely to trigger
    // batching; persona errors are switched off so entries stay balanced.
    let mut je_gen = JournalEntryGenerator::new_with_params(
        TransactionConfig::default(),
        chart,
        vec!["1000".to_string()],
        NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
        NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
        123,
    )
    .with_persona_errors(false);

    // The original note cites a 15% batch rate, so a sample of 200 is
    // expected to contain some batches.
    let mut entries: Vec<JournalEntry> = Vec::with_capacity(200);
    for _ in 0..200 {
        entries.push(je_gen.generate());
    }

    // Balance must hold for every entry, batched or not.
    assert!(
        entries.iter().all(|je| je.is_balanced()),
        "All entries including batched should be balanced"
    );

    // Tally entries per posting date; a shared date is taken as the batch indicator.
    let mut per_day: std::collections::HashMap<NaiveDate, usize> =
        std::collections::HashMap::new();
    for je in &entries {
        per_day
            .entry(je.header.posting_date)
            .and_modify(|seen| *seen += 1)
            .or_insert(1);
    }

    // NOTE(review): with 200 entries in a one-year range, repeated dates are
    // likely even without batching, so this is a weak indicator - consider a
    // stricter check.
    let any_shared_date = per_day.values().any(|&seen| seen > 1);
    assert!(
        any_shared_date,
        "With batching, should see some dates with multiple entries"
    );
}
2390
/// With temporal patterns and business-day handling enabled for the "US"
/// calendar region, none of 100 generated entries may be posted on a
/// Saturday or Sunday.
#[test]
fn test_temporal_patterns_business_days() {
    use datasynth_config::schema::{
        BusinessDaySchemaConfig, CalendarSchemaConfig, TemporalPatternsConfig,
    };

    let mut chart_builder =
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
    let chart = Arc::new(chart_builder.generate());

    // Assemble the temporal configuration piece by piece: business days on,
    // US calendar, no custom holidays, everything else left at its default.
    let business_days = BusinessDaySchemaConfig {
        enabled: true,
        ..Default::default()
    };
    let calendars = CalendarSchemaConfig {
        regions: vec![String::from("US")],
        custom_holidays: Vec::new(),
    };
    let temporal_config = TemporalPatternsConfig {
        enabled: true,
        business_days,
        calendars,
        ..Default::default()
    };

    // The generation window is Q1 2024.
    let quarter_start = NaiveDate::from_ymd_opt(2024, 1, 1).unwrap();
    let quarter_end = NaiveDate::from_ymd_opt(2024, 3, 31).unwrap();

    let mut je_gen = JournalEntryGenerator::new_with_params(
        TransactionConfig::default(),
        chart,
        vec!["1000".to_string()],
        quarter_start,
        quarter_end,
        42,
    )
    .with_temporal_patterns(temporal_config, 42)
    .with_persona_errors(false);

    let mut entries: Vec<JournalEntry> = Vec::with_capacity(100);
    for _ in 0..100 {
        entries.push(je_gen.generate());
    }

    // Every posting date must fall on a weekday.
    for je in &entries {
        let posted = je.header.posting_date;
        let on_weekend = matches!(
            posted.weekday(),
            chrono::Weekday::Sat | chrono::Weekday::Sun
        );
        assert!(
            !on_weekend,
            "Posting date {:?} should not be a weekend",
            posted
        );
    }
}
2438
/// Without enabling temporal patterns, fewer than 5% of 500 generated
/// entries may carry a Saturday or Sunday posting date.
///
/// Per the original author's note, this guards a fix in which
/// `new_with_full_config` always installs a default business-day calculator
/// with US holidays as a fallback (not verifiable from this test alone).
#[test]
fn test_default_generation_filters_weekends() {
    let mut chart_builder =
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
    let chart = Arc::new(chart_builder.generate());

    // Note: no `with_temporal_patterns` call here - defaults only.
    let mut je_gen = JournalEntryGenerator::new_with_params(
        TransactionConfig::default(),
        chart,
        vec!["1000".to_string()],
        NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
        NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
        42,
    )
    .with_persona_errors(false);

    let total = 500;
    let mut entries: Vec<JournalEntry> = Vec::new();
    for _ in 0..total {
        entries.push(je_gen.generate());
    }

    // Tally the entries whose posting date lands on a weekend.
    let mut weekend_count = 0_usize;
    for je in &entries {
        let is_weekend = matches!(
            je.header.posting_date.weekday(),
            chrono::Weekday::Sat | chrono::Weekday::Sun
        );
        if is_weekend {
            weekend_count += 1;
        }
    }

    let weekend_pct = weekend_count as f64 / total as f64;
    assert!(
        weekend_pct < 0.05,
        "Expected weekend entries <5% of total without temporal_patterns enabled, \
         but got {:.1}% ({}/{})",
        weekend_pct * 100.0,
        weekend_count,
        total
    );
}
2479
/// Generates 200 entries (persona errors and batching disabled) and checks
/// that more than three distinct document types appear and that "SA" makes
/// up less than half of them.
#[test]
fn test_document_type_derived_from_business_process() {
    let mut chart_builder =
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
    let chart = Arc::new(chart_builder.generate());

    let mut je_gen = JournalEntryGenerator::new_with_params(
        TransactionConfig::default(),
        chart,
        vec!["1000".to_string()],
        NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
        NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
        99,
    )
    .with_persona_errors(false)
    .with_batching(false);

    let total = 200;
    let entries: Vec<JournalEntry> = (0..total).map(|_| je_gen.generate()).collect();

    // Distinct document types seen across the sample.
    let doc_types: std::collections::HashSet<_> = entries
        .iter()
        .map(|je| je.header.document_type.clone())
        .collect();

    // Number of entries typed "SA".
    let sa_count: usize = entries
        .iter()
        .filter(|je| &je.header.document_type == "SA")
        .count();

    // Variety check: more than three distinct types are required.
    assert!(
        doc_types.len() > 3,
        "Expected >3 distinct document types, got {} ({:?})",
        doc_types.len(),
        doc_types,
    );

    // Dominance check: "SA" must stay below 50%. The original note attributes
    // this to R2R carrying 20% of the process weight (not visible here).
    let sa_pct = sa_count as f64 / total as f64;
    assert!(
        sa_pct < 0.50,
        "Expected SA <50%, got {:.1}% ({}/{})",
        sa_pct * 100.0,
        sa_count,
        total,
    );
}
2528
/// Across 200 generated entries, more than 95% of all lines must have
/// `account_description` populated.
#[test]
fn test_enrich_line_items_account_description() {
    let mut chart_builder =
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
    let chart = Arc::new(chart_builder.generate());

    let mut je_gen = JournalEntryGenerator::new_with_params(
        TransactionConfig::default(),
        chart,
        vec!["1000".to_string()],
        NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
        NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
        42,
    )
    .with_persona_errors(false);

    let total = 200;
    let mut entries: Vec<JournalEntry> = Vec::new();
    for _ in 0..total {
        entries.push(je_gen.generate());
    }

    // Walk every line once, tracking the overall count and the described count.
    let mut total_lines = 0_usize;
    let mut lines_with_desc = 0_usize;
    for je in &entries {
        total_lines += je.lines.len();
        for line in &je.lines {
            if line.account_description.is_some() {
                lines_with_desc += 1;
            }
        }
    }

    let desc_pct = lines_with_desc as f64 / total_lines as f64;
    assert!(
        desc_pct > 0.95,
        "Expected >95% of lines to have account_description, got {:.1}% ({}/{})",
        desc_pct * 100.0,
        lines_with_desc,
        total_lines,
    );
}
2565
/// Across 300 generated entries, more than 80% of the lines posted to
/// accounts whose number starts with '5' or '6' must carry a `cost_center`.
///
/// NOTE(review): when the sample contains no such lines the test passes
/// without asserting anything - consider requiring at least one expense line.
#[test]
fn test_enrich_line_items_cost_center_for_expense_accounts() {
    let mut chart_builder =
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
    let chart = Arc::new(chart_builder.generate());

    let mut je_gen = JournalEntryGenerator::new_with_params(
        TransactionConfig::default(),
        chart,
        vec!["1000".to_string()],
        NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
        NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
        42,
    )
    .with_persona_errors(false);

    let total = 300;
    let mut entries: Vec<JournalEntry> = Vec::new();
    for _ in 0..total {
        entries.push(je_gen.generate());
    }

    // Tally expense lines (leading '5' or '6') and those with a cost center.
    let mut expense_total = 0_usize;
    let mut with_cc = 0_usize;
    for je in &entries {
        for line in &je.lines {
            let is_expense = matches!(line.gl_account.chars().next(), Some('5') | Some('6'));
            if !is_expense {
                continue;
            }
            expense_total += 1;
            if line.cost_center.is_some() {
                with_cc += 1;
            }
        }
    }

    // Nothing to measure when the sample has no expense lines.
    if expense_total == 0 {
        return;
    }

    let cc_pct = with_cc as f64 / expense_total as f64;
    assert!(
        cc_pct > 0.80,
        "Expected >80% of expense lines to have cost_center, got {:.1}% ({}/{})",
        cc_pct * 100.0,
        with_cc,
        expense_total,
    );
}
2610
/// Across 100 generated entries, more than 95% of all lines must have
/// `profit_center` populated, and more than 95% must have `line_text`.
#[test]
fn test_enrich_line_items_profit_center_and_line_text() {
    let mut chart_builder =
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
    let chart = Arc::new(chart_builder.generate());

    let mut je_gen = JournalEntryGenerator::new_with_params(
        TransactionConfig::default(),
        chart,
        vec!["1000".to_string()],
        NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
        NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
        42,
    )
    .with_persona_errors(false);

    let total = 100;
    let mut entries: Vec<JournalEntry> = Vec::new();
    for _ in 0..total {
        entries.push(je_gen.generate());
    }

    // A single pass gathers the line total and both per-field counts.
    let mut total_lines = 0_usize;
    let mut with_pc = 0_usize;
    let mut with_text = 0_usize;
    for je in &entries {
        total_lines += je.lines.len();
        for line in &je.lines {
            if line.profit_center.is_some() {
                with_pc += 1;
            }
            if line.line_text.is_some() {
                with_text += 1;
            }
        }
    }

    // Profit center coverage.
    let pc_pct = with_pc as f64 / total_lines as f64;
    assert!(
        pc_pct > 0.95,
        "Expected >95% of lines to have profit_center, got {:.1}% ({}/{})",
        pc_pct * 100.0,
        with_pc,
        total_lines,
    );

    // Line text coverage (the original note says the text may come from a
    // template or fall back to the header).
    let text_pct = with_text as f64 / total_lines as f64;
    assert!(
        text_pct > 0.95,
        "Expected >95% of lines to have line_text, got {:.1}% ({}/{})",
        text_pct * 100.0,
        with_text,
        total_lines,
    );
}
2662}