Skip to main content

datasynth_generators/
je_generator.rs

1//! Journal Entry generator with statistical distributions.
2
3use chrono::{Datelike, NaiveDate, Timelike};
4use datasynth_core::utils::seeded_rng;
5use rand::prelude::*;
6use rand_chacha::ChaCha8Rng;
7use rust_decimal::prelude::*;
8use rust_decimal::Decimal;
9use std::sync::Arc;
10
11use tracing::debug;
12
13use datasynth_config::schema::{
14    FraudConfig, GeneratorConfig, TemplateConfig, TemporalPatternsConfig, TransactionConfig,
15};
16use datasynth_core::distributions::{
17    BusinessDayCalculator, CrossDayConfig, DriftAdjustments, DriftConfig, DriftController,
18    EventType, LagDistribution, PeriodEndConfig, PeriodEndDynamics, PeriodEndModel,
19    ProcessingLagCalculator, ProcessingLagConfig, *,
20};
21use datasynth_core::models::*;
22use datasynth_core::templates::{
23    descriptions::DescriptionContext, DescriptionGenerator, ReferenceGenerator, ReferenceType,
24};
25use datasynth_core::traits::Generator;
26use datasynth_core::uuid_factory::{DeterministicUuidFactory, GeneratorType};
27use datasynth_core::CountryPack;
28
29use crate::company_selector::WeightedCompanySelector;
30use crate::user_generator::{UserGenerator, UserGeneratorConfig};
31
32/// Generator for realistic journal entries.
33pub struct JournalEntryGenerator {
34    rng: ChaCha8Rng,
35    seed: u64,
36    config: TransactionConfig,
37    coa: Arc<ChartOfAccounts>,
38    companies: Vec<String>,
39    company_selector: WeightedCompanySelector,
40    line_sampler: LineItemSampler,
41    amount_sampler: AmountSampler,
42    temporal_sampler: TemporalSampler,
43    start_date: NaiveDate,
44    end_date: NaiveDate,
45    count: u64,
46    uuid_factory: DeterministicUuidFactory,
47    // Enhanced features
48    user_pool: Option<UserPool>,
49    description_generator: DescriptionGenerator,
50    reference_generator: ReferenceGenerator,
51    template_config: TemplateConfig,
52    vendor_pool: VendorPool,
53    customer_pool: CustomerPool,
54    // Material pool for realistic material references
55    material_pool: Option<MaterialPool>,
56    // Flag indicating whether we're using real master data vs defaults
57    using_real_master_data: bool,
58    // Fraud generation
59    fraud_config: FraudConfig,
60    // Persona-based error injection
61    persona_errors_enabled: bool,
62    // Approval threshold enforcement
63    approval_enabled: bool,
64    approval_threshold: rust_decimal::Decimal,
65    // SOD violation rate for approval tracking (0.0 to 1.0)
66    sod_violation_rate: f64,
67    // Batching behavior - humans often process similar items together
68    batch_state: Option<BatchState>,
69    // Temporal drift controller for simulating distribution changes over time
70    drift_controller: Option<DriftController>,
71    // Temporal patterns components
72    business_day_calculator: Option<BusinessDayCalculator>,
73    processing_lag_calculator: Option<ProcessingLagCalculator>,
74    temporal_patterns_config: Option<TemporalPatternsConfig>,
75}
76
77/// State for tracking batch processing behavior.
78///
79/// When humans process transactions, they often batch similar items together
80/// (e.g., processing all invoices from one vendor, entering similar expenses).
81#[derive(Clone)]
82struct BatchState {
83    /// The base entry template to vary
84    base_account_number: String,
85    base_amount: rust_decimal::Decimal,
86    base_business_process: Option<BusinessProcess>,
87    base_posting_date: NaiveDate,
88    /// Remaining entries in this batch
89    remaining: u8,
90}
91
92impl JournalEntryGenerator {
93    /// Create a new journal entry generator.
94    pub fn new_with_params(
95        config: TransactionConfig,
96        coa: Arc<ChartOfAccounts>,
97        companies: Vec<String>,
98        start_date: NaiveDate,
99        end_date: NaiveDate,
100        seed: u64,
101    ) -> Self {
102        Self::new_with_full_config(
103            config,
104            coa,
105            companies,
106            start_date,
107            end_date,
108            seed,
109            TemplateConfig::default(),
110            None,
111        )
112    }
113
114    /// Create a new journal entry generator with full configuration.
115    #[allow(clippy::too_many_arguments)]
116    pub fn new_with_full_config(
117        config: TransactionConfig,
118        coa: Arc<ChartOfAccounts>,
119        companies: Vec<String>,
120        start_date: NaiveDate,
121        end_date: NaiveDate,
122        seed: u64,
123        template_config: TemplateConfig,
124        user_pool: Option<UserPool>,
125    ) -> Self {
126        // Initialize user pool if not provided
127        let user_pool = user_pool.or_else(|| {
128            if template_config.names.generate_realistic_names {
129                let user_gen_config = UserGeneratorConfig {
130                    culture_distribution: vec![
131                        (
132                            datasynth_core::templates::NameCulture::WesternUs,
133                            template_config.names.culture_distribution.western_us,
134                        ),
135                        (
136                            datasynth_core::templates::NameCulture::Hispanic,
137                            template_config.names.culture_distribution.hispanic,
138                        ),
139                        (
140                            datasynth_core::templates::NameCulture::German,
141                            template_config.names.culture_distribution.german,
142                        ),
143                        (
144                            datasynth_core::templates::NameCulture::French,
145                            template_config.names.culture_distribution.french,
146                        ),
147                        (
148                            datasynth_core::templates::NameCulture::Chinese,
149                            template_config.names.culture_distribution.chinese,
150                        ),
151                        (
152                            datasynth_core::templates::NameCulture::Japanese,
153                            template_config.names.culture_distribution.japanese,
154                        ),
155                        (
156                            datasynth_core::templates::NameCulture::Indian,
157                            template_config.names.culture_distribution.indian,
158                        ),
159                    ],
160                    email_domain: template_config.names.email_domain.clone(),
161                    generate_realistic_names: true,
162                };
163                let mut user_gen = UserGenerator::with_config(seed + 100, user_gen_config);
164                Some(user_gen.generate_standard(&companies))
165            } else {
166                None
167            }
168        });
169
170        // Initialize reference generator
171        let mut ref_gen = ReferenceGenerator::new(
172            start_date.year(),
173            companies
174                .first()
175                .map(std::string::String::as_str)
176                .unwrap_or("1000"),
177        );
178        ref_gen.set_prefix(
179            ReferenceType::Invoice,
180            &template_config.references.invoice_prefix,
181        );
182        ref_gen.set_prefix(
183            ReferenceType::PurchaseOrder,
184            &template_config.references.po_prefix,
185        );
186        ref_gen.set_prefix(
187            ReferenceType::SalesOrder,
188            &template_config.references.so_prefix,
189        );
190
191        // Create weighted company selector (uniform weights for this constructor)
192        let company_selector = WeightedCompanySelector::uniform(companies.clone());
193
194        Self {
195            rng: seeded_rng(seed, 0),
196            seed,
197            config: config.clone(),
198            coa,
199            companies,
200            company_selector,
201            line_sampler: LineItemSampler::with_config(
202                seed + 1,
203                config.line_item_distribution.clone(),
204                config.even_odd_distribution.clone(),
205                config.debit_credit_distribution.clone(),
206            ),
207            amount_sampler: AmountSampler::with_config(seed + 2, config.amounts.clone()),
208            temporal_sampler: TemporalSampler::with_config(
209                seed + 3,
210                config.seasonality.clone(),
211                WorkingHoursConfig::default(),
212                Vec::new(),
213            ),
214            start_date,
215            end_date,
216            count: 0,
217            uuid_factory: DeterministicUuidFactory::new(seed, GeneratorType::JournalEntry),
218            user_pool,
219            description_generator: DescriptionGenerator::new(),
220            reference_generator: ref_gen,
221            template_config,
222            vendor_pool: VendorPool::standard(),
223            customer_pool: CustomerPool::standard(),
224            material_pool: None,
225            using_real_master_data: false,
226            fraud_config: FraudConfig::default(),
227            persona_errors_enabled: true, // Enable by default for realism
228            approval_enabled: true,       // Enable by default for realism
229            approval_threshold: rust_decimal::Decimal::new(10000, 0), // $10,000 default threshold
230            sod_violation_rate: 0.10,     // 10% default SOD violation rate
231            batch_state: None,
232            drift_controller: None,
233            // Always provide a basic BusinessDayCalculator so that weekend/holiday
234            // filtering is active even when temporal_patterns is not explicitly enabled.
235            business_day_calculator: Some(BusinessDayCalculator::new(HolidayCalendar::new(
236                Region::US,
237                start_date.year(),
238            ))),
239            processing_lag_calculator: None,
240            temporal_patterns_config: None,
241        }
242    }
243
244    /// Create from a full GeneratorConfig.
245    ///
246    /// This constructor uses the volume_weight from company configs
247    /// for weighted company selection, and fraud config from GeneratorConfig.
248    pub fn from_generator_config(
249        full_config: &GeneratorConfig,
250        coa: Arc<ChartOfAccounts>,
251        start_date: NaiveDate,
252        end_date: NaiveDate,
253        seed: u64,
254    ) -> Self {
255        let companies: Vec<String> = full_config
256            .companies
257            .iter()
258            .map(|c| c.code.clone())
259            .collect();
260
261        // Create weighted selector using volume_weight from company configs
262        let company_selector = WeightedCompanySelector::from_configs(&full_config.companies);
263
264        let mut generator = Self::new_with_full_config(
265            full_config.transactions.clone(),
266            coa,
267            companies,
268            start_date,
269            end_date,
270            seed,
271            full_config.templates.clone(),
272            None,
273        );
274
275        // Override the uniform selector with weighted selector
276        generator.company_selector = company_selector;
277
278        // Set fraud config
279        generator.fraud_config = full_config.fraud.clone();
280
281        // Configure temporal patterns if enabled
282        let temporal_config = &full_config.temporal_patterns;
283        if temporal_config.enabled {
284            generator = generator.with_temporal_patterns(temporal_config.clone(), seed);
285        }
286
287        generator
288    }
289
290    /// Configure temporal patterns including business day calculations and processing lags.
291    ///
292    /// This enables realistic temporal behavior including:
293    /// - Business day awareness (no postings on weekends/holidays)
294    /// - Processing lag modeling (event-to-posting delays)
295    /// - Period-end dynamics (volume spikes at month/quarter/year end)
296    pub fn with_temporal_patterns(mut self, config: TemporalPatternsConfig, seed: u64) -> Self {
297        // Create business day calculator if enabled
298        if config.business_days.enabled {
299            let region = config
300                .calendars
301                .regions
302                .first()
303                .map(|r| Self::parse_region(r))
304                .unwrap_or(Region::US);
305
306            let calendar = HolidayCalendar::new(region, self.start_date.year());
307            self.business_day_calculator = Some(BusinessDayCalculator::new(calendar));
308        }
309
310        // Create processing lag calculator if enabled
311        if config.processing_lags.enabled {
312            let lag_config = Self::convert_processing_lag_config(&config.processing_lags);
313            self.processing_lag_calculator =
314                Some(ProcessingLagCalculator::with_config(seed, lag_config));
315        }
316
317        // Create period-end dynamics if configured
318        let model = config.period_end.model.as_deref().unwrap_or("flat");
319        if model != "flat"
320            || config
321                .period_end
322                .month_end
323                .as_ref()
324                .is_some_and(|m| m.peak_multiplier.unwrap_or(1.0) != 1.0)
325        {
326            let dynamics = Self::convert_period_end_config(&config.period_end);
327            self.temporal_sampler.set_period_end_dynamics(dynamics);
328        }
329
330        self.temporal_patterns_config = Some(config);
331        self
332    }
333
334    /// Configure temporal patterns using a [`CountryPack`] for the holiday calendar.
335    ///
336    /// This is an alternative to [`with_temporal_patterns`] that derives the
337    /// holiday calendar from a country-pack definition rather than the built-in
338    /// region-based calendars.  All other temporal behaviour (business-day
339    /// adjustment, processing lags, period-end dynamics) is configured
340    /// identically.
341    pub fn with_country_pack_temporal(
342        mut self,
343        config: TemporalPatternsConfig,
344        seed: u64,
345        pack: &CountryPack,
346    ) -> Self {
347        // Create business day calculator using the country pack calendar
348        if config.business_days.enabled {
349            let calendar = HolidayCalendar::from_country_pack(pack, self.start_date.year());
350            self.business_day_calculator = Some(BusinessDayCalculator::new(calendar));
351        }
352
353        // Create processing lag calculator if enabled
354        if config.processing_lags.enabled {
355            let lag_config = Self::convert_processing_lag_config(&config.processing_lags);
356            self.processing_lag_calculator =
357                Some(ProcessingLagCalculator::with_config(seed, lag_config));
358        }
359
360        // Create period-end dynamics if configured
361        let model = config.period_end.model.as_deref().unwrap_or("flat");
362        if model != "flat"
363            || config
364                .period_end
365                .month_end
366                .as_ref()
367                .is_some_and(|m| m.peak_multiplier.unwrap_or(1.0) != 1.0)
368        {
369            let dynamics = Self::convert_period_end_config(&config.period_end);
370            self.temporal_sampler.set_period_end_dynamics(dynamics);
371        }
372
373        self.temporal_patterns_config = Some(config);
374        self
375    }
376
377    /// Convert schema processing lag config to core config.
378    fn convert_processing_lag_config(
379        schema: &datasynth_config::schema::ProcessingLagSchemaConfig,
380    ) -> ProcessingLagConfig {
381        let mut config = ProcessingLagConfig {
382            enabled: schema.enabled,
383            ..Default::default()
384        };
385
386        // Helper to convert lag schema to distribution
387        let convert_lag = |lag: &datasynth_config::schema::LagDistributionSchemaConfig| {
388            let mut dist = LagDistribution::log_normal(lag.mu, lag.sigma);
389            if let Some(min) = lag.min_hours {
390                dist.min_lag_hours = min;
391            }
392            if let Some(max) = lag.max_hours {
393                dist.max_lag_hours = max;
394            }
395            dist
396        };
397
398        // Apply event-specific lags
399        if let Some(ref lag) = schema.sales_order_lag {
400            config
401                .event_lags
402                .insert(EventType::SalesOrder, convert_lag(lag));
403        }
404        if let Some(ref lag) = schema.purchase_order_lag {
405            config
406                .event_lags
407                .insert(EventType::PurchaseOrder, convert_lag(lag));
408        }
409        if let Some(ref lag) = schema.goods_receipt_lag {
410            config
411                .event_lags
412                .insert(EventType::GoodsReceipt, convert_lag(lag));
413        }
414        if let Some(ref lag) = schema.invoice_receipt_lag {
415            config
416                .event_lags
417                .insert(EventType::InvoiceReceipt, convert_lag(lag));
418        }
419        if let Some(ref lag) = schema.invoice_issue_lag {
420            config
421                .event_lags
422                .insert(EventType::InvoiceIssue, convert_lag(lag));
423        }
424        if let Some(ref lag) = schema.payment_lag {
425            config
426                .event_lags
427                .insert(EventType::Payment, convert_lag(lag));
428        }
429        if let Some(ref lag) = schema.journal_entry_lag {
430            config
431                .event_lags
432                .insert(EventType::JournalEntry, convert_lag(lag));
433        }
434
435        // Apply cross-day posting config
436        if let Some(ref cross_day) = schema.cross_day_posting {
437            config.cross_day = CrossDayConfig {
438                enabled: cross_day.enabled,
439                probability_by_hour: cross_day.probability_by_hour.clone(),
440                ..Default::default()
441            };
442        }
443
444        config
445    }
446
447    /// Convert schema period-end config to core PeriodEndDynamics.
448    fn convert_period_end_config(
449        schema: &datasynth_config::schema::PeriodEndSchemaConfig,
450    ) -> PeriodEndDynamics {
451        let model_type = schema.model.as_deref().unwrap_or("exponential");
452
453        // Helper to convert period config
454        let convert_period =
455            |period: Option<&datasynth_config::schema::PeriodEndModelSchemaConfig>,
456             default_peak: f64|
457             -> PeriodEndConfig {
458                if let Some(p) = period {
459                    let model = match model_type {
460                        "flat" => PeriodEndModel::FlatMultiplier {
461                            multiplier: p.peak_multiplier.unwrap_or(default_peak),
462                        },
463                        "extended_crunch" => PeriodEndModel::ExtendedCrunch {
464                            start_day: p.start_day.unwrap_or(-10),
465                            sustained_high_days: p.sustained_high_days.unwrap_or(3),
466                            peak_multiplier: p.peak_multiplier.unwrap_or(default_peak),
467                            ramp_up_days: 3, // Default ramp-up period
468                        },
469                        _ => PeriodEndModel::ExponentialAcceleration {
470                            start_day: p.start_day.unwrap_or(-10),
471                            base_multiplier: p.base_multiplier.unwrap_or(1.0),
472                            peak_multiplier: p.peak_multiplier.unwrap_or(default_peak),
473                            decay_rate: p.decay_rate.unwrap_or(0.3),
474                        },
475                    };
476                    PeriodEndConfig {
477                        enabled: true,
478                        model,
479                        additional_multiplier: p.additional_multiplier.unwrap_or(1.0),
480                    }
481                } else {
482                    PeriodEndConfig {
483                        enabled: true,
484                        model: PeriodEndModel::ExponentialAcceleration {
485                            start_day: -10,
486                            base_multiplier: 1.0,
487                            peak_multiplier: default_peak,
488                            decay_rate: 0.3,
489                        },
490                        additional_multiplier: 1.0,
491                    }
492                }
493            };
494
495        PeriodEndDynamics::new(
496            convert_period(schema.month_end.as_ref(), 2.0),
497            convert_period(schema.quarter_end.as_ref(), 3.5),
498            convert_period(schema.year_end.as_ref(), 5.0),
499        )
500    }
501
502    /// Parse a region string into a Region enum.
503    fn parse_region(region_str: &str) -> Region {
504        match region_str.to_uppercase().as_str() {
505            "US" => Region::US,
506            "DE" => Region::DE,
507            "GB" => Region::GB,
508            "CN" => Region::CN,
509            "JP" => Region::JP,
510            "IN" => Region::IN,
511            "BR" => Region::BR,
512            "MX" => Region::MX,
513            "AU" => Region::AU,
514            "SG" => Region::SG,
515            "KR" => Region::KR,
516            "FR" => Region::FR,
517            "IT" => Region::IT,
518            "ES" => Region::ES,
519            "CA" => Region::CA,
520            _ => Region::US,
521        }
522    }
523
    /// Set a custom company selector.
    ///
    /// Replaces whichever selector is currently installed (the uniform one
    /// from the base constructor, or a weighted one) with `selector`.
    pub fn set_company_selector(&mut self, selector: WeightedCompanySelector) {
        self.company_selector = selector;
    }
528
    /// Get the current company selector.
    ///
    /// Returns a shared reference; use [`Self::set_company_selector`] to
    /// install a different one.
    pub fn company_selector(&self) -> &WeightedCompanySelector {
        &self.company_selector
    }
533
    /// Set fraud configuration.
    ///
    /// Overwrites the stored fraud settings, which are read when deciding
    /// whether an entry is fraudulent and which fraud type it gets.
    pub fn set_fraud_config(&mut self, config: FraudConfig) {
        self.fraud_config = config;
    }
538
539    /// Set vendors from generated master data.
540    ///
541    /// This replaces the default vendor pool with actual generated vendors,
542    /// ensuring JEs reference real master data entities.
543    pub fn with_vendors(mut self, vendors: &[Vendor]) -> Self {
544        if !vendors.is_empty() {
545            self.vendor_pool = VendorPool::from_vendors(vendors.to_vec());
546            self.using_real_master_data = true;
547        }
548        self
549    }
550
551    /// Set customers from generated master data.
552    ///
553    /// This replaces the default customer pool with actual generated customers,
554    /// ensuring JEs reference real master data entities.
555    pub fn with_customers(mut self, customers: &[Customer]) -> Self {
556        if !customers.is_empty() {
557            self.customer_pool = CustomerPool::from_customers(customers.to_vec());
558            self.using_real_master_data = true;
559        }
560        self
561    }
562
563    /// Set materials from generated master data.
564    ///
565    /// This provides material references for JEs that involve inventory movements.
566    pub fn with_materials(mut self, materials: &[Material]) -> Self {
567        if !materials.is_empty() {
568            self.material_pool = Some(MaterialPool::from_materials(materials.to_vec()));
569            self.using_real_master_data = true;
570        }
571        self
572    }
573
574    /// Set all master data at once for convenience.
575    ///
576    /// This is the recommended way to configure the JE generator with
577    /// generated master data to ensure data coherence.
578    pub fn with_master_data(
579        self,
580        vendors: &[Vendor],
581        customers: &[Customer],
582        materials: &[Material],
583    ) -> Self {
584        self.with_vendors(vendors)
585            .with_customers(customers)
586            .with_materials(materials)
587    }
588
589    /// Replace the user pool with one generated from a [`CountryPack`].
590    ///
591    /// This is an alternative to the default name-culture distribution that
592    /// derives name pools and weights from the country-pack's `names` section.
593    /// The existing user pool (if any) is discarded and regenerated using
594    /// [`MultiCultureNameGenerator::from_country_pack`].
595    pub fn with_country_pack_names(mut self, pack: &CountryPack) -> Self {
596        let name_gen =
597            datasynth_core::templates::MultiCultureNameGenerator::from_country_pack(pack);
598        let config = UserGeneratorConfig {
599            // The culture distribution is embedded in the name generator
600            // itself, so we use an empty list here.
601            culture_distribution: Vec::new(),
602            email_domain: name_gen.email_domain().to_string(),
603            generate_realistic_names: true,
604        };
605        let mut user_gen = UserGenerator::with_name_generator(self.seed + 100, config, name_gen);
606        self.user_pool = Some(user_gen.generate_standard(&self.companies));
607        self
608    }
609
    /// Check if the generator is using real master data.
    ///
    /// The flag starts out `false` and is set once a non-empty vendor,
    /// customer or material list has been supplied.
    pub fn is_using_real_master_data(&self) -> bool {
        self.using_real_master_data
    }
614
615    /// Determine if this transaction should be fraudulent.
616    fn determine_fraud(&mut self) -> Option<FraudType> {
617        if !self.fraud_config.enabled {
618            return None;
619        }
620
621        // Roll for fraud based on fraud rate
622        if self.rng.random::<f64>() >= self.fraud_config.fraud_rate {
623            return None;
624        }
625
626        // Select fraud type based on distribution
627        Some(self.select_fraud_type())
628    }
629
630    /// Select a fraud type based on the configured distribution.
631    fn select_fraud_type(&mut self) -> FraudType {
632        let dist = &self.fraud_config.fraud_type_distribution;
633        let roll: f64 = self.rng.random();
634
635        let mut cumulative = 0.0;
636
637        cumulative += dist.suspense_account_abuse;
638        if roll < cumulative {
639            return FraudType::SuspenseAccountAbuse;
640        }
641
642        cumulative += dist.fictitious_transaction;
643        if roll < cumulative {
644            return FraudType::FictitiousTransaction;
645        }
646
647        cumulative += dist.revenue_manipulation;
648        if roll < cumulative {
649            return FraudType::RevenueManipulation;
650        }
651
652        cumulative += dist.expense_capitalization;
653        if roll < cumulative {
654            return FraudType::ExpenseCapitalization;
655        }
656
657        cumulative += dist.split_transaction;
658        if roll < cumulative {
659            return FraudType::SplitTransaction;
660        }
661
662        cumulative += dist.timing_anomaly;
663        if roll < cumulative {
664            return FraudType::TimingAnomaly;
665        }
666
667        cumulative += dist.unauthorized_access;
668        if roll < cumulative {
669            return FraudType::UnauthorizedAccess;
670        }
671
672        // Default fallback
673        FraudType::DuplicatePayment
674    }
675
676    /// Map a fraud type to an amount pattern for suspicious amounts.
677    fn fraud_type_to_amount_pattern(&self, fraud_type: FraudType) -> FraudAmountPattern {
678        match fraud_type {
679            FraudType::SplitTransaction | FraudType::JustBelowThreshold => {
680                FraudAmountPattern::ThresholdAdjacent
681            }
682            FraudType::FictitiousTransaction
683            | FraudType::FictitiousEntry
684            | FraudType::SuspenseAccountAbuse
685            | FraudType::RoundDollarManipulation => FraudAmountPattern::ObviousRoundNumbers,
686            FraudType::RevenueManipulation
687            | FraudType::ExpenseCapitalization
688            | FraudType::ImproperCapitalization
689            | FraudType::ReserveManipulation
690            | FraudType::UnauthorizedAccess
691            | FraudType::PrematureRevenue
692            | FraudType::UnderstatedLiabilities
693            | FraudType::OverstatedAssets
694            | FraudType::ChannelStuffing => FraudAmountPattern::StatisticallyImprobable,
695            FraudType::DuplicatePayment
696            | FraudType::TimingAnomaly
697            | FraudType::SelfApproval
698            | FraudType::ExceededApprovalLimit
699            | FraudType::SegregationOfDutiesViolation
700            | FraudType::UnauthorizedApproval
701            | FraudType::CollusiveApproval
702            | FraudType::FictitiousVendor
703            | FraudType::ShellCompanyPayment
704            | FraudType::Kickback
705            | FraudType::KickbackScheme
706            | FraudType::InvoiceManipulation
707            | FraudType::AssetMisappropriation
708            | FraudType::InventoryTheft
709            | FraudType::GhostEmployee => FraudAmountPattern::Normal,
710            // Accounting Standards Fraud Types (ASC 606/IFRS 15 - Revenue)
711            FraudType::ImproperRevenueRecognition
712            | FraudType::ImproperPoAllocation
713            | FraudType::VariableConsiderationManipulation
714            | FraudType::ContractModificationMisstatement => {
715                FraudAmountPattern::StatisticallyImprobable
716            }
717            // Accounting Standards Fraud Types (ASC 842/IFRS 16 - Leases)
718            FraudType::LeaseClassificationManipulation
719            | FraudType::OffBalanceSheetLease
720            | FraudType::LeaseLiabilityUnderstatement
721            | FraudType::RouAssetMisstatement => FraudAmountPattern::StatisticallyImprobable,
722            // Accounting Standards Fraud Types (ASC 820/IFRS 13 - Fair Value)
723            FraudType::FairValueHierarchyManipulation
724            | FraudType::Level3InputManipulation
725            | FraudType::ValuationTechniqueManipulation => {
726                FraudAmountPattern::StatisticallyImprobable
727            }
728            // Accounting Standards Fraud Types (ASC 360/IAS 36 - Impairment)
729            FraudType::DelayedImpairment
730            | FraudType::ImpairmentTestAvoidance
731            | FraudType::CashFlowProjectionManipulation
732            | FraudType::ImproperImpairmentReversal => FraudAmountPattern::StatisticallyImprobable,
733            // Sourcing/Procurement Fraud
734            FraudType::BidRigging
735            | FraudType::PhantomVendorContract
736            | FraudType::ConflictOfInterestSourcing => FraudAmountPattern::Normal,
737            FraudType::SplitContractThreshold => FraudAmountPattern::ThresholdAdjacent,
738            // HR/Payroll Fraud
739            FraudType::GhostEmployeePayroll
740            | FraudType::PayrollInflation
741            | FraudType::DuplicateExpenseReport
742            | FraudType::FictitiousExpense => FraudAmountPattern::Normal,
743            FraudType::SplitExpenseToAvoidApproval => FraudAmountPattern::ThresholdAdjacent,
744            // O2C Fraud
745            FraudType::RevenueTimingManipulation => FraudAmountPattern::StatisticallyImprobable,
746            FraudType::QuotePriceOverride => FraudAmountPattern::Normal,
747        }
748    }
749
    /// Generate a deterministic UUID using the factory.
    ///
    /// Simply forwards to the UUID factory created in the constructor from
    /// the base seed and `GeneratorType::JournalEntry`.
    // NOTE(review): called through `&self`, so the factory presumably keeps
    // its position with interior mutability; confirm against its definition.
    #[inline]
    fn generate_deterministic_uuid(&self) -> uuid::Uuid {
        self.uuid_factory.next()
    }
755
    /// Cost center pool used for expense account enrichment.
    ///
    /// `enrich_line_items` indexes into this list (modulo its length) when a
    /// line on a GL account starting with `5` or `6` has no cost center yet.
    const COST_CENTER_POOL: &'static [&'static str] =
        &["CC1000", "CC2000", "CC3000", "CC4000", "CC5000"];
759
760    /// Enrich journal entry line items with account descriptions, cost centers,
761    /// profit centers, value dates, line text, and assignment fields.
762    ///
763    /// This populates the sparse optional fields that `JournalEntryLine::debit()`
764    /// and `::credit()` leave as `None`.
765    fn enrich_line_items(&self, entry: &mut JournalEntry) {
766        let posting_date = entry.header.posting_date;
767        let company_code = &entry.header.company_code;
768        let header_text = entry.header.header_text.clone();
769        let business_process = entry.header.business_process;
770
771        // Derive a deterministic index from the document_id for cost center selection
772        let doc_id_bytes = entry.header.document_id.as_bytes();
773        let mut cc_seed: usize = 0;
774        for &b in doc_id_bytes {
775            cc_seed = cc_seed.wrapping_add(b as usize);
776        }
777
778        for (i, line) in entry.lines.iter_mut().enumerate() {
779            // 1. account_description: look up from CoA
780            if line.account_description.is_none() {
781                line.account_description = self
782                    .coa
783                    .get_account(&line.gl_account)
784                    .map(|a| a.short_description.clone());
785            }
786
787            // 2. cost_center: assign to expense accounts (5xxx/6xxx)
788            if line.cost_center.is_none() {
789                let first_char = line.gl_account.chars().next().unwrap_or('0');
790                if first_char == '5' || first_char == '6' {
791                    let idx = cc_seed.wrapping_add(i) % Self::COST_CENTER_POOL.len();
792                    line.cost_center = Some(Self::COST_CENTER_POOL[idx].to_string());
793                }
794            }
795
796            // 3. profit_center: derive from company code + business process
797            if line.profit_center.is_none() {
798                let suffix = match business_process {
799                    Some(BusinessProcess::P2P) => "-P2P",
800                    Some(BusinessProcess::O2C) => "-O2C",
801                    Some(BusinessProcess::R2R) => "-R2R",
802                    Some(BusinessProcess::H2R) => "-H2R",
803                    _ => "",
804                };
805                line.profit_center = Some(format!("PC-{company_code}{suffix}"));
806            }
807
808            // 4. line_text: fall back to header_text if not already set
809            if line.line_text.is_none() {
810                line.line_text = header_text.clone();
811            }
812
813            // 5. value_date: set to posting_date for AR/AP accounts
814            if line.value_date.is_none()
815                && (line.gl_account.starts_with("1100") || line.gl_account.starts_with("2000"))
816            {
817                line.value_date = Some(posting_date);
818            }
819
820            // 6. assignment: set to vendor/customer reference for AP/AR lines
821            if line.assignment.is_none() {
822                if line.gl_account.starts_with("2000") {
823                    // AP line - use vendor reference from header
824                    if let Some(ref ht) = header_text {
825                        // Try to extract vendor ID from header text patterns like "... - V-001"
826                        if let Some(vendor_part) = ht.rsplit(" - ").next() {
827                            if vendor_part.starts_with("V-")
828                                || vendor_part.starts_with("VENDOR")
829                                || vendor_part.starts_with("Vendor")
830                            {
831                                line.assignment = Some(vendor_part.to_string());
832                            }
833                        }
834                    }
835                } else if line.gl_account.starts_with("1100") {
836                    // AR line - use customer reference from header
837                    if let Some(ref ht) = header_text {
838                        if let Some(customer_part) = ht.rsplit(" - ").next() {
839                            if customer_part.starts_with("C-")
840                                || customer_part.starts_with("CUST")
841                                || customer_part.starts_with("Customer")
842                            {
843                                line.assignment = Some(customer_part.to_string());
844                            }
845                        }
846                    }
847                }
848            }
849        }
850    }
851
    /// Generate a single journal entry.
    ///
    /// If a batch is in progress (`batch_state` with `remaining > 0`) the call
    /// is delegated to `generate_batched_entry`. Otherwise a fresh entry is
    /// built in these steps:
    ///
    /// 1. Sample posting date (moved to a business day when a calculator is
    ///    configured), company, line specification, source, business process
    ///    and fraud type.
    /// 2. Build the header, including the audit flags `is_manual`,
    ///    `source_system`, `is_post_close` and `created_date`.
    /// 3. Optionally generate header text, reference and source document.
    /// 4. Sample the total amount (fraud pattern, drift, human variation) and
    ///    split it into debit and credit lines.
    /// 5. Enrich the lines, then apply persona errors, the approval workflow
    ///    and possibly start a batch for subsequent calls.
    ///
    /// NOTE(review): several steps draw from `self.rng` one after another;
    /// reordering them will likely change the output produced for a given seed.
    pub fn generate(&mut self) -> JournalEntry {
        debug!(
            count = self.count,
            companies = self.companies.len(),
            start_date = %self.start_date,
            end_date = %self.end_date,
            "Generating journal entry"
        );

        // Check if we're in a batch - if so, generate a batched entry
        if let Some(ref state) = self.batch_state {
            if state.remaining > 0 {
                return self.generate_batched_entry();
            }
        }

        self.count += 1;

        // Generate deterministic document ID
        let document_id = self.generate_deterministic_uuid();

        // Sample posting date
        let mut posting_date = self
            .temporal_sampler
            .sample_date(self.start_date, self.end_date);

        // Adjust posting date to be a business day if business day calculator is configured
        if let Some(ref calc) = self.business_day_calculator {
            if !calc.is_business_day(posting_date) {
                // Move to next business day
                posting_date = calc.next_business_day(posting_date, false);
                // Ensure we don't exceed end_date: fall back to a business day
                // at or before the end of the configured range
                if posting_date > self.end_date {
                    posting_date = calc.prev_business_day(self.end_date, true);
                }
            }
        }

        // Select company using weighted selector
        let company_code = self.company_selector.select(&mut self.rng).to_string();

        // Sample line item specification
        let line_spec = self.line_sampler.sample();

        // Determine source type using full 4-way distribution.
        // Automated and Recurring sources are both treated as "automated" below.
        let source = self.select_source();
        let is_automated = matches!(
            source,
            TransactionSource::Automated | TransactionSource::Recurring
        );

        // Select business process
        let business_process = self.select_business_process();

        // Determine if this is a fraudulent transaction
        let fraud_type = self.determine_fraud();
        let is_fraud = fraud_type.is_some();

        // Sample time based on source (the flag passed is "not automated")
        let time = self.temporal_sampler.sample_time(!is_automated);
        let created_at = posting_date.and_time(time).and_utc();

        // Select user from pool or generate generic
        let (created_by, user_persona) = self.select_user(is_automated);

        // Create header with deterministic UUID
        let mut header =
            JournalEntryHeader::with_deterministic_id(company_code, posting_date, document_id);
        header.created_at = created_at;
        header.source = source;
        header.created_by = created_by;
        header.user_persona = user_persona;
        header.business_process = Some(business_process);
        header.document_type = Self::document_type_for_process(business_process).to_string();
        header.is_fraud = is_fraud;
        header.fraud_type = fraud_type;

        // --- ISA 240 audit flags ---
        // Note: `is_manual` is true only for the Manual source, so it is not
        // simply the negation of `is_automated` above.
        let is_manual = matches!(source, TransactionSource::Manual);
        header.is_manual = is_manual;

        // Determine source_system based on manual vs automated.
        // Manual: 70% "manual", 30% "spreadsheet".
        // Otherwise: 40% SAP-FI, 20% SAP-MM, 20% SAP-SD, 15% interface, 5% SAP-HR.
        header.source_system = if is_manual {
            if self.rng.random::<f64>() < 0.70 {
                "manual".to_string()
            } else {
                "spreadsheet".to_string()
            }
        } else {
            let roll: f64 = self.rng.random();
            if roll < 0.40 {
                "SAP-FI".to_string()
            } else if roll < 0.60 {
                "SAP-MM".to_string()
            } else if roll < 0.80 {
                "SAP-SD".to_string()
            } else if roll < 0.95 {
                "interface".to_string()
            } else {
                "SAP-HR".to_string()
            }
        };

        // is_post_close: entry is in the last month of the configured period
        // and the posting date falls after the 25th (simulating close cutoff)
        let is_post_close = posting_date.month() == self.end_date.month()
            && posting_date.year() == self.end_date.year()
            && posting_date.day() > 25;
        header.is_post_close = is_post_close;

        // created_date: for manual entries, same day as posting (reusing the
        // sampled time); for all other sources, 0-3 days before posting_date
        // at a random time between 08:00:00 and 17:59:59
        let created_date = if is_manual {
            posting_date.and_hms_opt(time.hour().min(23), time.minute(), time.second())
        } else {
            let lag_days = self.rng.random_range(0i64..=3);
            let created_naive_date = posting_date
                .checked_sub_signed(chrono::Duration::days(lag_days))
                .unwrap_or(posting_date);
            created_naive_date.and_hms_opt(
                self.rng.random_range(8u32..=17),
                self.rng.random_range(0u32..=59),
                self.rng.random_range(0u32..=59),
            )
        };
        header.created_date = created_date;

        // Generate description context
        let mut context =
            DescriptionContext::with_period(posting_date.month(), posting_date.year());

        // Add vendor/customer context based on business process
        match business_process {
            BusinessProcess::P2P => {
                if let Some(vendor) = self.vendor_pool.random_vendor(&mut self.rng) {
                    context.vendor_name = Some(vendor.name.clone());
                }
            }
            BusinessProcess::O2C => {
                if let Some(customer) = self.customer_pool.random_customer(&mut self.rng) {
                    context.customer_name = Some(customer.name.clone());
                }
            }
            _ => {}
        }

        // Generate header text if enabled
        if self.template_config.descriptions.generate_header_text {
            header.header_text = Some(self.description_generator.generate_header_text(
                business_process,
                &context,
                &mut self.rng,
            ));
        }

        // Generate reference if enabled
        if self.template_config.references.generate_references {
            header.reference = Some(
                self.reference_generator
                    .generate_for_process_year(business_process, posting_date.year()),
            );
        }

        // Derive typed source document from reference prefix; when the
        // reference is absent or does not parse, manual entries fall back to
        // `DocumentRef::Manual` and everything else stays `None`
        header.source_document = header
            .reference
            .as_deref()
            .and_then(DocumentRef::parse)
            .or_else(|| {
                if header.source == TransactionSource::Manual {
                    Some(DocumentRef::Manual)
                } else {
                    None
                }
            });

        // Generate line items
        let mut entry = JournalEntry::new(header);

        // Generate amount - use fraud pattern if this is a fraudulent transaction
        let base_amount = if let Some(ft) = fraud_type {
            let pattern = self.fraud_type_to_amount_pattern(ft);
            self.amount_sampler.sample_fraud(pattern)
        } else {
            self.amount_sampler.sample()
        };

        // Apply temporal drift if configured.
        // NOTE(review): the seasonal factor is only applied when
        // `amount_mean_multiplier` differs from 1.0 — confirm this gating is
        // intended. The adjusted value is also not rounded here.
        let drift_adjusted_amount = {
            let drift = self.get_drift_adjustments(posting_date);
            if drift.amount_mean_multiplier != 1.0 {
                // Apply drift multiplier (includes seasonal factor if enabled)
                let multiplier = drift.amount_mean_multiplier * drift.seasonal_factor;
                let adjusted = base_amount.to_f64().unwrap_or(1.0) * multiplier;
                Decimal::from_f64_retain(adjusted).unwrap_or(base_amount)
            } else {
                base_amount
            }
        };

        // Apply human variation to amounts for non-automated transactions
        let total_amount = if is_automated {
            drift_adjusted_amount // Automated systems use exact amounts
        } else {
            self.apply_human_variation(drift_adjusted_amount)
        };

        // Generate debit lines (line numbers 1..=debit_count)
        let debit_amounts = self
            .amount_sampler
            .sample_summing_to(line_spec.debit_count, total_amount);
        for (i, amount) in debit_amounts.into_iter().enumerate() {
            let account_number = self.select_debit_account().account_number.clone();
            let mut line = JournalEntryLine::debit(
                entry.header.document_id,
                (i + 1) as u32,
                account_number.clone(),
                amount,
            );

            // Generate line text if enabled
            if self.template_config.descriptions.generate_line_text {
                line.line_text = Some(self.description_generator.generate_line_text(
                    &account_number,
                    &context,
                    &mut self.rng,
                ));
            }

            entry.add_line(line);
        }

        // Generate credit lines - a separately sampled split of the SAME total
        // amount, numbered after the debit lines.
        // NOTE(review): balance relies on `sample_summing_to` returning
        // amounts that sum exactly to `total_amount` — confirm.
        let credit_amounts = self
            .amount_sampler
            .sample_summing_to(line_spec.credit_count, total_amount);
        for (i, amount) in credit_amounts.into_iter().enumerate() {
            let account_number = self.select_credit_account().account_number.clone();
            let mut line = JournalEntryLine::credit(
                entry.header.document_id,
                (line_spec.debit_count + i + 1) as u32,
                account_number.clone(),
                amount,
            );

            // Generate line text if enabled
            if self.template_config.descriptions.generate_line_text {
                line.line_text = Some(self.description_generator.generate_line_text(
                    &account_number,
                    &context,
                    &mut self.rng,
                ));
            }

            entry.add_line(line);
        }

        // Enrich line items with account descriptions, cost centers, etc.
        self.enrich_line_items(&mut entry);

        // Apply persona-based errors if enabled and it's a human user
        if self.persona_errors_enabled && !is_automated {
            self.maybe_inject_persona_error(&mut entry);
        }

        // Apply approval workflow if enabled (the callee decides whether the
        // entry actually needs approval)
        if self.approval_enabled {
            self.maybe_apply_approval_workflow(&mut entry, posting_date);
        }

        // Populate approved_by / approval_date from the approval workflow
        self.populate_approval_fields(&mut entry, posting_date);

        // Maybe start a batch of similar entries for realism
        self.maybe_start_batch(&entry);

        entry
    }
1131
    /// Enable or disable persona-based error injection (builder style).
    ///
    /// When enabled, entries created by human personas have a chance
    /// to contain realistic human errors based on their experience level.
    ///
    /// Consumes the generator and returns it with `persona_errors_enabled`
    /// set to `enabled`.
    pub fn with_persona_errors(mut self, enabled: bool) -> Self {
        self.persona_errors_enabled = enabled;
        self
    }
1140
    /// Set fraud configuration for fraud injection (builder style).
    ///
    /// When fraud is enabled in the config, transactions have a chance
    /// to be marked as fraudulent based on the configured fraud rate.
    ///
    /// Consumes the generator and returns it with `fraud_config` replaced by
    /// `config`.
    pub fn with_fraud_config(mut self, config: FraudConfig) -> Self {
        self.fraud_config = config;
        self
    }
1149
    /// Check if persona errors are enabled.
    ///
    /// Returns the current value of the `persona_errors_enabled` flag.
    pub fn persona_errors_enabled(&self) -> bool {
        self.persona_errors_enabled
    }
1154
    /// Enable or disable batch processing behavior (builder style).
    ///
    /// When enabled (default), the generator will occasionally produce batches
    /// of similar entries, simulating how humans batch similar work together.
    ///
    /// Passing `false` discards any batch currently in progress; passing
    /// `true` leaves the generator unchanged.
    ///
    /// NOTE(review): this method does not record the choice anywhere, so
    /// `maybe_start_batch` can still start new batches after
    /// `with_batching(false)` — confirm whether a persistent flag is intended.
    pub fn with_batching(mut self, enabled: bool) -> Self {
        if !enabled {
            self.batch_state = None;
        }
        self
    }
1165
    /// Check if batch processing is enabled.
    ///
    /// Currently always returns `true`, regardless of generator state.
    ///
    /// NOTE(review): this does not reflect a prior `with_batching(false)`
    /// call — confirm whether callers rely on the constant result.
    pub fn batching_enabled(&self) -> bool {
        // Batching is implicitly enabled when not explicitly disabled
        true
    }
1171
1172    /// Maybe start a batch based on the current entry.
1173    ///
1174    /// Humans often batch similar work: processing invoices from one vendor,
1175    /// entering expense reports for a trip, reconciling similar items.
1176    fn maybe_start_batch(&mut self, entry: &JournalEntry) {
1177        // Only start batch for non-automated, non-fraud entries
1178        if entry.header.source == TransactionSource::Automated || entry.header.is_fraud {
1179            return;
1180        }
1181
1182        // 15% chance to start a batch (most work is not batched)
1183        if self.rng.random::<f64>() > 0.15 {
1184            return;
1185        }
1186
1187        // Extract key attributes for batching
1188        let base_account = entry
1189            .lines
1190            .first()
1191            .map(|l| l.gl_account.clone())
1192            .unwrap_or_default();
1193
1194        let base_amount = entry.total_debit();
1195
1196        self.batch_state = Some(BatchState {
1197            base_account_number: base_account,
1198            base_amount,
1199            base_business_process: entry.header.business_process,
1200            base_posting_date: entry.header.posting_date,
1201            remaining: self.rng.random_range(2..7), // 2-6 more similar entries
1202        });
1203    }
1204
1205    /// Generate an entry that's part of the current batch.
1206    ///
1207    /// Batched entries have:
1208    /// - Same or very similar business process
1209    /// - Same posting date (batched work done together)
1210    /// - Similar amounts (within ±15%)
1211    /// - Same debit account (processing similar items)
1212    fn generate_batched_entry(&mut self) -> JournalEntry {
1213        use rust_decimal::Decimal;
1214
1215        // Decrement batch counter
1216        if let Some(ref mut state) = self.batch_state {
1217            state.remaining = state.remaining.saturating_sub(1);
1218        }
1219
1220        let Some(batch) = self.batch_state.clone() else {
1221            // This is a programming error - batch_state should be set before calling this method.
1222            // Clear state and fall back to generating a standard entry instead of panicking.
1223            tracing::warn!(
1224                "generate_batched_entry called without batch_state; generating standard entry"
1225            );
1226            self.batch_state = None;
1227            return self.generate();
1228        };
1229
1230        // Use the batch's posting date (work done on same day)
1231        let posting_date = batch.base_posting_date;
1232
1233        self.count += 1;
1234        let document_id = self.generate_deterministic_uuid();
1235
1236        // Select same company (batched work is usually same company)
1237        let company_code = self.company_selector.select(&mut self.rng).to_string();
1238
1239        // Use simplified line spec for batched entries (usually 2-line)
1240        let _line_spec = LineItemSpec {
1241            total_count: 2,
1242            debit_count: 1,
1243            credit_count: 1,
1244            split_type: DebitCreditSplit::Equal,
1245        };
1246
1247        // Batched entries are always manual
1248        let source = TransactionSource::Manual;
1249
1250        // Use the batch's business process
1251        let business_process = batch.base_business_process.unwrap_or(BusinessProcess::R2R);
1252
1253        // Sample time
1254        let time = self.temporal_sampler.sample_time(true);
1255        let created_at = posting_date.and_time(time).and_utc();
1256
1257        // Same user for batched work
1258        let (created_by, user_persona) = self.select_user(false);
1259
1260        // Create header
1261        let mut header =
1262            JournalEntryHeader::with_deterministic_id(company_code, posting_date, document_id);
1263        header.created_at = created_at;
1264        header.source = source;
1265        header.created_by = created_by;
1266        header.user_persona = user_persona;
1267        header.business_process = Some(business_process);
1268        header.document_type = Self::document_type_for_process(business_process).to_string();
1269
1270        // Batched manual entries have Manual source document
1271        header.source_document = Some(DocumentRef::Manual);
1272
1273        // ISA 240 audit flags for batched entries (always manual)
1274        header.is_manual = true;
1275        header.source_system = if self.rng.random::<f64>() < 0.70 {
1276            "manual".to_string()
1277        } else {
1278            "spreadsheet".to_string()
1279        };
1280        header.is_post_close = posting_date.month() == self.end_date.month()
1281            && posting_date.year() == self.end_date.year()
1282            && posting_date.day() > 25;
1283        header.created_date =
1284            posting_date.and_hms_opt(time.hour().min(23), time.minute(), time.second());
1285
1286        // Generate similar amount (within ±15% of base)
1287        let variation = self.rng.random_range(-0.15..0.15);
1288        let varied_amount =
1289            batch.base_amount * (Decimal::ONE + Decimal::try_from(variation).unwrap_or_default());
1290        let total_amount = varied_amount.round_dp(2).max(Decimal::from(1));
1291
1292        // Create the entry
1293        let mut entry = JournalEntry::new(header);
1294
1295        // Use same debit account as batch base
1296        let debit_line = JournalEntryLine::debit(
1297            entry.header.document_id,
1298            1,
1299            batch.base_account_number.clone(),
1300            total_amount,
1301        );
1302        entry.add_line(debit_line);
1303
1304        // Select a credit account
1305        let credit_account = self.select_credit_account().account_number.clone();
1306        let credit_line =
1307            JournalEntryLine::credit(entry.header.document_id, 2, credit_account, total_amount);
1308        entry.add_line(credit_line);
1309
1310        // Enrich line items with account descriptions, cost centers, etc.
1311        self.enrich_line_items(&mut entry);
1312
1313        // Apply persona-based errors if enabled
1314        if self.persona_errors_enabled {
1315            self.maybe_inject_persona_error(&mut entry);
1316        }
1317
1318        // Apply approval workflow if enabled
1319        if self.approval_enabled {
1320            self.maybe_apply_approval_workflow(&mut entry, posting_date);
1321        }
1322
1323        // Populate approved_by / approval_date from the approval workflow
1324        self.populate_approval_fields(&mut entry, posting_date);
1325
1326        // Clear batch state if no more entries remaining
1327        if batch.remaining <= 1 {
1328            self.batch_state = None;
1329        }
1330
1331        entry
1332    }
1333
1334    /// Maybe inject a persona-appropriate error based on the persona's error rate.
1335    fn maybe_inject_persona_error(&mut self, entry: &mut JournalEntry) {
1336        // Parse persona from the entry header
1337        let persona_str = &entry.header.user_persona;
1338        let persona = match persona_str.to_lowercase().as_str() {
1339            s if s.contains("junior") => UserPersona::JuniorAccountant,
1340            s if s.contains("senior") => UserPersona::SeniorAccountant,
1341            s if s.contains("controller") => UserPersona::Controller,
1342            s if s.contains("manager") => UserPersona::Manager,
1343            s if s.contains("executive") => UserPersona::Executive,
1344            _ => return, // Don't inject errors for unknown personas
1345        };
1346
1347        // Get base error rate from persona
1348        let base_error_rate = persona.error_rate();
1349
1350        // Apply stress factors based on posting date
1351        let adjusted_rate = self.apply_stress_factors(base_error_rate, entry.header.posting_date);
1352
1353        // Check if error should occur based on adjusted rate
1354        if self.rng.random::<f64>() >= adjusted_rate {
1355            return; // No error this time
1356        }
1357
1358        // Select and inject persona-appropriate error
1359        self.inject_human_error(entry, persona);
1360    }
1361
1362    /// Apply contextual stress factors to the base error rate.
1363    ///
1364    /// Stress factors increase error likelihood during:
1365    /// - Month-end (day >= 28): 1.5x more errors due to deadline pressure
1366    /// - Quarter-end (Mar, Jun, Sep, Dec): additional 25% boost
1367    /// - Year-end (December 28-31): 2.0x more errors due to audit pressure
1368    /// - Monday morning (catch-up work): 20% more errors
1369    /// - Friday afternoon (rushing to leave): 30% more errors
1370    fn apply_stress_factors(&self, base_rate: f64, posting_date: chrono::NaiveDate) -> f64 {
1371        use chrono::Datelike;
1372
1373        let mut rate = base_rate;
1374        let day = posting_date.day();
1375        let month = posting_date.month();
1376
1377        // Year-end stress (December 28-31): double the error rate
1378        if month == 12 && day >= 28 {
1379            rate *= 2.0;
1380            return rate.min(0.5); // Cap at 50% to keep it realistic
1381        }
1382
1383        // Quarter-end stress (last days of Mar, Jun, Sep, Dec)
1384        if matches!(month, 3 | 6 | 9 | 12) && day >= 28 {
1385            rate *= 1.75; // 75% more errors at quarter end
1386            return rate.min(0.4);
1387        }
1388
1389        // Month-end stress (last 3 days of month)
1390        if day >= 28 {
1391            rate *= 1.5; // 50% more errors at month end
1392        }
1393
1394        // Day-of-week stress effects
1395        let weekday = posting_date.weekday();
1396        match weekday {
1397            chrono::Weekday::Mon => {
1398                // Monday: catching up, often rushed
1399                rate *= 1.2;
1400            }
1401            chrono::Weekday::Fri => {
1402                // Friday: rushing to finish before weekend
1403                rate *= 1.3;
1404            }
1405            _ => {}
1406        }
1407
1408        // Cap at 40% to keep it realistic
1409        rate.min(0.4)
1410    }
1411
1412    /// Apply human-like variation to an amount.
1413    ///
1414    /// Humans don't enter perfectly calculated amounts - they:
1415    /// - Round amounts differently
1416    /// - Estimate instead of calculating exactly
1417    /// - Make small input variations
1418    ///
1419    /// This applies small variations (typically ±2%) to make amounts more realistic.
1420    fn apply_human_variation(&mut self, amount: rust_decimal::Decimal) -> rust_decimal::Decimal {
1421        use rust_decimal::Decimal;
1422
1423        // Automated transactions or very small amounts don't get variation
1424        if amount < Decimal::from(10) {
1425            return amount;
1426        }
1427
1428        // 70% chance of human variation being applied
1429        if self.rng.random::<f64>() > 0.70 {
1430            return amount;
1431        }
1432
1433        // Decide which type of human variation to apply
1434        let variation_type: u8 = self.rng.random_range(0..4);
1435
1436        match variation_type {
1437            0 => {
1438                // ±2% variation (common for estimated amounts)
1439                let variation_pct = self.rng.random_range(-0.02..0.02);
1440                let variation = amount * Decimal::try_from(variation_pct).unwrap_or_default();
1441                (amount + variation).round_dp(2)
1442            }
1443            1 => {
1444                // Round to nearest $10
1445                let ten = Decimal::from(10);
1446                (amount / ten).round() * ten
1447            }
1448            2 => {
1449                // Round to nearest $100 (for larger amounts)
1450                if amount >= Decimal::from(500) {
1451                    let hundred = Decimal::from(100);
1452                    (amount / hundred).round() * hundred
1453                } else {
1454                    amount
1455                }
1456            }
1457            3 => {
1458                // Slight under/over payment (±$0.01 to ±$1.00)
1459                let cents = Decimal::new(self.rng.random_range(-100..100), 2);
1460                (amount + cents).max(Decimal::ZERO).round_dp(2)
1461            }
1462            _ => amount,
1463        }
1464    }
1465
1466    /// Rebalance an entry after a one-sided amount modification.
1467    ///
1468    /// When an error modifies one line's amount, this finds a line on the opposite
1469    /// side (credit if modified was debit, or vice versa) and adjusts it by the
1470    /// same impact to maintain balance.
1471    fn rebalance_entry(entry: &mut JournalEntry, modified_was_debit: bool, impact: Decimal) {
1472        // Find a line on the opposite side to adjust
1473        let balancing_idx = entry.lines.iter().position(|l| {
1474            if modified_was_debit {
1475                l.credit_amount > Decimal::ZERO
1476            } else {
1477                l.debit_amount > Decimal::ZERO
1478            }
1479        });
1480
1481        if let Some(idx) = balancing_idx {
1482            if modified_was_debit {
1483                entry.lines[idx].credit_amount += impact;
1484            } else {
1485                entry.lines[idx].debit_amount += impact;
1486            }
1487        }
1488    }
1489
1490    /// Inject a human-like error based on the persona.
1491    ///
1492    /// All error types maintain balance - amount modifications are applied to both sides.
1493    /// Entries are marked with [HUMAN_ERROR:*] tags in header_text for ML detection.
1494    fn inject_human_error(&mut self, entry: &mut JournalEntry, persona: UserPersona) {
1495        use rust_decimal::Decimal;
1496
1497        // Different personas make different types of errors
1498        let error_type: u8 = match persona {
1499            UserPersona::JuniorAccountant => {
1500                // Junior accountants make more varied errors
1501                self.rng.random_range(0..5)
1502            }
1503            UserPersona::SeniorAccountant => {
1504                // Senior accountants mainly make transposition errors
1505                self.rng.random_range(0..3)
1506            }
1507            UserPersona::Controller | UserPersona::Manager => {
1508                // Controllers/managers mainly make rounding or cutoff errors
1509                self.rng.random_range(3..5)
1510            }
1511            _ => return,
1512        };
1513
1514        match error_type {
1515            0 => {
1516                // Transposed digits in an amount
1517                if let Some(line) = entry.lines.get_mut(0) {
1518                    let is_debit = line.debit_amount > Decimal::ZERO;
1519                    let original_amount = if is_debit {
1520                        line.debit_amount
1521                    } else {
1522                        line.credit_amount
1523                    };
1524
1525                    // Simple digit swap in the string representation
1526                    let s = original_amount.to_string();
1527                    if s.len() >= 2 {
1528                        let chars: Vec<char> = s.chars().collect();
1529                        let pos = self.rng.random_range(0..chars.len().saturating_sub(1));
1530                        if chars[pos].is_ascii_digit()
1531                            && chars.get(pos + 1).is_some_and(char::is_ascii_digit)
1532                        {
1533                            let mut new_chars = chars;
1534                            new_chars.swap(pos, pos + 1);
1535                            if let Ok(new_amount) =
1536                                new_chars.into_iter().collect::<String>().parse::<Decimal>()
1537                            {
1538                                let impact = new_amount - original_amount;
1539
1540                                // Apply to the modified line
1541                                if is_debit {
1542                                    entry.lines[0].debit_amount = new_amount;
1543                                } else {
1544                                    entry.lines[0].credit_amount = new_amount;
1545                                }
1546
1547                                // Rebalance the entry
1548                                Self::rebalance_entry(entry, is_debit, impact);
1549
1550                                entry.header.header_text = Some(
1551                                    entry.header.header_text.clone().unwrap_or_default()
1552                                        + " [HUMAN_ERROR:TRANSPOSITION]",
1553                                );
1554                            }
1555                        }
1556                    }
1557                }
1558            }
1559            1 => {
1560                // Wrong decimal place (off by factor of 10)
1561                if let Some(line) = entry.lines.get_mut(0) {
1562                    let is_debit = line.debit_amount > Decimal::ZERO;
1563                    let original_amount = if is_debit {
1564                        line.debit_amount
1565                    } else {
1566                        line.credit_amount
1567                    };
1568
1569                    let new_amount = original_amount * Decimal::new(10, 0);
1570                    let impact = new_amount - original_amount;
1571
1572                    // Apply to the modified line
1573                    if is_debit {
1574                        entry.lines[0].debit_amount = new_amount;
1575                    } else {
1576                        entry.lines[0].credit_amount = new_amount;
1577                    }
1578
1579                    // Rebalance the entry
1580                    Self::rebalance_entry(entry, is_debit, impact);
1581
1582                    entry.header.header_text = Some(
1583                        entry.header.header_text.clone().unwrap_or_default()
1584                            + " [HUMAN_ERROR:DECIMAL_SHIFT]",
1585                    );
1586                }
1587            }
1588            2 => {
1589                // Typo in description (doesn't affect balance)
1590                if let Some(ref mut text) = entry.header.header_text {
1591                    let typos = ["teh", "adn", "wiht", "taht", "recieve"];
1592                    let correct = ["the", "and", "with", "that", "receive"];
1593                    let idx = self.rng.random_range(0..typos.len());
1594                    if text.to_lowercase().contains(correct[idx]) {
1595                        *text = text.replace(correct[idx], typos[idx]);
1596                        *text = format!("{text} [HUMAN_ERROR:TYPO]");
1597                    }
1598                }
1599            }
1600            3 => {
1601                // Rounding to round number
1602                if let Some(line) = entry.lines.get_mut(0) {
1603                    let is_debit = line.debit_amount > Decimal::ZERO;
1604                    let original_amount = if is_debit {
1605                        line.debit_amount
1606                    } else {
1607                        line.credit_amount
1608                    };
1609
1610                    let new_amount =
1611                        (original_amount / Decimal::new(100, 0)).round() * Decimal::new(100, 0);
1612                    let impact = new_amount - original_amount;
1613
1614                    // Apply to the modified line
1615                    if is_debit {
1616                        entry.lines[0].debit_amount = new_amount;
1617                    } else {
1618                        entry.lines[0].credit_amount = new_amount;
1619                    }
1620
1621                    // Rebalance the entry
1622                    Self::rebalance_entry(entry, is_debit, impact);
1623
1624                    entry.header.header_text = Some(
1625                        entry.header.header_text.clone().unwrap_or_default()
1626                            + " [HUMAN_ERROR:ROUNDED]",
1627                    );
1628                }
1629            }
1630            4 => {
1631                // Late posting marker (document date much earlier than posting date)
1632                // This doesn't create an imbalance
1633                if entry.header.document_date == entry.header.posting_date {
1634                    let days_late = self.rng.random_range(5..15);
1635                    entry.header.document_date =
1636                        entry.header.posting_date - chrono::Duration::days(days_late);
1637                    entry.header.header_text = Some(
1638                        entry.header.header_text.clone().unwrap_or_default()
1639                            + " [HUMAN_ERROR:LATE_POSTING]",
1640                    );
1641                }
1642            }
1643            _ => {}
1644        }
1645    }
1646
1647    /// Apply approval workflow for high-value transactions.
1648    ///
1649    /// If the entry amount exceeds the approval threshold, simulate an
1650    /// approval workflow with appropriate approvers based on amount.
1651    fn maybe_apply_approval_workflow(
1652        &mut self,
1653        entry: &mut JournalEntry,
1654        _posting_date: NaiveDate,
1655    ) {
1656        use rust_decimal::Decimal;
1657
1658        let amount = entry.total_debit();
1659
1660        // Skip if amount is below threshold
1661        if amount <= self.approval_threshold {
1662            // Auto-approved below threshold
1663            let workflow = ApprovalWorkflow::auto_approved(
1664                entry.header.created_by.clone(),
1665                entry.header.user_persona.clone(),
1666                amount,
1667                entry.header.created_at,
1668            );
1669            entry.header.approval_workflow = Some(workflow);
1670            return;
1671        }
1672
1673        // Mark as SOX relevant for high-value transactions
1674        entry.header.sox_relevant = true;
1675
1676        // Determine required approval levels based on amount
1677        let required_levels = if amount > Decimal::new(100000, 0) {
1678            3 // Executive approval required
1679        } else if amount > Decimal::new(50000, 0) {
1680            2 // Senior management approval
1681        } else {
1682            1 // Manager approval
1683        };
1684
1685        // Create the approval workflow
1686        let mut workflow = ApprovalWorkflow::new(
1687            entry.header.created_by.clone(),
1688            entry.header.user_persona.clone(),
1689            amount,
1690        );
1691        workflow.required_levels = required_levels;
1692
1693        // Simulate submission
1694        let submit_time = entry.header.created_at;
1695        let submit_action = ApprovalAction::new(
1696            entry.header.created_by.clone(),
1697            entry.header.user_persona.clone(),
1698            self.parse_persona(&entry.header.user_persona),
1699            ApprovalActionType::Submit,
1700            0,
1701        )
1702        .with_timestamp(submit_time);
1703
1704        workflow.actions.push(submit_action);
1705        workflow.status = ApprovalStatus::Pending;
1706        workflow.submitted_at = Some(submit_time);
1707
1708        // Simulate approvals with realistic delays
1709        let mut current_time = submit_time;
1710        for level in 1..=required_levels {
1711            // Add delay for approval (1-3 business hours per level)
1712            let delay_hours = self.rng.random_range(1..4);
1713            current_time += chrono::Duration::hours(delay_hours);
1714
1715            // Skip weekends
1716            while current_time.weekday() == chrono::Weekday::Sat
1717                || current_time.weekday() == chrono::Weekday::Sun
1718            {
1719                current_time += chrono::Duration::days(1);
1720            }
1721
1722            // Generate approver based on level
1723            let (approver_id, approver_role) = self.select_approver(level);
1724
1725            let approve_action = ApprovalAction::new(
1726                approver_id.clone(),
1727                approver_role.to_string(),
1728                approver_role,
1729                ApprovalActionType::Approve,
1730                level,
1731            )
1732            .with_timestamp(current_time);
1733
1734            workflow.actions.push(approve_action);
1735            workflow.current_level = level;
1736        }
1737
1738        // Mark as approved
1739        workflow.status = ApprovalStatus::Approved;
1740        workflow.approved_at = Some(current_time);
1741
1742        entry.header.approval_workflow = Some(workflow);
1743    }
1744
1745    /// Select an approver based on the required level.
1746    fn select_approver(&mut self, level: u8) -> (String, UserPersona) {
1747        let persona = match level {
1748            1 => UserPersona::Manager,
1749            2 => UserPersona::Controller,
1750            _ => UserPersona::Executive,
1751        };
1752
1753        // Try to get from user pool first
1754        if let Some(ref pool) = self.user_pool {
1755            if let Some(user) = pool.get_random_user(persona, &mut self.rng) {
1756                return (user.user_id.clone(), persona);
1757            }
1758        }
1759
1760        // Fallback to generated approver
1761        let approver_id = match persona {
1762            UserPersona::Manager => format!("MGR{:04}", self.rng.random_range(1..100)),
1763            UserPersona::Controller => format!("CTRL{:04}", self.rng.random_range(1..20)),
1764            UserPersona::Executive => format!("EXEC{:04}", self.rng.random_range(1..10)),
1765            _ => format!("USR{:04}", self.rng.random_range(1..1000)),
1766        };
1767
1768        (approver_id, persona)
1769    }
1770
1771    /// Parse user persona from string.
1772    fn parse_persona(&self, persona_str: &str) -> UserPersona {
1773        match persona_str.to_lowercase().as_str() {
1774            s if s.contains("junior") => UserPersona::JuniorAccountant,
1775            s if s.contains("senior") => UserPersona::SeniorAccountant,
1776            s if s.contains("controller") => UserPersona::Controller,
1777            s if s.contains("manager") => UserPersona::Manager,
1778            s if s.contains("executive") => UserPersona::Executive,
1779            s if s.contains("automated") || s.contains("system") => UserPersona::AutomatedSystem,
1780            _ => UserPersona::JuniorAccountant, // Default
1781        }
1782    }
1783
1784    /// Enable or disable approval workflow.
1785    pub fn with_approval(mut self, enabled: bool) -> Self {
1786        self.approval_enabled = enabled;
1787        self
1788    }
1789
1790    /// Set the approval threshold amount.
1791    pub fn with_approval_threshold(mut self, threshold: rust_decimal::Decimal) -> Self {
1792        self.approval_threshold = threshold;
1793        self
1794    }
1795
1796    /// Set the SOD violation rate for approval tracking.
1797    ///
1798    /// When a transaction is approved, there is a `rate` probability (0.0 to 1.0)
1799    /// that the approver is the same as the creator, which constitutes a SOD violation.
1800    /// Default is 0.10 (10%).
1801    pub fn with_sod_violation_rate(mut self, rate: f64) -> Self {
1802        self.sod_violation_rate = rate;
1803        self
1804    }
1805
1806    /// Populate `approved_by` and `approval_date` from the approval workflow,
1807    /// and flag SOD violations when the approver matches the creator.
1808    fn populate_approval_fields(&mut self, entry: &mut JournalEntry, posting_date: NaiveDate) {
1809        if let Some(ref workflow) = entry.header.approval_workflow {
1810            // Extract the last approver from the workflow actions
1811            let last_approver = workflow
1812                .actions
1813                .iter()
1814                .rev()
1815                .find(|a| matches!(a.action, ApprovalActionType::Approve));
1816
1817            if let Some(approver_action) = last_approver {
1818                entry.header.approved_by = Some(approver_action.actor_id.clone());
1819                entry.header.approval_date = Some(approver_action.action_timestamp.date_naive());
1820            } else {
1821                // No explicit approver (auto-approved); use the preparer
1822                entry.header.approved_by = Some(workflow.preparer_id.clone());
1823                entry.header.approval_date = Some(posting_date);
1824            }
1825
1826            // Inject SOD violation: with configured probability, set approver = creator
1827            if self.rng.random::<f64>() < self.sod_violation_rate {
1828                let creator = entry.header.created_by.clone();
1829                entry.header.approved_by = Some(creator);
1830                entry.header.sod_violation = true;
1831                entry.header.sod_conflict_type = Some(SodConflictType::PreparerApprover);
1832            }
1833        }
1834    }
1835
1836    /// Set the temporal drift controller for simulating distribution changes over time.
1837    ///
1838    /// When drift is enabled, amounts and other distributions will shift based on
1839    /// the period (month) to simulate realistic temporal evolution like inflation
1840    /// or increasing fraud rates.
1841    pub fn with_drift_controller(mut self, controller: DriftController) -> Self {
1842        self.drift_controller = Some(controller);
1843        self
1844    }
1845
1846    /// Set drift configuration directly.
1847    ///
1848    /// Creates a drift controller from the config. Total periods is calculated
1849    /// from the date range.
1850    pub fn with_drift_config(mut self, config: DriftConfig, seed: u64) -> Self {
1851        if config.enabled {
1852            let total_periods = self.calculate_total_periods();
1853            self.drift_controller = Some(DriftController::new(config, seed, total_periods));
1854        }
1855        self
1856    }
1857
1858    /// Calculate total periods (months) in the date range.
1859    fn calculate_total_periods(&self) -> u32 {
1860        let start_year = self.start_date.year();
1861        let start_month = self.start_date.month();
1862        let end_year = self.end_date.year();
1863        let end_month = self.end_date.month();
1864
1865        ((end_year - start_year) * 12 + (end_month as i32 - start_month as i32) + 1).max(1) as u32
1866    }
1867
1868    /// Calculate the period number (0-indexed) for a given date.
1869    fn date_to_period(&self, date: NaiveDate) -> u32 {
1870        let start_year = self.start_date.year();
1871        let start_month = self.start_date.month() as i32;
1872        let date_year = date.year();
1873        let date_month = date.month() as i32;
1874
1875        ((date_year - start_year) * 12 + (date_month - start_month)).max(0) as u32
1876    }
1877
1878    /// Get drift adjustments for a given date.
1879    fn get_drift_adjustments(&self, date: NaiveDate) -> DriftAdjustments {
1880        if let Some(ref controller) = self.drift_controller {
1881            let period = self.date_to_period(date);
1882            controller.compute_adjustments(period)
1883        } else {
1884            DriftAdjustments::none()
1885        }
1886    }
1887
1888    /// Select a user from the pool or generate a generic user ID.
1889    #[inline]
1890    fn select_user(&mut self, is_automated: bool) -> (String, String) {
1891        if let Some(ref pool) = self.user_pool {
1892            let persona = if is_automated {
1893                UserPersona::AutomatedSystem
1894            } else {
1895                // Random distribution among human personas
1896                let roll: f64 = self.rng.random();
1897                if roll < 0.4 {
1898                    UserPersona::JuniorAccountant
1899                } else if roll < 0.7 {
1900                    UserPersona::SeniorAccountant
1901                } else if roll < 0.85 {
1902                    UserPersona::Controller
1903                } else {
1904                    UserPersona::Manager
1905                }
1906            };
1907
1908            if let Some(user) = pool.get_random_user(persona, &mut self.rng) {
1909                return (user.user_id.clone(), user.persona.to_string());
1910            }
1911        }
1912
1913        // Fallback to generic format
1914        if is_automated {
1915            (
1916                format!("BATCH{:04}", self.rng.random_range(1..=20)),
1917                "automated_system".to_string(),
1918            )
1919        } else {
1920            (
1921                format!("USER{:04}", self.rng.random_range(1..=40)),
1922                "senior_accountant".to_string(),
1923            )
1924        }
1925    }
1926
1927    /// Select transaction source based on configuration weights.
1928    #[inline]
1929    fn select_source(&mut self) -> TransactionSource {
1930        let roll: f64 = self.rng.random();
1931        let dist = &self.config.source_distribution;
1932
1933        if roll < dist.manual {
1934            TransactionSource::Manual
1935        } else if roll < dist.manual + dist.automated {
1936            TransactionSource::Automated
1937        } else if roll < dist.manual + dist.automated + dist.recurring {
1938            TransactionSource::Recurring
1939        } else {
1940            TransactionSource::Adjustment
1941        }
1942    }
1943
1944    /// Select a business process based on configuration weights.
1945    #[inline]
1946    /// Map a business process to a SAP-style document type code.
1947    ///
1948    /// - P2P → "KR" (vendor invoice)
1949    /// - O2C → "DR" (customer invoice)
1950    /// - R2R → "SA" (general journal)
1951    /// - H2R → "HR" (HR posting)
1952    /// - A2R → "AA" (asset posting)
1953    /// - others → "SA"
1954    fn document_type_for_process(process: BusinessProcess) -> &'static str {
1955        match process {
1956            BusinessProcess::P2P => "KR",
1957            BusinessProcess::O2C => "DR",
1958            BusinessProcess::R2R => "SA",
1959            BusinessProcess::H2R => "HR",
1960            BusinessProcess::A2R => "AA",
1961            _ => "SA",
1962        }
1963    }
1964
1965    fn select_business_process(&mut self) -> BusinessProcess {
1966        let roll: f64 = self.rng.random();
1967
1968        // Default weights: O2C=35%, P2P=30%, R2R=20%, H2R=10%, A2R=5%
1969        if roll < 0.35 {
1970            BusinessProcess::O2C
1971        } else if roll < 0.65 {
1972            BusinessProcess::P2P
1973        } else if roll < 0.85 {
1974            BusinessProcess::R2R
1975        } else if roll < 0.95 {
1976            BusinessProcess::H2R
1977        } else {
1978            BusinessProcess::A2R
1979        }
1980    }
1981
1982    #[inline]
1983    fn select_debit_account(&mut self) -> &GLAccount {
1984        let accounts = self.coa.get_accounts_by_type(AccountType::Asset);
1985        let expense_accounts = self.coa.get_accounts_by_type(AccountType::Expense);
1986
1987        // 60% asset, 40% expense for debits
1988        let all: Vec<_> = if self.rng.random::<f64>() < 0.6 {
1989            accounts
1990        } else {
1991            expense_accounts
1992        };
1993
1994        all.choose(&mut self.rng).copied().unwrap_or_else(|| {
1995            tracing::warn!(
1996                "Account selection returned empty list, falling back to first COA account"
1997            );
1998            &self.coa.accounts[0]
1999        })
2000    }
2001
2002    #[inline]
2003    fn select_credit_account(&mut self) -> &GLAccount {
2004        let liability_accounts = self.coa.get_accounts_by_type(AccountType::Liability);
2005        let revenue_accounts = self.coa.get_accounts_by_type(AccountType::Revenue);
2006
2007        // 60% liability, 40% revenue for credits
2008        let all: Vec<_> = if self.rng.random::<f64>() < 0.6 {
2009            liability_accounts
2010        } else {
2011            revenue_accounts
2012        };
2013
2014        all.choose(&mut self.rng).copied().unwrap_or_else(|| {
2015            tracing::warn!(
2016                "Account selection returned empty list, falling back to first COA account"
2017            );
2018            &self.coa.accounts[0]
2019        })
2020    }
2021}
2022
2023impl Generator for JournalEntryGenerator {
2024    type Item = JournalEntry;
2025    type Config = (
2026        TransactionConfig,
2027        Arc<ChartOfAccounts>,
2028        Vec<String>,
2029        NaiveDate,
2030        NaiveDate,
2031    );
2032
2033    fn new(config: Self::Config, seed: u64) -> Self {
2034        Self::new_with_params(config.0, config.1, config.2, config.3, config.4, seed)
2035    }
2036
2037    fn generate_one(&mut self) -> Self::Item {
2038        self.generate()
2039    }
2040
2041    fn reset(&mut self) {
2042        self.rng = seeded_rng(self.seed, 0);
2043        self.line_sampler.reset(self.seed + 1);
2044        self.amount_sampler.reset(self.seed + 2);
2045        self.temporal_sampler.reset(self.seed + 3);
2046        self.count = 0;
2047        self.uuid_factory.reset();
2048
2049        // Reset reference generator by recreating it
2050        let mut ref_gen = ReferenceGenerator::new(
2051            self.start_date.year(),
2052            self.companies
2053                .first()
2054                .map(std::string::String::as_str)
2055                .unwrap_or("1000"),
2056        );
2057        ref_gen.set_prefix(
2058            ReferenceType::Invoice,
2059            &self.template_config.references.invoice_prefix,
2060        );
2061        ref_gen.set_prefix(
2062            ReferenceType::PurchaseOrder,
2063            &self.template_config.references.po_prefix,
2064        );
2065        ref_gen.set_prefix(
2066            ReferenceType::SalesOrder,
2067            &self.template_config.references.so_prefix,
2068        );
2069        self.reference_generator = ref_gen;
2070    }
2071
2072    fn count(&self) -> u64 {
2073        self.count
2074    }
2075
2076    fn seed(&self) -> u64 {
2077        self.seed
2078    }
2079}
2080
2081use datasynth_core::traits::ParallelGenerator;
2082
2083impl ParallelGenerator for JournalEntryGenerator {
2084    /// Split this generator into `parts` independent sub-generators.
2085    ///
2086    /// Each sub-generator gets a deterministic seed derived from the parent seed
2087    /// and its partition index, plus a partitioned UUID factory to avoid contention.
2088    /// The results are deterministic for a given partition count.
2089    fn split(self, parts: usize) -> Vec<Self> {
2090        let parts = parts.max(1);
2091        (0..parts)
2092            .map(|i| {
2093                // Derive a unique seed per partition using a golden-ratio constant
2094                let sub_seed = self
2095                    .seed
2096                    .wrapping_add((i as u64).wrapping_mul(0x9E3779B97F4A7C15));
2097
2098                let mut gen = JournalEntryGenerator::new_with_full_config(
2099                    self.config.clone(),
2100                    Arc::clone(&self.coa),
2101                    self.companies.clone(),
2102                    self.start_date,
2103                    self.end_date,
2104                    sub_seed,
2105                    self.template_config.clone(),
2106                    self.user_pool.clone(),
2107                );
2108
2109                // Copy over configuration state
2110                gen.company_selector = self.company_selector.clone();
2111                gen.vendor_pool = self.vendor_pool.clone();
2112                gen.customer_pool = self.customer_pool.clone();
2113                gen.material_pool = self.material_pool.clone();
2114                gen.using_real_master_data = self.using_real_master_data;
2115                gen.fraud_config = self.fraud_config.clone();
2116                gen.persona_errors_enabled = self.persona_errors_enabled;
2117                gen.approval_enabled = self.approval_enabled;
2118                gen.approval_threshold = self.approval_threshold;
2119                gen.sod_violation_rate = self.sod_violation_rate;
2120
2121                // Use partitioned UUID factory to eliminate atomic contention
2122                gen.uuid_factory = DeterministicUuidFactory::for_partition(
2123                    sub_seed,
2124                    GeneratorType::JournalEntry,
2125                    i as u8,
2126                );
2127
2128                // Copy temporal patterns if configured
2129                if let Some(ref config) = self.temporal_patterns_config {
2130                    gen.temporal_patterns_config = Some(config.clone());
2131                    // Rebuild business day calculator from the stored config
2132                    if config.business_days.enabled {
2133                        if let Some(ref bdc) = self.business_day_calculator {
2134                            gen.business_day_calculator = Some(bdc.clone());
2135                        }
2136                    }
2137                    // Rebuild processing lag calculator with partition seed
2138                    if config.processing_lags.enabled {
2139                        let lag_config =
2140                            Self::convert_processing_lag_config(&config.processing_lags);
2141                        gen.processing_lag_calculator =
2142                            Some(ProcessingLagCalculator::with_config(sub_seed, lag_config));
2143                    }
2144                }
2145
2146                // Copy drift controller if present
2147                if let Some(ref dc) = self.drift_controller {
2148                    gen.drift_controller = Some(dc.clone());
2149                }
2150
2151                gen
2152            })
2153            .collect()
2154    }
2155}
2156
2157#[cfg(test)]
2158#[allow(clippy::unwrap_used)]
2159mod tests {
2160    use super::*;
2161    use crate::ChartOfAccountsGenerator;
2162
2163    #[test]
2164    fn test_generate_balanced_entries() {
2165        let mut coa_gen =
2166            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2167        let coa = Arc::new(coa_gen.generate());
2168
2169        let mut je_gen = JournalEntryGenerator::new_with_params(
2170            TransactionConfig::default(),
2171            coa,
2172            vec!["1000".to_string()],
2173            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2174            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2175            42,
2176        );
2177
2178        let mut balanced_count = 0;
2179        for _ in 0..100 {
2180            let entry = je_gen.generate();
2181
2182            // Skip entries with human errors as they may be intentionally unbalanced
2183            let has_human_error = entry
2184                .header
2185                .header_text
2186                .as_ref()
2187                .map(|t| t.contains("[HUMAN_ERROR:"))
2188                .unwrap_or(false);
2189
2190            if !has_human_error {
2191                assert!(
2192                    entry.is_balanced(),
2193                    "Entry {:?} is not balanced",
2194                    entry.header.document_id
2195                );
2196                balanced_count += 1;
2197            }
2198            assert!(entry.line_count() >= 2, "Entry has fewer than 2 lines");
2199        }
2200
2201        // Ensure most entries are balanced (human errors are rare)
2202        assert!(
2203            balanced_count >= 80,
2204            "Expected at least 80 balanced entries, got {}",
2205            balanced_count
2206        );
2207    }
2208
2209    #[test]
2210    fn test_deterministic_generation() {
2211        let mut coa_gen =
2212            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2213        let coa = Arc::new(coa_gen.generate());
2214
2215        let mut gen1 = JournalEntryGenerator::new_with_params(
2216            TransactionConfig::default(),
2217            Arc::clone(&coa),
2218            vec!["1000".to_string()],
2219            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2220            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2221            42,
2222        );
2223
2224        let mut gen2 = JournalEntryGenerator::new_with_params(
2225            TransactionConfig::default(),
2226            coa,
2227            vec!["1000".to_string()],
2228            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2229            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2230            42,
2231        );
2232
2233        for _ in 0..50 {
2234            let e1 = gen1.generate();
2235            let e2 = gen2.generate();
2236            assert_eq!(e1.header.document_id, e2.header.document_id);
2237            assert_eq!(e1.total_debit(), e2.total_debit());
2238        }
2239    }
2240
2241    #[test]
2242    fn test_templates_generate_descriptions() {
2243        let mut coa_gen =
2244            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2245        let coa = Arc::new(coa_gen.generate());
2246
2247        // Enable all template features
2248        let template_config = TemplateConfig {
2249            names: datasynth_config::schema::NameTemplateConfig {
2250                generate_realistic_names: true,
2251                email_domain: "test.com".to_string(),
2252                culture_distribution: datasynth_config::schema::CultureDistribution::default(),
2253            },
2254            descriptions: datasynth_config::schema::DescriptionTemplateConfig {
2255                generate_header_text: true,
2256                generate_line_text: true,
2257            },
2258            references: datasynth_config::schema::ReferenceTemplateConfig {
2259                generate_references: true,
2260                invoice_prefix: "TEST-INV".to_string(),
2261                po_prefix: "TEST-PO".to_string(),
2262                so_prefix: "TEST-SO".to_string(),
2263            },
2264        };
2265
2266        let mut je_gen = JournalEntryGenerator::new_with_full_config(
2267            TransactionConfig::default(),
2268            coa,
2269            vec!["1000".to_string()],
2270            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2271            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2272            42,
2273            template_config,
2274            None,
2275        )
2276        .with_persona_errors(false); // Disable for template testing
2277
2278        for _ in 0..10 {
2279            let entry = je_gen.generate();
2280
2281            // Verify header text is populated
2282            assert!(
2283                entry.header.header_text.is_some(),
2284                "Header text should be populated"
2285            );
2286
2287            // Verify reference is populated
2288            assert!(
2289                entry.header.reference.is_some(),
2290                "Reference should be populated"
2291            );
2292
2293            // Verify business process is set
2294            assert!(
2295                entry.header.business_process.is_some(),
2296                "Business process should be set"
2297            );
2298
2299            // Verify line text is populated
2300            for line in &entry.lines {
2301                assert!(line.line_text.is_some(), "Line text should be populated");
2302            }
2303
2304            // Entry should still be balanced
2305            assert!(entry.is_balanced());
2306        }
2307    }
2308
2309    #[test]
2310    fn test_user_pool_integration() {
2311        let mut coa_gen =
2312            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2313        let coa = Arc::new(coa_gen.generate());
2314
2315        let companies = vec!["1000".to_string()];
2316
2317        // Generate user pool
2318        let mut user_gen = crate::UserGenerator::new(42);
2319        let user_pool = user_gen.generate_standard(&companies);
2320
2321        let mut je_gen = JournalEntryGenerator::new_with_full_config(
2322            TransactionConfig::default(),
2323            coa,
2324            companies,
2325            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2326            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2327            42,
2328            TemplateConfig::default(),
2329            Some(user_pool),
2330        );
2331
2332        // Generate entries and verify user IDs are from pool
2333        for _ in 0..20 {
2334            let entry = je_gen.generate();
2335
2336            // User ID should not be generic BATCH/USER format when pool is used
2337            // (though it may still fall back if random selection misses)
2338            assert!(!entry.header.created_by.is_empty());
2339        }
2340    }
2341
2342    #[test]
2343    fn test_master_data_connection() {
2344        let mut coa_gen =
2345            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2346        let coa = Arc::new(coa_gen.generate());
2347
2348        // Create test vendors
2349        let vendors = vec![
2350            Vendor::new("V-TEST-001", "Test Vendor Alpha", VendorType::Supplier),
2351            Vendor::new("V-TEST-002", "Test Vendor Beta", VendorType::Technology),
2352        ];
2353
2354        // Create test customers
2355        let customers = vec![
2356            Customer::new("C-TEST-001", "Test Customer One", CustomerType::Corporate),
2357            Customer::new(
2358                "C-TEST-002",
2359                "Test Customer Two",
2360                CustomerType::SmallBusiness,
2361            ),
2362        ];
2363
2364        // Create test materials
2365        let materials = vec![Material::new(
2366            "MAT-TEST-001",
2367            "Test Material A",
2368            MaterialType::RawMaterial,
2369        )];
2370
2371        // Create generator with master data
2372        let generator = JournalEntryGenerator::new_with_params(
2373            TransactionConfig::default(),
2374            coa,
2375            vec!["1000".to_string()],
2376            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2377            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2378            42,
2379        );
2380
2381        // Without master data
2382        assert!(!generator.is_using_real_master_data());
2383
2384        // Connect master data
2385        let generator_with_data = generator
2386            .with_vendors(&vendors)
2387            .with_customers(&customers)
2388            .with_materials(&materials);
2389
2390        // Should now be using real master data
2391        assert!(generator_with_data.is_using_real_master_data());
2392    }
2393
2394    #[test]
2395    fn test_with_master_data_convenience_method() {
2396        let mut coa_gen =
2397            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2398        let coa = Arc::new(coa_gen.generate());
2399
2400        let vendors = vec![Vendor::new("V-001", "Vendor One", VendorType::Supplier)];
2401        let customers = vec![Customer::new(
2402            "C-001",
2403            "Customer One",
2404            CustomerType::Corporate,
2405        )];
2406        let materials = vec![Material::new(
2407            "MAT-001",
2408            "Material One",
2409            MaterialType::RawMaterial,
2410        )];
2411
2412        let generator = JournalEntryGenerator::new_with_params(
2413            TransactionConfig::default(),
2414            coa,
2415            vec!["1000".to_string()],
2416            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2417            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2418            42,
2419        )
2420        .with_master_data(&vendors, &customers, &materials);
2421
2422        assert!(generator.is_using_real_master_data());
2423    }
2424
2425    #[test]
2426    fn test_stress_factors_increase_error_rate() {
2427        let mut coa_gen =
2428            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2429        let coa = Arc::new(coa_gen.generate());
2430
2431        let generator = JournalEntryGenerator::new_with_params(
2432            TransactionConfig::default(),
2433            coa,
2434            vec!["1000".to_string()],
2435            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2436            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2437            42,
2438        );
2439
2440        let base_rate = 0.1;
2441
2442        // Regular day - no stress factors
2443        let regular_day = NaiveDate::from_ymd_opt(2024, 6, 15).unwrap(); // Mid-June Wednesday
2444        let regular_rate = generator.apply_stress_factors(base_rate, regular_day);
2445        assert!(
2446            (regular_rate - base_rate).abs() < 0.01,
2447            "Regular day should have minimal stress factor adjustment"
2448        );
2449
2450        // Month end - 50% more errors
2451        let month_end = NaiveDate::from_ymd_opt(2024, 6, 29).unwrap(); // June 29 (Saturday)
2452        let month_end_rate = generator.apply_stress_factors(base_rate, month_end);
2453        assert!(
2454            month_end_rate > regular_rate,
2455            "Month end should have higher error rate than regular day"
2456        );
2457
2458        // Year end - double the error rate
2459        let year_end = NaiveDate::from_ymd_opt(2024, 12, 30).unwrap(); // December 30
2460        let year_end_rate = generator.apply_stress_factors(base_rate, year_end);
2461        assert!(
2462            year_end_rate > month_end_rate,
2463            "Year end should have highest error rate"
2464        );
2465
2466        // Friday stress
2467        let friday = NaiveDate::from_ymd_opt(2024, 6, 14).unwrap(); // Friday
2468        let friday_rate = generator.apply_stress_factors(base_rate, friday);
2469        assert!(
2470            friday_rate > regular_rate,
2471            "Friday should have higher error rate than mid-week"
2472        );
2473
2474        // Monday stress
2475        let monday = NaiveDate::from_ymd_opt(2024, 6, 17).unwrap(); // Monday
2476        let monday_rate = generator.apply_stress_factors(base_rate, monday);
2477        assert!(
2478            monday_rate > regular_rate,
2479            "Monday should have higher error rate than mid-week"
2480        );
2481    }
2482
2483    #[test]
2484    fn test_batching_produces_similar_entries() {
2485        let mut coa_gen =
2486            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2487        let coa = Arc::new(coa_gen.generate());
2488
2489        // Use seed 123 which is more likely to trigger batching
2490        let mut je_gen = JournalEntryGenerator::new_with_params(
2491            TransactionConfig::default(),
2492            coa,
2493            vec!["1000".to_string()],
2494            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2495            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2496            123,
2497        )
2498        .with_persona_errors(false); // Disable to ensure balanced entries
2499
2500        // Generate many entries - at 15% batch rate, should see some batches
2501        let entries: Vec<JournalEntry> = (0..200).map(|_| je_gen.generate()).collect();
2502
2503        // Check that all entries are balanced (batched or not)
2504        for entry in &entries {
2505            assert!(
2506                entry.is_balanced(),
2507                "All entries including batched should be balanced"
2508            );
2509        }
2510
2511        // Count entries with same-day posting dates (batch indicator)
2512        let mut date_counts: std::collections::HashMap<NaiveDate, usize> =
2513            std::collections::HashMap::new();
2514        for entry in &entries {
2515            *date_counts.entry(entry.header.posting_date).or_insert(0) += 1;
2516        }
2517
2518        // With batching, some dates should have multiple entries
2519        let dates_with_multiple = date_counts.values().filter(|&&c| c > 1).count();
2520        assert!(
2521            dates_with_multiple > 0,
2522            "With batching, should see some dates with multiple entries"
2523        );
2524    }
2525
2526    #[test]
2527    fn test_temporal_patterns_business_days() {
2528        use datasynth_config::schema::{
2529            BusinessDaySchemaConfig, CalendarSchemaConfig, TemporalPatternsConfig,
2530        };
2531
2532        let mut coa_gen =
2533            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2534        let coa = Arc::new(coa_gen.generate());
2535
2536        // Create temporal patterns config with business days enabled
2537        let temporal_config = TemporalPatternsConfig {
2538            enabled: true,
2539            business_days: BusinessDaySchemaConfig {
2540                enabled: true,
2541                ..Default::default()
2542            },
2543            calendars: CalendarSchemaConfig {
2544                regions: vec!["US".to_string()],
2545                custom_holidays: vec![],
2546            },
2547            ..Default::default()
2548        };
2549
2550        let mut je_gen = JournalEntryGenerator::new_with_params(
2551            TransactionConfig::default(),
2552            coa,
2553            vec!["1000".to_string()],
2554            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2555            NaiveDate::from_ymd_opt(2024, 3, 31).unwrap(), // Q1 2024
2556            42,
2557        )
2558        .with_temporal_patterns(temporal_config, 42)
2559        .with_persona_errors(false);
2560
2561        // Generate entries and verify none fall on weekends
2562        let entries: Vec<JournalEntry> = (0..100).map(|_| je_gen.generate()).collect();
2563
2564        for entry in &entries {
2565            let weekday = entry.header.posting_date.weekday();
2566            assert!(
2567                weekday != chrono::Weekday::Sat && weekday != chrono::Weekday::Sun,
2568                "Posting date {:?} should not be a weekend",
2569                entry.header.posting_date
2570            );
2571        }
2572    }
2573
2574    #[test]
2575    fn test_default_generation_filters_weekends() {
2576        // Verify that weekend entries are <5% even when temporal_patterns is NOT enabled.
2577        // This tests the fix where new_with_full_config always creates a default
2578        // BusinessDayCalculator with US holidays as a fallback.
2579        let mut coa_gen =
2580            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2581        let coa = Arc::new(coa_gen.generate());
2582
2583        let mut je_gen = JournalEntryGenerator::new_with_params(
2584            TransactionConfig::default(),
2585            coa,
2586            vec!["1000".to_string()],
2587            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2588            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2589            42,
2590        )
2591        .with_persona_errors(false);
2592
2593        let total = 500;
2594        let entries: Vec<JournalEntry> = (0..total).map(|_| je_gen.generate()).collect();
2595
2596        let weekend_count = entries
2597            .iter()
2598            .filter(|e| {
2599                let wd = e.header.posting_date.weekday();
2600                wd == chrono::Weekday::Sat || wd == chrono::Weekday::Sun
2601            })
2602            .count();
2603
2604        let weekend_pct = weekend_count as f64 / total as f64;
2605        assert!(
2606            weekend_pct < 0.05,
2607            "Expected weekend entries <5% of total without temporal_patterns enabled, \
2608             but got {:.1}% ({}/{})",
2609            weekend_pct * 100.0,
2610            weekend_count,
2611            total
2612        );
2613    }
2614
2615    #[test]
2616    fn test_document_type_derived_from_business_process() {
2617        let mut coa_gen =
2618            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2619        let coa = Arc::new(coa_gen.generate());
2620
2621        let mut je_gen = JournalEntryGenerator::new_with_params(
2622            TransactionConfig::default(),
2623            coa,
2624            vec!["1000".to_string()],
2625            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2626            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2627            99,
2628        )
2629        .with_persona_errors(false)
2630        .with_batching(false);
2631
2632        let total = 200;
2633        let mut doc_types = std::collections::HashSet::new();
2634        let mut sa_count = 0_usize;
2635
2636        for _ in 0..total {
2637            let entry = je_gen.generate();
2638            let dt = &entry.header.document_type;
2639            doc_types.insert(dt.clone());
2640            if dt == "SA" {
2641                sa_count += 1;
2642            }
2643        }
2644
2645        // Should have more than 3 distinct document types
2646        assert!(
2647            doc_types.len() > 3,
2648            "Expected >3 distinct document types, got {} ({:?})",
2649            doc_types.len(),
2650            doc_types,
2651        );
2652
2653        // "SA" should be less than 50% (R2R is 20% of the weight)
2654        let sa_pct = sa_count as f64 / total as f64;
2655        assert!(
2656            sa_pct < 0.50,
2657            "Expected SA <50%, got {:.1}% ({}/{})",
2658            sa_pct * 100.0,
2659            sa_count,
2660            total,
2661        );
2662    }
2663
2664    #[test]
2665    fn test_enrich_line_items_account_description() {
2666        let mut coa_gen =
2667            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2668        let coa = Arc::new(coa_gen.generate());
2669
2670        let mut je_gen = JournalEntryGenerator::new_with_params(
2671            TransactionConfig::default(),
2672            coa,
2673            vec!["1000".to_string()],
2674            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2675            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2676            42,
2677        )
2678        .with_persona_errors(false);
2679
2680        let total = 200;
2681        let entries: Vec<JournalEntry> = (0..total).map(|_| je_gen.generate()).collect();
2682
2683        // Count lines with account_description populated
2684        let total_lines: usize = entries.iter().map(|e| e.lines.len()).sum();
2685        let lines_with_desc: usize = entries
2686            .iter()
2687            .flat_map(|e| &e.lines)
2688            .filter(|l| l.account_description.is_some())
2689            .count();
2690
2691        let desc_pct = lines_with_desc as f64 / total_lines as f64;
2692        assert!(
2693            desc_pct > 0.95,
2694            "Expected >95% of lines to have account_description, got {:.1}% ({}/{})",
2695            desc_pct * 100.0,
2696            lines_with_desc,
2697            total_lines,
2698        );
2699    }
2700
2701    #[test]
2702    fn test_enrich_line_items_cost_center_for_expense_accounts() {
2703        let mut coa_gen =
2704            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2705        let coa = Arc::new(coa_gen.generate());
2706
2707        let mut je_gen = JournalEntryGenerator::new_with_params(
2708            TransactionConfig::default(),
2709            coa,
2710            vec!["1000".to_string()],
2711            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2712            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2713            42,
2714        )
2715        .with_persona_errors(false);
2716
2717        let total = 300;
2718        let entries: Vec<JournalEntry> = (0..total).map(|_| je_gen.generate()).collect();
2719
2720        // Count expense account lines (5xxx/6xxx) with cost_center populated
2721        let expense_lines: Vec<&JournalEntryLine> = entries
2722            .iter()
2723            .flat_map(|e| &e.lines)
2724            .filter(|l| {
2725                let first = l.gl_account.chars().next().unwrap_or('0');
2726                first == '5' || first == '6'
2727            })
2728            .collect();
2729
2730        if !expense_lines.is_empty() {
2731            let with_cc = expense_lines
2732                .iter()
2733                .filter(|l| l.cost_center.is_some())
2734                .count();
2735            let cc_pct = with_cc as f64 / expense_lines.len() as f64;
2736            assert!(
2737                cc_pct > 0.80,
2738                "Expected >80% of expense lines to have cost_center, got {:.1}% ({}/{})",
2739                cc_pct * 100.0,
2740                with_cc,
2741                expense_lines.len(),
2742            );
2743        }
2744    }
2745
2746    #[test]
2747    fn test_enrich_line_items_profit_center_and_line_text() {
2748        let mut coa_gen =
2749            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2750        let coa = Arc::new(coa_gen.generate());
2751
2752        let mut je_gen = JournalEntryGenerator::new_with_params(
2753            TransactionConfig::default(),
2754            coa,
2755            vec!["1000".to_string()],
2756            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2757            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2758            42,
2759        )
2760        .with_persona_errors(false);
2761
2762        let total = 100;
2763        let entries: Vec<JournalEntry> = (0..total).map(|_| je_gen.generate()).collect();
2764
2765        let total_lines: usize = entries.iter().map(|e| e.lines.len()).sum();
2766
2767        // All lines should have profit_center
2768        let with_pc = entries
2769            .iter()
2770            .flat_map(|e| &e.lines)
2771            .filter(|l| l.profit_center.is_some())
2772            .count();
2773        let pc_pct = with_pc as f64 / total_lines as f64;
2774        assert!(
2775            pc_pct > 0.95,
2776            "Expected >95% of lines to have profit_center, got {:.1}% ({}/{})",
2777            pc_pct * 100.0,
2778            with_pc,
2779            total_lines,
2780        );
2781
2782        // All lines should have line_text (either from template or header fallback)
2783        let with_text = entries
2784            .iter()
2785            .flat_map(|e| &e.lines)
2786            .filter(|l| l.line_text.is_some())
2787            .count();
2788        let text_pct = with_text as f64 / total_lines as f64;
2789        assert!(
2790            text_pct > 0.95,
2791            "Expected >95% of lines to have line_text, got {:.1}% ({}/{})",
2792            text_pct * 100.0,
2793            with_text,
2794            total_lines,
2795        );
2796    }
2797
2798    // --- ISA 240 audit flag tests ---
2799
2800    #[test]
2801    fn test_je_has_audit_flags() {
2802        let mut coa_gen =
2803            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2804        let coa = Arc::new(coa_gen.generate());
2805
2806        let mut je_gen = JournalEntryGenerator::new_with_params(
2807            TransactionConfig::default(),
2808            coa,
2809            vec!["1000".to_string()],
2810            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2811            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2812            42,
2813        )
2814        .with_persona_errors(false);
2815
2816        for _ in 0..100 {
2817            let entry = je_gen.generate();
2818
2819            // source_system should always be non-empty
2820            assert!(
2821                !entry.header.source_system.is_empty(),
2822                "source_system should be populated, got empty string"
2823            );
2824
2825            // created_by should always be non-empty (already tested elsewhere, but confirm)
2826            assert!(
2827                !entry.header.created_by.is_empty(),
2828                "created_by should be populated"
2829            );
2830
2831            // created_date should always be populated
2832            assert!(
2833                entry.header.created_date.is_some(),
2834                "created_date should be populated"
2835            );
2836        }
2837    }
2838
2839    #[test]
2840    fn test_manual_entry_rate() {
2841        let mut coa_gen =
2842            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2843        let coa = Arc::new(coa_gen.generate());
2844
2845        let mut je_gen = JournalEntryGenerator::new_with_params(
2846            TransactionConfig::default(),
2847            coa,
2848            vec!["1000".to_string()],
2849            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2850            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2851            42,
2852        )
2853        .with_persona_errors(false)
2854        .with_batching(false);
2855
2856        let total = 1000;
2857        let entries: Vec<JournalEntry> = (0..total).map(|_| je_gen.generate()).collect();
2858
2859        let manual_count = entries.iter().filter(|e| e.header.is_manual).count();
2860        let manual_rate = manual_count as f64 / total as f64;
2861
2862        // Default source_distribution.manual is typically around 0.05-0.15
2863        // Allow a wide tolerance for statistical variation
2864        assert!(
2865            manual_rate > 0.01 && manual_rate < 0.50,
2866            "Manual entry rate should be reasonable (1%-50%), got {:.1}% ({}/{})",
2867            manual_rate * 100.0,
2868            manual_count,
2869            total,
2870        );
2871
2872        // is_manual should match TransactionSource::Manual
2873        for entry in &entries {
2874            let source_is_manual = entry.header.source == TransactionSource::Manual;
2875            assert_eq!(
2876                entry.header.is_manual, source_is_manual,
2877                "is_manual should match source == Manual"
2878            );
2879        }
2880    }
2881
2882    #[test]
2883    fn test_manual_source_consistency() {
2884        let mut coa_gen =
2885            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2886        let coa = Arc::new(coa_gen.generate());
2887
2888        let mut je_gen = JournalEntryGenerator::new_with_params(
2889            TransactionConfig::default(),
2890            coa,
2891            vec!["1000".to_string()],
2892            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2893            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2894            42,
2895        )
2896        .with_persona_errors(false)
2897        .with_batching(false);
2898
2899        for _ in 0..500 {
2900            let entry = je_gen.generate();
2901
2902            if entry.header.is_manual {
2903                // Manual entries must have source_system "manual" or "spreadsheet"
2904                assert!(
2905                    entry.header.source_system == "manual"
2906                        || entry.header.source_system == "spreadsheet",
2907                    "Manual entry should have source_system 'manual' or 'spreadsheet', got '{}'",
2908                    entry.header.source_system,
2909                );
2910            } else {
2911                // Non-manual entries must NOT have source_system "manual" or "spreadsheet"
2912                assert!(
2913                    entry.header.source_system != "manual"
2914                        && entry.header.source_system != "spreadsheet",
2915                    "Non-manual entry should not have source_system 'manual' or 'spreadsheet', got '{}'",
2916                    entry.header.source_system,
2917                );
2918            }
2919        }
2920    }
2921
2922    #[test]
2923    fn test_created_date_before_posting() {
2924        let mut coa_gen =
2925            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2926        let coa = Arc::new(coa_gen.generate());
2927
2928        let mut je_gen = JournalEntryGenerator::new_with_params(
2929            TransactionConfig::default(),
2930            coa,
2931            vec!["1000".to_string()],
2932            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2933            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2934            42,
2935        )
2936        .with_persona_errors(false);
2937
2938        for _ in 0..500 {
2939            let entry = je_gen.generate();
2940
2941            if let Some(created_date) = entry.header.created_date {
2942                let created_naive_date = created_date.date();
2943                assert!(
2944                    created_naive_date <= entry.header.posting_date,
2945                    "created_date ({}) should be <= posting_date ({})",
2946                    created_naive_date,
2947                    entry.header.posting_date,
2948                );
2949            }
2950        }
2951    }
2952}