Skip to main content

datasynth_generators/
je_generator.rs

1//! Journal Entry generator with statistical distributions.
2
3use chrono::{Datelike, NaiveDate, Timelike};
4use datasynth_core::utils::seeded_rng;
5use rand::prelude::*;
6use rand_chacha::ChaCha8Rng;
7use rust_decimal::prelude::*;
8use rust_decimal::Decimal;
9use std::sync::Arc;
10
11use tracing::debug;
12
13use datasynth_config::schema::{
14    FraudConfig, GeneratorConfig, TemplateConfig, TemporalPatternsConfig, TransactionConfig,
15};
16use datasynth_core::distributions::{
17    BusinessDayCalculator, CrossDayConfig, DriftAdjustments, DriftConfig, DriftController,
18    EventType, LagDistribution, PeriodEndConfig, PeriodEndDynamics, PeriodEndModel,
19    ProcessingLagCalculator, ProcessingLagConfig, *,
20};
21use datasynth_core::models::*;
22use datasynth_core::templates::{
23    descriptions::DescriptionContext, DescriptionGenerator, ReferenceGenerator, ReferenceType,
24};
25use datasynth_core::traits::Generator;
26use datasynth_core::uuid_factory::{DeterministicUuidFactory, GeneratorType};
27use datasynth_core::CountryPack;
28
29use crate::company_selector::WeightedCompanySelector;
30use crate::user_generator::{UserGenerator, UserGeneratorConfig};
31
32/// Generator for realistic journal entries.
33pub struct JournalEntryGenerator {
34    rng: ChaCha8Rng,
35    seed: u64,
36    config: TransactionConfig,
37    coa: Arc<ChartOfAccounts>,
38    companies: Vec<String>,
39    company_selector: WeightedCompanySelector,
40    line_sampler: LineItemSampler,
41    amount_sampler: AmountSampler,
42    temporal_sampler: TemporalSampler,
43    start_date: NaiveDate,
44    end_date: NaiveDate,
45    count: u64,
46    uuid_factory: DeterministicUuidFactory,
47    // Enhanced features
48    user_pool: Option<UserPool>,
49    description_generator: DescriptionGenerator,
50    reference_generator: ReferenceGenerator,
51    template_config: TemplateConfig,
52    vendor_pool: VendorPool,
53    customer_pool: CustomerPool,
54    // Material pool for realistic material references
55    material_pool: Option<MaterialPool>,
56    // Flag indicating whether we're using real master data vs defaults
57    using_real_master_data: bool,
58    // Fraud generation
59    fraud_config: FraudConfig,
60    // Persona-based error injection
61    persona_errors_enabled: bool,
62    // Approval threshold enforcement
63    approval_enabled: bool,
64    approval_threshold: rust_decimal::Decimal,
65    // SOD violation rate for approval tracking (0.0 to 1.0)
66    sod_violation_rate: f64,
67    // Batching behavior - humans often process similar items together
68    batch_state: Option<BatchState>,
69    // Temporal drift controller for simulating distribution changes over time
70    drift_controller: Option<DriftController>,
71    // Temporal patterns components
72    business_day_calculator: Option<BusinessDayCalculator>,
73    processing_lag_calculator: Option<ProcessingLagCalculator>,
74    temporal_patterns_config: Option<TemporalPatternsConfig>,
75    // Business-process weights for the O2C/P2P/R2R/H2R/A2R volume mix. Must
76    // sum to 1.0 (validated by config schema). Default matches the legacy
77    // hard-coded 0.35/0.30/0.20/0.10/0.05 distribution.
78    business_process_weights: [(BusinessProcess, f64); 5],
79}
80
/// Default volume mix across the five business processes
/// (O2C 0.35, P2P 0.30, R2R 0.20, H2R 0.10, A2R 0.05).
///
/// Installed by the constructor; can be replaced later through
/// `set_business_process_weights`.
81const DEFAULT_BUSINESS_PROCESS_WEIGHTS: [(BusinessProcess, f64); 5] = [
82    (BusinessProcess::O2C, 0.35),
83    (BusinessProcess::P2P, 0.30),
84    (BusinessProcess::R2R, 0.20),
85    (BusinessProcess::H2R, 0.10),
86    (BusinessProcess::A2R, 0.05),
87];
88
89/// State for tracking batch processing behavior.
90///
91/// When humans process transactions, they often batch similar items together
92/// (e.g., processing all invoices from one vendor, entering similar expenses).
93#[derive(Clone)]
94struct BatchState {
95    /// The base entry template to vary
96    base_account_number: String,
97    base_amount: rust_decimal::Decimal,
98    base_business_process: Option<BusinessProcess>,
99    base_posting_date: NaiveDate,
100    /// Remaining entries in this batch
101    remaining: u8,
102}
103
104impl JournalEntryGenerator {
105    /// Create a new journal entry generator.
106    pub fn new_with_params(
107        config: TransactionConfig,
108        coa: Arc<ChartOfAccounts>,
109        companies: Vec<String>,
110        start_date: NaiveDate,
111        end_date: NaiveDate,
112        seed: u64,
113    ) -> Self {
114        Self::new_with_full_config(
115            config,
116            coa,
117            companies,
118            start_date,
119            end_date,
120            seed,
121            TemplateConfig::default(),
122            None,
123        )
124    }
125
126    /// Create a new journal entry generator with full configuration.
127    #[allow(clippy::too_many_arguments)]
128    pub fn new_with_full_config(
129        config: TransactionConfig,
130        coa: Arc<ChartOfAccounts>,
131        companies: Vec<String>,
132        start_date: NaiveDate,
133        end_date: NaiveDate,
134        seed: u64,
135        template_config: TemplateConfig,
136        user_pool: Option<UserPool>,
137    ) -> Self {
138        // Initialize user pool if not provided
139        let user_pool = user_pool.or_else(|| {
140            if template_config.names.generate_realistic_names {
141                let user_gen_config = UserGeneratorConfig {
142                    culture_distribution: vec![
143                        (
144                            datasynth_core::templates::NameCulture::WesternUs,
145                            template_config.names.culture_distribution.western_us,
146                        ),
147                        (
148                            datasynth_core::templates::NameCulture::Hispanic,
149                            template_config.names.culture_distribution.hispanic,
150                        ),
151                        (
152                            datasynth_core::templates::NameCulture::German,
153                            template_config.names.culture_distribution.german,
154                        ),
155                        (
156                            datasynth_core::templates::NameCulture::French,
157                            template_config.names.culture_distribution.french,
158                        ),
159                        (
160                            datasynth_core::templates::NameCulture::Chinese,
161                            template_config.names.culture_distribution.chinese,
162                        ),
163                        (
164                            datasynth_core::templates::NameCulture::Japanese,
165                            template_config.names.culture_distribution.japanese,
166                        ),
167                        (
168                            datasynth_core::templates::NameCulture::Indian,
169                            template_config.names.culture_distribution.indian,
170                        ),
171                    ],
172                    email_domain: template_config.names.email_domain.clone(),
173                    generate_realistic_names: true,
174                };
175                let mut user_gen = UserGenerator::with_config(seed + 100, user_gen_config);
176                Some(user_gen.generate_standard(&companies))
177            } else {
178                None
179            }
180        });
181
182        // Initialize reference generator
183        let mut ref_gen = ReferenceGenerator::new(
184            start_date.year(),
185            companies
186                .first()
187                .map(std::string::String::as_str)
188                .unwrap_or("1000"),
189        );
190        ref_gen.set_prefix(
191            ReferenceType::Invoice,
192            &template_config.references.invoice_prefix,
193        );
194        ref_gen.set_prefix(
195            ReferenceType::PurchaseOrder,
196            &template_config.references.po_prefix,
197        );
198        ref_gen.set_prefix(
199            ReferenceType::SalesOrder,
200            &template_config.references.so_prefix,
201        );
202
203        // Create weighted company selector (uniform weights for this constructor)
204        let company_selector = WeightedCompanySelector::uniform(companies.clone());
205
206        Self {
207            rng: seeded_rng(seed, 0),
208            seed,
209            config: config.clone(),
210            coa,
211            companies,
212            company_selector,
213            line_sampler: LineItemSampler::with_config(
214                seed + 1,
215                config.line_item_distribution.clone(),
216                config.even_odd_distribution.clone(),
217                config.debit_credit_distribution.clone(),
218            ),
219            amount_sampler: AmountSampler::with_config(seed + 2, config.amounts.clone()),
220            temporal_sampler: TemporalSampler::with_config(
221                seed + 3,
222                config.seasonality.clone(),
223                WorkingHoursConfig::default(),
224                Vec::new(),
225            ),
226            start_date,
227            end_date,
228            count: 0,
229            uuid_factory: DeterministicUuidFactory::new(seed, GeneratorType::JournalEntry),
230            user_pool,
231            description_generator: DescriptionGenerator::new(),
232            reference_generator: ref_gen,
233            template_config,
234            vendor_pool: VendorPool::standard(),
235            customer_pool: CustomerPool::standard(),
236            material_pool: None,
237            using_real_master_data: false,
238            fraud_config: FraudConfig::default(),
239            persona_errors_enabled: true, // Enable by default for realism
240            approval_enabled: true,       // Enable by default for realism
241            approval_threshold: rust_decimal::Decimal::new(10000, 0), // $10,000 default threshold
242            sod_violation_rate: 0.10,     // 10% default SOD violation rate
243            batch_state: None,
244            drift_controller: None,
245            // Always provide a basic BusinessDayCalculator so that weekend/holiday
246            // filtering is active even when temporal_patterns is not explicitly enabled.
247            business_day_calculator: Some(BusinessDayCalculator::new(HolidayCalendar::new(
248                Region::US,
249                start_date.year(),
250            ))),
251            processing_lag_calculator: None,
252            temporal_patterns_config: None,
253            business_process_weights: DEFAULT_BUSINESS_PROCESS_WEIGHTS,
254        }
255    }
256
257    /// Override the business-process volume mix. Weights map directly to the
258    /// `business_processes.*_weight` YAML config; they do not have to sum to
259    /// exactly 1.0 (they're normalized via `weighted_select`).
260    pub fn set_business_process_weights(
261        &mut self,
262        o2c: f64,
263        p2p: f64,
264        r2r: f64,
265        h2r: f64,
266        a2r: f64,
267    ) {
268        self.business_process_weights = [
269            (BusinessProcess::O2C, o2c),
270            (BusinessProcess::P2P, p2p),
271            (BusinessProcess::R2R, r2r),
272            (BusinessProcess::H2R, h2r),
273            (BusinessProcess::A2R, a2r),
274        ];
275    }
276
277    /// Create from a full GeneratorConfig.
278    ///
279    /// This constructor uses the volume_weight from company configs
280    /// for weighted company selection, and fraud config from GeneratorConfig.
281    pub fn from_generator_config(
282        full_config: &GeneratorConfig,
283        coa: Arc<ChartOfAccounts>,
284        start_date: NaiveDate,
285        end_date: NaiveDate,
286        seed: u64,
287    ) -> Self {
288        let companies: Vec<String> = full_config
289            .companies
290            .iter()
291            .map(|c| c.code.clone())
292            .collect();
293
294        // Create weighted selector using volume_weight from company configs
295        let company_selector = WeightedCompanySelector::from_configs(&full_config.companies);
296
297        let mut generator = Self::new_with_full_config(
298            full_config.transactions.clone(),
299            coa,
300            companies,
301            start_date,
302            end_date,
303            seed,
304            full_config.templates.clone(),
305            None,
306        );
307
308        // Override the uniform selector with weighted selector
309        generator.company_selector = company_selector;
310
311        // Set fraud config
312        generator.fraud_config = full_config.fraud.clone();
313
314        // Configure temporal patterns if enabled
315        let temporal_config = &full_config.temporal_patterns;
316        if temporal_config.enabled {
317            generator = generator.with_temporal_patterns(temporal_config.clone(), seed);
318        }
319
320        generator
321    }
322
323    /// Configure temporal patterns including business day calculations and processing lags.
324    ///
325    /// This enables realistic temporal behavior including:
326    /// - Business day awareness (no postings on weekends/holidays)
327    /// - Processing lag modeling (event-to-posting delays)
328    /// - Period-end dynamics (volume spikes at month/quarter/year end)
329    pub fn with_temporal_patterns(mut self, config: TemporalPatternsConfig, seed: u64) -> Self {
330        // Create business day calculator if enabled
331        if config.business_days.enabled {
332            let region = config
333                .calendars
334                .regions
335                .first()
336                .map(|r| Self::parse_region(r))
337                .unwrap_or(Region::US);
338
339            let calendar = HolidayCalendar::new(region, self.start_date.year());
340            self.business_day_calculator = Some(BusinessDayCalculator::new(calendar));
341        }
342
343        // Create processing lag calculator if enabled
344        if config.processing_lags.enabled {
345            let lag_config = Self::convert_processing_lag_config(&config.processing_lags);
346            self.processing_lag_calculator =
347                Some(ProcessingLagCalculator::with_config(seed, lag_config));
348        }
349
350        // Create period-end dynamics if configured
351        let model = config.period_end.model.as_deref().unwrap_or("flat");
352        if model != "flat"
353            || config
354                .period_end
355                .month_end
356                .as_ref()
357                .is_some_and(|m| m.peak_multiplier.unwrap_or(1.0) != 1.0)
358        {
359            let dynamics = Self::convert_period_end_config(&config.period_end);
360            self.temporal_sampler.set_period_end_dynamics(dynamics);
361        }
362
363        self.temporal_patterns_config = Some(config);
364        self
365    }
366
367    /// Configure temporal patterns using a [`CountryPack`] for the holiday calendar.
368    ///
369    /// This is an alternative to [`with_temporal_patterns`] that derives the
370    /// holiday calendar from a country-pack definition rather than the built-in
371    /// region-based calendars.  All other temporal behaviour (business-day
372    /// adjustment, processing lags, period-end dynamics) is configured
373    /// identically.
374    pub fn with_country_pack_temporal(
375        mut self,
376        config: TemporalPatternsConfig,
377        seed: u64,
378        pack: &CountryPack,
379    ) -> Self {
380        // Create business day calculator using the country pack calendar
381        if config.business_days.enabled {
382            let calendar = HolidayCalendar::from_country_pack(pack, self.start_date.year());
383            self.business_day_calculator = Some(BusinessDayCalculator::new(calendar));
384        }
385
386        // Create processing lag calculator if enabled
387        if config.processing_lags.enabled {
388            let lag_config = Self::convert_processing_lag_config(&config.processing_lags);
389            self.processing_lag_calculator =
390                Some(ProcessingLagCalculator::with_config(seed, lag_config));
391        }
392
393        // Create period-end dynamics if configured
394        let model = config.period_end.model.as_deref().unwrap_or("flat");
395        if model != "flat"
396            || config
397                .period_end
398                .month_end
399                .as_ref()
400                .is_some_and(|m| m.peak_multiplier.unwrap_or(1.0) != 1.0)
401        {
402            let dynamics = Self::convert_period_end_config(&config.period_end);
403            self.temporal_sampler.set_period_end_dynamics(dynamics);
404        }
405
406        self.temporal_patterns_config = Some(config);
407        self
408    }
409
410    /// Convert schema processing lag config to core config.
411    fn convert_processing_lag_config(
412        schema: &datasynth_config::schema::ProcessingLagSchemaConfig,
413    ) -> ProcessingLagConfig {
414        let mut config = ProcessingLagConfig {
415            enabled: schema.enabled,
416            ..Default::default()
417        };
418
419        // Helper to convert lag schema to distribution
420        let convert_lag = |lag: &datasynth_config::schema::LagDistributionSchemaConfig| {
421            let mut dist = LagDistribution::log_normal(lag.mu, lag.sigma);
422            if let Some(min) = lag.min_hours {
423                dist.min_lag_hours = min;
424            }
425            if let Some(max) = lag.max_hours {
426                dist.max_lag_hours = max;
427            }
428            dist
429        };
430
431        // Apply event-specific lags
432        if let Some(ref lag) = schema.sales_order_lag {
433            config
434                .event_lags
435                .insert(EventType::SalesOrder, convert_lag(lag));
436        }
437        if let Some(ref lag) = schema.purchase_order_lag {
438            config
439                .event_lags
440                .insert(EventType::PurchaseOrder, convert_lag(lag));
441        }
442        if let Some(ref lag) = schema.goods_receipt_lag {
443            config
444                .event_lags
445                .insert(EventType::GoodsReceipt, convert_lag(lag));
446        }
447        if let Some(ref lag) = schema.invoice_receipt_lag {
448            config
449                .event_lags
450                .insert(EventType::InvoiceReceipt, convert_lag(lag));
451        }
452        if let Some(ref lag) = schema.invoice_issue_lag {
453            config
454                .event_lags
455                .insert(EventType::InvoiceIssue, convert_lag(lag));
456        }
457        if let Some(ref lag) = schema.payment_lag {
458            config
459                .event_lags
460                .insert(EventType::Payment, convert_lag(lag));
461        }
462        if let Some(ref lag) = schema.journal_entry_lag {
463            config
464                .event_lags
465                .insert(EventType::JournalEntry, convert_lag(lag));
466        }
467
468        // Apply cross-day posting config
469        if let Some(ref cross_day) = schema.cross_day_posting {
470            config.cross_day = CrossDayConfig {
471                enabled: cross_day.enabled,
472                probability_by_hour: cross_day.probability_by_hour.clone(),
473                ..Default::default()
474            };
475        }
476
477        config
478    }
479
480    /// Convert schema period-end config to core PeriodEndDynamics.
481    fn convert_period_end_config(
482        schema: &datasynth_config::schema::PeriodEndSchemaConfig,
483    ) -> PeriodEndDynamics {
484        let model_type = schema.model.as_deref().unwrap_or("exponential");
485
486        // Helper to convert period config
487        let convert_period =
488            |period: Option<&datasynth_config::schema::PeriodEndModelSchemaConfig>,
489             default_peak: f64|
490             -> PeriodEndConfig {
491                if let Some(p) = period {
492                    let model = match model_type {
493                        "flat" => PeriodEndModel::FlatMultiplier {
494                            multiplier: p.peak_multiplier.unwrap_or(default_peak),
495                        },
496                        "extended_crunch" => PeriodEndModel::ExtendedCrunch {
497                            start_day: p.start_day.unwrap_or(-10),
498                            sustained_high_days: p.sustained_high_days.unwrap_or(3),
499                            peak_multiplier: p.peak_multiplier.unwrap_or(default_peak),
500                            ramp_up_days: 3, // Default ramp-up period
501                        },
502                        _ => PeriodEndModel::ExponentialAcceleration {
503                            start_day: p.start_day.unwrap_or(-10),
504                            base_multiplier: p.base_multiplier.unwrap_or(1.0),
505                            peak_multiplier: p.peak_multiplier.unwrap_or(default_peak),
506                            decay_rate: p.decay_rate.unwrap_or(0.3),
507                        },
508                    };
509                    PeriodEndConfig {
510                        enabled: true,
511                        model,
512                        additional_multiplier: p.additional_multiplier.unwrap_or(1.0),
513                    }
514                } else {
515                    PeriodEndConfig {
516                        enabled: true,
517                        model: PeriodEndModel::ExponentialAcceleration {
518                            start_day: -10,
519                            base_multiplier: 1.0,
520                            peak_multiplier: default_peak,
521                            decay_rate: 0.3,
522                        },
523                        additional_multiplier: 1.0,
524                    }
525                }
526            };
527
528        PeriodEndDynamics::new(
529            convert_period(schema.month_end.as_ref(), 2.0),
530            convert_period(schema.quarter_end.as_ref(), 3.5),
531            convert_period(schema.year_end.as_ref(), 5.0),
532        )
533    }
534
535    /// Parse a region string into a Region enum.
536    fn parse_region(region_str: &str) -> Region {
537        match region_str.to_uppercase().as_str() {
538            "US" => Region::US,
539            "DE" => Region::DE,
540            "GB" => Region::GB,
541            "CN" => Region::CN,
542            "JP" => Region::JP,
543            "IN" => Region::IN,
544            "BR" => Region::BR,
545            "MX" => Region::MX,
546            "AU" => Region::AU,
547            "SG" => Region::SG,
548            "KR" => Region::KR,
549            "FR" => Region::FR,
550            "IT" => Region::IT,
551            "ES" => Region::ES,
552            "CA" => Region::CA,
553            _ => Region::US,
554        }
555    }
556
557    /// Set a custom company selector.
    ///
    /// Replaces whatever selector is currently installed (the constructor
    /// starts with a uniform selector over the company list).
558    pub fn set_company_selector(&mut self, selector: WeightedCompanySelector) {
559        self.company_selector = selector;
560    }
561
562    /// Get the current company selector.
    ///
    /// Returns a shared reference; the selector itself is not cloned.
563    pub fn company_selector(&self) -> &WeightedCompanySelector {
564        &self.company_selector
565    }
566
567    /// Set fraud configuration.
    ///
    /// Replaces the stored `FraudConfig` wholesale (the constructor installs
    /// `FraudConfig::default()`).
568    pub fn set_fraud_config(&mut self, config: FraudConfig) {
569        self.fraud_config = config;
570    }
571
572    /// Set vendors from generated master data.
573    ///
574    /// This replaces the default vendor pool with actual generated vendors,
575    /// ensuring JEs reference real master data entities.
576    pub fn with_vendors(mut self, vendors: &[Vendor]) -> Self {
577        if !vendors.is_empty() {
578            self.vendor_pool = VendorPool::from_vendors(vendors.to_vec());
579            self.using_real_master_data = true;
580        }
581        self
582    }
583
584    /// Set customers from generated master data.
585    ///
586    /// This replaces the default customer pool with actual generated customers,
587    /// ensuring JEs reference real master data entities.
588    pub fn with_customers(mut self, customers: &[Customer]) -> Self {
589        if !customers.is_empty() {
590            self.customer_pool = CustomerPool::from_customers(customers.to_vec());
591            self.using_real_master_data = true;
592        }
593        self
594    }
595
596    /// Set materials from generated master data.
597    ///
598    /// This provides material references for JEs that involve inventory movements.
599    pub fn with_materials(mut self, materials: &[Material]) -> Self {
600        if !materials.is_empty() {
601            self.material_pool = Some(MaterialPool::from_materials(materials.to_vec()));
602            self.using_real_master_data = true;
603        }
604        self
605    }
606
607    /// Set all master data at once for convenience.
608    ///
609    /// This is the recommended way to configure the JE generator with
610    /// generated master data to ensure data coherence.
611    pub fn with_master_data(
612        self,
613        vendors: &[Vendor],
614        customers: &[Customer],
615        materials: &[Material],
616    ) -> Self {
617        self.with_vendors(vendors)
618            .with_customers(customers)
619            .with_materials(materials)
620    }
621
622    /// Replace the user pool with one generated from a [`CountryPack`].
623    ///
624    /// This is an alternative to the default name-culture distribution that
625    /// derives name pools and weights from the country-pack's `names` section.
626    /// The existing user pool (if any) is discarded and regenerated using
627    /// [`MultiCultureNameGenerator::from_country_pack`].
628    pub fn with_country_pack_names(mut self, pack: &CountryPack) -> Self {
629        let name_gen =
630            datasynth_core::templates::MultiCultureNameGenerator::from_country_pack(pack);
631        let config = UserGeneratorConfig {
632            // The culture distribution is embedded in the name generator
633            // itself, so we use an empty list here.
634            culture_distribution: Vec::new(),
635            email_domain: name_gen.email_domain().to_string(),
636            generate_realistic_names: true,
637        };
638        let mut user_gen = UserGenerator::with_name_generator(self.seed + 100, config, name_gen);
639        self.user_pool = Some(user_gen.generate_standard(&self.companies));
640        self
641    }
642
643    /// Check if the generator is using real master data.
    ///
    /// The flag starts as `false` and is set by `with_vendors`,
    /// `with_customers` or `with_materials` when given a non-empty slice.
644    pub fn is_using_real_master_data(&self) -> bool {
645        self.using_real_master_data
646    }
647
648    /// Determine if this transaction should be fraudulent.
649    fn determine_fraud(&mut self) -> Option<FraudType> {
650        if !self.fraud_config.enabled {
651            return None;
652        }
653
654        // Roll for fraud based on fraud rate
655        if self.rng.random::<f64>() >= self.fraud_config.fraud_rate {
656            return None;
657        }
658
659        // Select fraud type based on distribution
660        Some(self.select_fraud_type())
661    }
662
663    /// Select a fraud type based on the configured distribution.
664    fn select_fraud_type(&mut self) -> FraudType {
665        let dist = &self.fraud_config.fraud_type_distribution;
666        let roll: f64 = self.rng.random();
667
668        let mut cumulative = 0.0;
669
670        cumulative += dist.suspense_account_abuse;
671        if roll < cumulative {
672            return FraudType::SuspenseAccountAbuse;
673        }
674
675        cumulative += dist.fictitious_transaction;
676        if roll < cumulative {
677            return FraudType::FictitiousTransaction;
678        }
679
680        cumulative += dist.revenue_manipulation;
681        if roll < cumulative {
682            return FraudType::RevenueManipulation;
683        }
684
685        cumulative += dist.expense_capitalization;
686        if roll < cumulative {
687            return FraudType::ExpenseCapitalization;
688        }
689
690        cumulative += dist.split_transaction;
691        if roll < cumulative {
692            return FraudType::SplitTransaction;
693        }
694
695        cumulative += dist.timing_anomaly;
696        if roll < cumulative {
697            return FraudType::TimingAnomaly;
698        }
699
700        cumulative += dist.unauthorized_access;
701        if roll < cumulative {
702            return FraudType::UnauthorizedAccess;
703        }
704
705        // Default fallback
706        FraudType::DuplicatePayment
707    }
708
709    /// Map a fraud type to an amount pattern for suspicious amounts.
710    fn fraud_type_to_amount_pattern(&self, fraud_type: FraudType) -> FraudAmountPattern {
711        match fraud_type {
712            FraudType::SplitTransaction | FraudType::JustBelowThreshold => {
713                FraudAmountPattern::ThresholdAdjacent
714            }
715            FraudType::FictitiousTransaction
716            | FraudType::FictitiousEntry
717            | FraudType::SuspenseAccountAbuse
718            | FraudType::RoundDollarManipulation => FraudAmountPattern::ObviousRoundNumbers,
719            FraudType::RevenueManipulation
720            | FraudType::ExpenseCapitalization
721            | FraudType::ImproperCapitalization
722            | FraudType::ReserveManipulation
723            | FraudType::UnauthorizedAccess
724            | FraudType::PrematureRevenue
725            | FraudType::UnderstatedLiabilities
726            | FraudType::OverstatedAssets
727            | FraudType::ChannelStuffing => FraudAmountPattern::StatisticallyImprobable,
728            FraudType::DuplicatePayment
729            | FraudType::TimingAnomaly
730            | FraudType::SelfApproval
731            | FraudType::ExceededApprovalLimit
732            | FraudType::SegregationOfDutiesViolation
733            | FraudType::UnauthorizedApproval
734            | FraudType::CollusiveApproval
735            | FraudType::FictitiousVendor
736            | FraudType::ShellCompanyPayment
737            | FraudType::Kickback
738            | FraudType::KickbackScheme
739            | FraudType::InvoiceManipulation
740            | FraudType::AssetMisappropriation
741            | FraudType::InventoryTheft
742            | FraudType::GhostEmployee => FraudAmountPattern::Normal,
743            // Accounting Standards Fraud Types (ASC 606/IFRS 15 - Revenue)
744            FraudType::ImproperRevenueRecognition
745            | FraudType::ImproperPoAllocation
746            | FraudType::VariableConsiderationManipulation
747            | FraudType::ContractModificationMisstatement => {
748                FraudAmountPattern::StatisticallyImprobable
749            }
750            // Accounting Standards Fraud Types (ASC 842/IFRS 16 - Leases)
751            FraudType::LeaseClassificationManipulation
752            | FraudType::OffBalanceSheetLease
753            | FraudType::LeaseLiabilityUnderstatement
754            | FraudType::RouAssetMisstatement => FraudAmountPattern::StatisticallyImprobable,
755            // Accounting Standards Fraud Types (ASC 820/IFRS 13 - Fair Value)
756            FraudType::FairValueHierarchyManipulation
757            | FraudType::Level3InputManipulation
758            | FraudType::ValuationTechniqueManipulation => {
759                FraudAmountPattern::StatisticallyImprobable
760            }
761            // Accounting Standards Fraud Types (ASC 360/IAS 36 - Impairment)
762            FraudType::DelayedImpairment
763            | FraudType::ImpairmentTestAvoidance
764            | FraudType::CashFlowProjectionManipulation
765            | FraudType::ImproperImpairmentReversal => FraudAmountPattern::StatisticallyImprobable,
766            // Sourcing/Procurement Fraud
767            FraudType::BidRigging
768            | FraudType::PhantomVendorContract
769            | FraudType::ConflictOfInterestSourcing => FraudAmountPattern::Normal,
770            FraudType::SplitContractThreshold => FraudAmountPattern::ThresholdAdjacent,
771            // HR/Payroll Fraud
772            FraudType::GhostEmployeePayroll
773            | FraudType::PayrollInflation
774            | FraudType::DuplicateExpenseReport
775            | FraudType::FictitiousExpense => FraudAmountPattern::Normal,
776            FraudType::SplitExpenseToAvoidApproval => FraudAmountPattern::ThresholdAdjacent,
777            // O2C Fraud
778            FraudType::RevenueTimingManipulation => FraudAmountPattern::StatisticallyImprobable,
779            FraudType::QuotePriceOverride => FraudAmountPattern::Normal,
780        }
781    }
782
    /// Generate a deterministic UUID using the factory.
    ///
    /// Thin wrapper that returns whatever `self.uuid_factory.next()` yields; it
    /// is used for journal entry document IDs by `generate` and
    /// `generate_batched_entry`.
    #[inline]
    fn generate_deterministic_uuid(&self) -> uuid::Uuid {
        self.uuid_factory.next()
    }
788
    /// Cost center pool used for expense account enrichment.
    ///
    /// `enrich_line_items` indexes into this slice modulo its length to pick a
    /// cost center for lines whose GL account starts with `5` or `6`.
    const COST_CENTER_POOL: &'static [&'static str] =
        &["CC1000", "CC2000", "CC3000", "CC4000", "CC5000"];
792
793    /// Enrich journal entry line items with account descriptions, cost centers,
794    /// profit centers, value dates, line text, and assignment fields.
795    ///
796    /// This populates the sparse optional fields that `JournalEntryLine::debit()`
797    /// and `::credit()` leave as `None`.
798    fn enrich_line_items(&self, entry: &mut JournalEntry) {
799        let posting_date = entry.header.posting_date;
800        let company_code = &entry.header.company_code;
801        let header_text = entry.header.header_text.clone();
802        let business_process = entry.header.business_process;
803
804        // Derive a deterministic index from the document_id for cost center selection
805        let doc_id_bytes = entry.header.document_id.as_bytes();
806        let mut cc_seed: usize = 0;
807        for &b in doc_id_bytes {
808            cc_seed = cc_seed.wrapping_add(b as usize);
809        }
810
811        for (i, line) in entry.lines.iter_mut().enumerate() {
812            // 1. account_description: look up from CoA
813            if line.account_description.is_none() {
814                line.account_description = self
815                    .coa
816                    .get_account(&line.gl_account)
817                    .map(|a| a.short_description.clone());
818            }
819
820            // 2. cost_center: assign to expense accounts (5xxx/6xxx)
821            if line.cost_center.is_none() {
822                let first_char = line.gl_account.chars().next().unwrap_or('0');
823                if first_char == '5' || first_char == '6' {
824                    let idx = cc_seed.wrapping_add(i) % Self::COST_CENTER_POOL.len();
825                    line.cost_center = Some(Self::COST_CENTER_POOL[idx].to_string());
826                }
827            }
828
829            // 3. profit_center: derive from company code + business process
830            if line.profit_center.is_none() {
831                let suffix = match business_process {
832                    Some(BusinessProcess::P2P) => "-P2P",
833                    Some(BusinessProcess::O2C) => "-O2C",
834                    Some(BusinessProcess::R2R) => "-R2R",
835                    Some(BusinessProcess::H2R) => "-H2R",
836                    _ => "",
837                };
838                line.profit_center = Some(format!("PC-{company_code}{suffix}"));
839            }
840
841            // 4. line_text: fall back to header_text if not already set
842            if line.line_text.is_none() {
843                line.line_text = header_text.clone();
844            }
845
846            // 5. value_date: set to posting_date for AR/AP accounts
847            if line.value_date.is_none()
848                && (line.gl_account.starts_with("1100") || line.gl_account.starts_with("2000"))
849            {
850                line.value_date = Some(posting_date);
851            }
852
853            // 6. assignment: set to vendor/customer reference for AP/AR lines
854            if line.assignment.is_none() {
855                if line.gl_account.starts_with("2000") {
856                    // AP line - use vendor reference from header
857                    if let Some(ref ht) = header_text {
858                        // Try to extract vendor ID from header text patterns like "... - V-001"
859                        if let Some(vendor_part) = ht.rsplit(" - ").next() {
860                            if vendor_part.starts_with("V-")
861                                || vendor_part.starts_with("VENDOR")
862                                || vendor_part.starts_with("Vendor")
863                            {
864                                line.assignment = Some(vendor_part.to_string());
865                            }
866                        }
867                    }
868                } else if line.gl_account.starts_with("1100") {
869                    // AR line - use customer reference from header
870                    if let Some(ref ht) = header_text {
871                        if let Some(customer_part) = ht.rsplit(" - ").next() {
872                            if customer_part.starts_with("C-")
873                                || customer_part.starts_with("CUST")
874                                || customer_part.starts_with("Customer")
875                            {
876                                line.assignment = Some(customer_part.to_string());
877                            }
878                        }
879                    }
880                }
881            }
882        }
883    }
884
885    /// Generate a single journal entry.
886    pub fn generate(&mut self) -> JournalEntry {
887        debug!(
888            count = self.count,
889            companies = self.companies.len(),
890            start_date = %self.start_date,
891            end_date = %self.end_date,
892            "Generating journal entry"
893        );
894
895        // Check if we're in a batch - if so, generate a batched entry
896        if let Some(ref state) = self.batch_state {
897            if state.remaining > 0 {
898                return self.generate_batched_entry();
899            }
900        }
901
902        self.count += 1;
903
904        // Generate deterministic document ID
905        let document_id = self.generate_deterministic_uuid();
906
907        // Sample posting date
908        let mut posting_date = self
909            .temporal_sampler
910            .sample_date(self.start_date, self.end_date);
911
912        // Adjust posting date to be a business day if business day calculator is configured
913        if let Some(ref calc) = self.business_day_calculator {
914            if !calc.is_business_day(posting_date) {
915                // Move to next business day
916                posting_date = calc.next_business_day(posting_date, false);
917                // Ensure we don't exceed end_date
918                if posting_date > self.end_date {
919                    posting_date = calc.prev_business_day(self.end_date, true);
920                }
921            }
922        }
923
924        // Select company using weighted selector
925        let company_code = self.company_selector.select(&mut self.rng).to_string();
926
927        // Sample line item specification
928        let line_spec = self.line_sampler.sample();
929
930        // Determine source type using full 4-way distribution
931        let source = self.select_source();
932        let is_automated = matches!(
933            source,
934            TransactionSource::Automated | TransactionSource::Recurring
935        );
936
937        // Select business process
938        let business_process = self.select_business_process();
939
940        // Determine if this is a fraudulent transaction
941        let fraud_type = self.determine_fraud();
942        let is_fraud = fraud_type.is_some();
943
944        // Sample time based on source
945        let time = self.temporal_sampler.sample_time(!is_automated);
946        let created_at = posting_date.and_time(time).and_utc();
947
948        // Select user from pool or generate generic
949        let (created_by, user_persona) = self.select_user(is_automated);
950
951        // Create header with deterministic UUID
952        let mut header =
953            JournalEntryHeader::with_deterministic_id(company_code, posting_date, document_id);
954        header.created_at = created_at;
955        header.source = source;
956        header.created_by = created_by;
957        header.user_persona = user_persona;
958        header.business_process = Some(business_process);
959        header.document_type = Self::document_type_for_process(business_process).to_string();
960        header.is_fraud = is_fraud;
961        header.fraud_type = fraud_type;
962
963        // --- ISA 240 audit flags ---
964        let is_manual = matches!(source, TransactionSource::Manual);
965        header.is_manual = is_manual;
966
967        // Determine source_system based on manual vs automated
968        header.source_system = if is_manual {
969            if self.rng.random::<f64>() < 0.70 {
970                "manual".to_string()
971            } else {
972                "spreadsheet".to_string()
973            }
974        } else {
975            let roll: f64 = self.rng.random();
976            if roll < 0.40 {
977                "SAP-FI".to_string()
978            } else if roll < 0.60 {
979                "SAP-MM".to_string()
980            } else if roll < 0.80 {
981                "SAP-SD".to_string()
982            } else if roll < 0.95 {
983                "interface".to_string()
984            } else {
985                "SAP-HR".to_string()
986            }
987        };
988
989        // is_post_close: entry is in the last month of the configured period
990        // and the posting date falls after the 25th (simulating close cutoff)
991        let is_post_close = posting_date.month() == self.end_date.month()
992            && posting_date.year() == self.end_date.year()
993            && posting_date.day() > 25;
994        header.is_post_close = is_post_close;
995
996        // created_date: for manual entries, same day as posting; for automated,
997        // 0-3 days before posting_date
998        let created_date = if is_manual {
999            posting_date.and_hms_opt(time.hour().min(23), time.minute(), time.second())
1000        } else {
1001            let lag_days = self.rng.random_range(0i64..=3);
1002            let created_naive_date = posting_date
1003                .checked_sub_signed(chrono::Duration::days(lag_days))
1004                .unwrap_or(posting_date);
1005            created_naive_date.and_hms_opt(
1006                self.rng.random_range(8u32..=17),
1007                self.rng.random_range(0u32..=59),
1008                self.rng.random_range(0u32..=59),
1009            )
1010        };
1011        header.created_date = created_date;
1012
1013        // Generate description context
1014        let mut context =
1015            DescriptionContext::with_period(posting_date.month(), posting_date.year());
1016
1017        // Add vendor/customer context based on business process
1018        match business_process {
1019            BusinessProcess::P2P => {
1020                if let Some(vendor) = self.vendor_pool.random_vendor(&mut self.rng) {
1021                    context.vendor_name = Some(vendor.name.clone());
1022                }
1023            }
1024            BusinessProcess::O2C => {
1025                if let Some(customer) = self.customer_pool.random_customer(&mut self.rng) {
1026                    context.customer_name = Some(customer.name.clone());
1027                }
1028            }
1029            _ => {}
1030        }
1031
1032        // Generate header text if enabled
1033        if self.template_config.descriptions.generate_header_text {
1034            header.header_text = Some(self.description_generator.generate_header_text(
1035                business_process,
1036                &context,
1037                &mut self.rng,
1038            ));
1039        }
1040
1041        // Generate reference if enabled
1042        if self.template_config.references.generate_references {
1043            header.reference = Some(
1044                self.reference_generator
1045                    .generate_for_process_year(business_process, posting_date.year()),
1046            );
1047        }
1048
1049        // Derive typed source document from reference prefix
1050        header.source_document = header
1051            .reference
1052            .as_deref()
1053            .and_then(DocumentRef::parse)
1054            .or_else(|| {
1055                if header.source == TransactionSource::Manual {
1056                    Some(DocumentRef::Manual)
1057                } else {
1058                    None
1059                }
1060            });
1061
1062        // Generate line items
1063        let mut entry = JournalEntry::new(header);
1064
1065        // Generate amount - use fraud pattern if this is a fraudulent transaction
1066        let base_amount = if let Some(ft) = fraud_type {
1067            let pattern = self.fraud_type_to_amount_pattern(ft);
1068            self.amount_sampler.sample_fraud(pattern)
1069        } else {
1070            self.amount_sampler.sample()
1071        };
1072
1073        // Apply temporal drift if configured
1074        let drift_adjusted_amount = {
1075            let drift = self.get_drift_adjustments(posting_date);
1076            if drift.amount_mean_multiplier != 1.0 {
1077                // Apply drift multiplier (includes seasonal factor if enabled)
1078                let multiplier = drift.amount_mean_multiplier * drift.seasonal_factor;
1079                let adjusted = base_amount.to_f64().unwrap_or(1.0) * multiplier;
1080                Decimal::from_f64_retain(adjusted).unwrap_or(base_amount)
1081            } else {
1082                base_amount
1083            }
1084        };
1085
1086        // Apply human variation to amounts for non-automated transactions
1087        let total_amount = if is_automated {
1088            drift_adjusted_amount // Automated systems use exact amounts
1089        } else {
1090            self.apply_human_variation(drift_adjusted_amount)
1091        };
1092
1093        // Generate debit lines
1094        let debit_amounts = self
1095            .amount_sampler
1096            .sample_summing_to(line_spec.debit_count, total_amount);
1097        for (i, amount) in debit_amounts.into_iter().enumerate() {
1098            let account_number = self.select_debit_account().account_number.clone();
1099            let mut line = JournalEntryLine::debit(
1100                entry.header.document_id,
1101                (i + 1) as u32,
1102                account_number.clone(),
1103                amount,
1104            );
1105
1106            // Generate line text if enabled
1107            if self.template_config.descriptions.generate_line_text {
1108                line.line_text = Some(self.description_generator.generate_line_text(
1109                    &account_number,
1110                    &context,
1111                    &mut self.rng,
1112                ));
1113            }
1114
1115            entry.add_line(line);
1116        }
1117
1118        // Generate credit lines - use the SAME amounts to ensure balance
1119        let credit_amounts = self
1120            .amount_sampler
1121            .sample_summing_to(line_spec.credit_count, total_amount);
1122        for (i, amount) in credit_amounts.into_iter().enumerate() {
1123            let account_number = self.select_credit_account().account_number.clone();
1124            let mut line = JournalEntryLine::credit(
1125                entry.header.document_id,
1126                (line_spec.debit_count + i + 1) as u32,
1127                account_number.clone(),
1128                amount,
1129            );
1130
1131            // Generate line text if enabled
1132            if self.template_config.descriptions.generate_line_text {
1133                line.line_text = Some(self.description_generator.generate_line_text(
1134                    &account_number,
1135                    &context,
1136                    &mut self.rng,
1137                ));
1138            }
1139
1140            entry.add_line(line);
1141        }
1142
1143        // Enrich line items with account descriptions, cost centers, etc.
1144        self.enrich_line_items(&mut entry);
1145
1146        // Apply persona-based errors if enabled and it's a human user
1147        if self.persona_errors_enabled && !is_automated {
1148            self.maybe_inject_persona_error(&mut entry);
1149        }
1150
1151        // Apply approval workflow if enabled and amount exceeds threshold
1152        if self.approval_enabled {
1153            self.maybe_apply_approval_workflow(&mut entry, posting_date);
1154        }
1155
1156        // Populate approved_by / approval_date from the approval workflow
1157        self.populate_approval_fields(&mut entry, posting_date);
1158
1159        // Maybe start a batch of similar entries for realism
1160        self.maybe_start_batch(&entry);
1161
1162        entry
1163    }
1164
1165    /// Enable or disable persona-based error injection.
1166    ///
1167    /// When enabled, entries created by human personas have a chance
1168    /// to contain realistic human errors based on their experience level.
1169    pub fn with_persona_errors(mut self, enabled: bool) -> Self {
1170        self.persona_errors_enabled = enabled;
1171        self
1172    }
1173
1174    /// Set fraud configuration for fraud injection.
1175    ///
1176    /// When fraud is enabled in the config, transactions have a chance
1177    /// to be marked as fraudulent based on the configured fraud rate.
1178    pub fn with_fraud_config(mut self, config: FraudConfig) -> Self {
1179        self.fraud_config = config;
1180        self
1181    }
1182
    /// Check if persona errors are enabled.
    ///
    /// Returns the current value of the `persona_errors_enabled` flag, as set
    /// by `with_persona_errors`.
    pub fn persona_errors_enabled(&self) -> bool {
        self.persona_errors_enabled
    }
1187
1188    /// Enable or disable batch processing behavior.
1189    ///
1190    /// When enabled (default), the generator will occasionally produce batches
1191    /// of similar entries, simulating how humans batch similar work together.
1192    pub fn with_batching(mut self, enabled: bool) -> Self {
1193        if !enabled {
1194            self.batch_state = None;
1195        }
1196        self
1197    }
1198
    /// Check if batch processing is enabled.
    ///
    /// Currently always returns `true`.
    ///
    /// NOTE(review): the result does not reflect `with_batching(false)`, which
    /// only clears the in-progress batch state and stores no flag - confirm
    /// whether callers rely on this value.
    pub fn batching_enabled(&self) -> bool {
        // Batching is implicitly enabled when not explicitly disabled;
        // no per-generator flag is consulted here.
        true
    }
1204
1205    /// Maybe start a batch based on the current entry.
1206    ///
1207    /// Humans often batch similar work: processing invoices from one vendor,
1208    /// entering expense reports for a trip, reconciling similar items.
1209    fn maybe_start_batch(&mut self, entry: &JournalEntry) {
1210        // Only start batch for non-automated, non-fraud entries
1211        if entry.header.source == TransactionSource::Automated || entry.header.is_fraud {
1212            return;
1213        }
1214
1215        // 15% chance to start a batch (most work is not batched)
1216        if self.rng.random::<f64>() > 0.15 {
1217            return;
1218        }
1219
1220        // Extract key attributes for batching
1221        let base_account = entry
1222            .lines
1223            .first()
1224            .map(|l| l.gl_account.clone())
1225            .unwrap_or_default();
1226
1227        let base_amount = entry.total_debit();
1228
1229        self.batch_state = Some(BatchState {
1230            base_account_number: base_account,
1231            base_amount,
1232            base_business_process: entry.header.business_process,
1233            base_posting_date: entry.header.posting_date,
1234            remaining: self.rng.random_range(2..7), // 2-6 more similar entries
1235        });
1236    }
1237
1238    /// Generate an entry that's part of the current batch.
1239    ///
1240    /// Batched entries have:
1241    /// - Same or very similar business process
1242    /// - Same posting date (batched work done together)
1243    /// - Similar amounts (within ±15%)
1244    /// - Same debit account (processing similar items)
1245    fn generate_batched_entry(&mut self) -> JournalEntry {
1246        use rust_decimal::Decimal;
1247
1248        // Decrement batch counter
1249        if let Some(ref mut state) = self.batch_state {
1250            state.remaining = state.remaining.saturating_sub(1);
1251        }
1252
1253        let Some(batch) = self.batch_state.clone() else {
1254            // This is a programming error - batch_state should be set before calling this method.
1255            // Clear state and fall back to generating a standard entry instead of panicking.
1256            tracing::warn!(
1257                "generate_batched_entry called without batch_state; generating standard entry"
1258            );
1259            self.batch_state = None;
1260            return self.generate();
1261        };
1262
1263        // Use the batch's posting date (work done on same day)
1264        let posting_date = batch.base_posting_date;
1265
1266        self.count += 1;
1267        let document_id = self.generate_deterministic_uuid();
1268
1269        // Select same company (batched work is usually same company)
1270        let company_code = self.company_selector.select(&mut self.rng).to_string();
1271
1272        // Use simplified line spec for batched entries (usually 2-line)
1273        let _line_spec = LineItemSpec {
1274            total_count: 2,
1275            debit_count: 1,
1276            credit_count: 1,
1277            split_type: DebitCreditSplit::Equal,
1278        };
1279
1280        // Batched entries are always manual
1281        let source = TransactionSource::Manual;
1282
1283        // Use the batch's business process
1284        let business_process = batch.base_business_process.unwrap_or(BusinessProcess::R2R);
1285
1286        // Sample time
1287        let time = self.temporal_sampler.sample_time(true);
1288        let created_at = posting_date.and_time(time).and_utc();
1289
1290        // Same user for batched work
1291        let (created_by, user_persona) = self.select_user(false);
1292
1293        // Create header
1294        let mut header =
1295            JournalEntryHeader::with_deterministic_id(company_code, posting_date, document_id);
1296        header.created_at = created_at;
1297        header.source = source;
1298        header.created_by = created_by;
1299        header.user_persona = user_persona;
1300        header.business_process = Some(business_process);
1301        header.document_type = Self::document_type_for_process(business_process).to_string();
1302
1303        // Batched manual entries have Manual source document
1304        header.source_document = Some(DocumentRef::Manual);
1305
1306        // ISA 240 audit flags for batched entries (always manual)
1307        header.is_manual = true;
1308        header.source_system = if self.rng.random::<f64>() < 0.70 {
1309            "manual".to_string()
1310        } else {
1311            "spreadsheet".to_string()
1312        };
1313        header.is_post_close = posting_date.month() == self.end_date.month()
1314            && posting_date.year() == self.end_date.year()
1315            && posting_date.day() > 25;
1316        header.created_date =
1317            posting_date.and_hms_opt(time.hour().min(23), time.minute(), time.second());
1318
1319        // Generate similar amount (within ±15% of base)
1320        let variation = self.rng.random_range(-0.15..0.15);
1321        let varied_amount =
1322            batch.base_amount * (Decimal::ONE + Decimal::try_from(variation).unwrap_or_default());
1323        let total_amount = varied_amount.round_dp(2).max(Decimal::from(1));
1324
1325        // Create the entry
1326        let mut entry = JournalEntry::new(header);
1327
1328        // Use same debit account as batch base
1329        let debit_line = JournalEntryLine::debit(
1330            entry.header.document_id,
1331            1,
1332            batch.base_account_number.clone(),
1333            total_amount,
1334        );
1335        entry.add_line(debit_line);
1336
1337        // Select a credit account
1338        let credit_account = self.select_credit_account().account_number.clone();
1339        let credit_line =
1340            JournalEntryLine::credit(entry.header.document_id, 2, credit_account, total_amount);
1341        entry.add_line(credit_line);
1342
1343        // Enrich line items with account descriptions, cost centers, etc.
1344        self.enrich_line_items(&mut entry);
1345
1346        // Apply persona-based errors if enabled
1347        if self.persona_errors_enabled {
1348            self.maybe_inject_persona_error(&mut entry);
1349        }
1350
1351        // Apply approval workflow if enabled
1352        if self.approval_enabled {
1353            self.maybe_apply_approval_workflow(&mut entry, posting_date);
1354        }
1355
1356        // Populate approved_by / approval_date from the approval workflow
1357        self.populate_approval_fields(&mut entry, posting_date);
1358
1359        // Clear batch state if no more entries remaining
1360        if batch.remaining <= 1 {
1361            self.batch_state = None;
1362        }
1363
1364        entry
1365    }
1366
1367    /// Maybe inject a persona-appropriate error based on the persona's error rate.
1368    fn maybe_inject_persona_error(&mut self, entry: &mut JournalEntry) {
1369        // Parse persona from the entry header
1370        let persona_str = &entry.header.user_persona;
1371        let persona = match persona_str.to_lowercase().as_str() {
1372            s if s.contains("junior") => UserPersona::JuniorAccountant,
1373            s if s.contains("senior") => UserPersona::SeniorAccountant,
1374            s if s.contains("controller") => UserPersona::Controller,
1375            s if s.contains("manager") => UserPersona::Manager,
1376            s if s.contains("executive") => UserPersona::Executive,
1377            _ => return, // Don't inject errors for unknown personas
1378        };
1379
1380        // Get base error rate from persona
1381        let base_error_rate = persona.error_rate();
1382
1383        // Apply stress factors based on posting date
1384        let adjusted_rate = self.apply_stress_factors(base_error_rate, entry.header.posting_date);
1385
1386        // Check if error should occur based on adjusted rate
1387        if self.rng.random::<f64>() >= adjusted_rate {
1388            return; // No error this time
1389        }
1390
1391        // Select and inject persona-appropriate error
1392        self.inject_human_error(entry, persona);
1393    }
1394
1395    /// Apply contextual stress factors to the base error rate.
1396    ///
1397    /// Stress factors increase error likelihood during:
1398    /// - Month-end (day >= 28): 1.5x more errors due to deadline pressure
1399    /// - Quarter-end (Mar, Jun, Sep, Dec): additional 25% boost
1400    /// - Year-end (December 28-31): 2.0x more errors due to audit pressure
1401    /// - Monday morning (catch-up work): 20% more errors
1402    /// - Friday afternoon (rushing to leave): 30% more errors
1403    fn apply_stress_factors(&self, base_rate: f64, posting_date: chrono::NaiveDate) -> f64 {
1404        use chrono::Datelike;
1405
1406        let mut rate = base_rate;
1407        let day = posting_date.day();
1408        let month = posting_date.month();
1409
1410        // Year-end stress (December 28-31): double the error rate
1411        if month == 12 && day >= 28 {
1412            rate *= 2.0;
1413            return rate.min(0.5); // Cap at 50% to keep it realistic
1414        }
1415
1416        // Quarter-end stress (last days of Mar, Jun, Sep, Dec)
1417        if matches!(month, 3 | 6 | 9 | 12) && day >= 28 {
1418            rate *= 1.75; // 75% more errors at quarter end
1419            return rate.min(0.4);
1420        }
1421
1422        // Month-end stress (last 3 days of month)
1423        if day >= 28 {
1424            rate *= 1.5; // 50% more errors at month end
1425        }
1426
1427        // Day-of-week stress effects
1428        let weekday = posting_date.weekday();
1429        match weekday {
1430            chrono::Weekday::Mon => {
1431                // Monday: catching up, often rushed
1432                rate *= 1.2;
1433            }
1434            chrono::Weekday::Fri => {
1435                // Friday: rushing to finish before weekend
1436                rate *= 1.3;
1437            }
1438            _ => {}
1439        }
1440
1441        // Cap at 40% to keep it realistic
1442        rate.min(0.4)
1443    }
1444
1445    /// Apply human-like variation to an amount.
1446    ///
1447    /// Humans don't enter perfectly calculated amounts - they:
1448    /// - Round amounts differently
1449    /// - Estimate instead of calculating exactly
1450    /// - Make small input variations
1451    ///
1452    /// This applies small variations (typically ±2%) to make amounts more realistic.
1453    fn apply_human_variation(&mut self, amount: rust_decimal::Decimal) -> rust_decimal::Decimal {
1454        use rust_decimal::Decimal;
1455
1456        // Automated transactions or very small amounts don't get variation
1457        if amount < Decimal::from(10) {
1458            return amount;
1459        }
1460
1461        // 70% chance of human variation being applied
1462        if self.rng.random::<f64>() > 0.70 {
1463            return amount;
1464        }
1465
1466        // Decide which type of human variation to apply
1467        let variation_type: u8 = self.rng.random_range(0..4);
1468
1469        match variation_type {
1470            0 => {
1471                // ±2% variation (common for estimated amounts)
1472                let variation_pct = self.rng.random_range(-0.02..0.02);
1473                let variation = amount * Decimal::try_from(variation_pct).unwrap_or_default();
1474                (amount + variation).round_dp(2)
1475            }
1476            1 => {
1477                // Round to nearest $10
1478                let ten = Decimal::from(10);
1479                (amount / ten).round() * ten
1480            }
1481            2 => {
1482                // Round to nearest $100 (for larger amounts)
1483                if amount >= Decimal::from(500) {
1484                    let hundred = Decimal::from(100);
1485                    (amount / hundred).round() * hundred
1486                } else {
1487                    amount
1488                }
1489            }
1490            3 => {
1491                // Slight under/over payment (±$0.01 to ±$1.00)
1492                let cents = Decimal::new(self.rng.random_range(-100..100), 2);
1493                (amount + cents).max(Decimal::ZERO).round_dp(2)
1494            }
1495            _ => amount,
1496        }
1497    }
1498
1499    /// Rebalance an entry after a one-sided amount modification.
1500    ///
1501    /// When an error modifies one line's amount, this finds a line on the opposite
1502    /// side (credit if modified was debit, or vice versa) and adjusts it by the
1503    /// same impact to maintain balance.
1504    fn rebalance_entry(entry: &mut JournalEntry, modified_was_debit: bool, impact: Decimal) {
1505        // Find a line on the opposite side to adjust
1506        let balancing_idx = entry.lines.iter().position(|l| {
1507            if modified_was_debit {
1508                l.credit_amount > Decimal::ZERO
1509            } else {
1510                l.debit_amount > Decimal::ZERO
1511            }
1512        });
1513
1514        if let Some(idx) = balancing_idx {
1515            if modified_was_debit {
1516                entry.lines[idx].credit_amount += impact;
1517            } else {
1518                entry.lines[idx].debit_amount += impact;
1519            }
1520        }
1521    }
1522
1523    /// Inject a human-like error based on the persona.
1524    ///
1525    /// All error types maintain balance - amount modifications are applied to both sides.
1526    /// Entries are marked with [HUMAN_ERROR:*] tags in header_text for ML detection.
1527    fn inject_human_error(&mut self, entry: &mut JournalEntry, persona: UserPersona) {
1528        use rust_decimal::Decimal;
1529
1530        // Different personas make different types of errors
1531        let error_type: u8 = match persona {
1532            UserPersona::JuniorAccountant => {
1533                // Junior accountants make more varied errors
1534                self.rng.random_range(0..5)
1535            }
1536            UserPersona::SeniorAccountant => {
1537                // Senior accountants mainly make transposition errors
1538                self.rng.random_range(0..3)
1539            }
1540            UserPersona::Controller | UserPersona::Manager => {
1541                // Controllers/managers mainly make rounding or cutoff errors
1542                self.rng.random_range(3..5)
1543            }
1544            _ => return,
1545        };
1546
1547        match error_type {
1548            0 => {
1549                // Transposed digits in an amount
1550                if let Some(line) = entry.lines.get_mut(0) {
1551                    let is_debit = line.debit_amount > Decimal::ZERO;
1552                    let original_amount = if is_debit {
1553                        line.debit_amount
1554                    } else {
1555                        line.credit_amount
1556                    };
1557
1558                    // Simple digit swap in the string representation
1559                    let s = original_amount.to_string();
1560                    if s.len() >= 2 {
1561                        let chars: Vec<char> = s.chars().collect();
1562                        let pos = self.rng.random_range(0..chars.len().saturating_sub(1));
1563                        if chars[pos].is_ascii_digit()
1564                            && chars.get(pos + 1).is_some_and(char::is_ascii_digit)
1565                        {
1566                            let mut new_chars = chars;
1567                            new_chars.swap(pos, pos + 1);
1568                            if let Ok(new_amount) =
1569                                new_chars.into_iter().collect::<String>().parse::<Decimal>()
1570                            {
1571                                let impact = new_amount - original_amount;
1572
1573                                // Apply to the modified line
1574                                if is_debit {
1575                                    entry.lines[0].debit_amount = new_amount;
1576                                } else {
1577                                    entry.lines[0].credit_amount = new_amount;
1578                                }
1579
1580                                // Rebalance the entry
1581                                Self::rebalance_entry(entry, is_debit, impact);
1582
1583                                entry.header.header_text = Some(
1584                                    entry.header.header_text.clone().unwrap_or_default()
1585                                        + " [HUMAN_ERROR:TRANSPOSITION]",
1586                                );
1587                            }
1588                        }
1589                    }
1590                }
1591            }
1592            1 => {
1593                // Wrong decimal place (off by factor of 10)
1594                if let Some(line) = entry.lines.get_mut(0) {
1595                    let is_debit = line.debit_amount > Decimal::ZERO;
1596                    let original_amount = if is_debit {
1597                        line.debit_amount
1598                    } else {
1599                        line.credit_amount
1600                    };
1601
1602                    let new_amount = original_amount * Decimal::new(10, 0);
1603                    let impact = new_amount - original_amount;
1604
1605                    // Apply to the modified line
1606                    if is_debit {
1607                        entry.lines[0].debit_amount = new_amount;
1608                    } else {
1609                        entry.lines[0].credit_amount = new_amount;
1610                    }
1611
1612                    // Rebalance the entry
1613                    Self::rebalance_entry(entry, is_debit, impact);
1614
1615                    entry.header.header_text = Some(
1616                        entry.header.header_text.clone().unwrap_or_default()
1617                            + " [HUMAN_ERROR:DECIMAL_SHIFT]",
1618                    );
1619                }
1620            }
1621            2 => {
1622                // Typo in description (doesn't affect balance)
1623                if let Some(ref mut text) = entry.header.header_text {
1624                    let typos = ["teh", "adn", "wiht", "taht", "recieve"];
1625                    let correct = ["the", "and", "with", "that", "receive"];
1626                    let idx = self.rng.random_range(0..typos.len());
1627                    if text.to_lowercase().contains(correct[idx]) {
1628                        *text = text.replace(correct[idx], typos[idx]);
1629                        *text = format!("{text} [HUMAN_ERROR:TYPO]");
1630                    }
1631                }
1632            }
1633            3 => {
1634                // Rounding to round number
1635                if let Some(line) = entry.lines.get_mut(0) {
1636                    let is_debit = line.debit_amount > Decimal::ZERO;
1637                    let original_amount = if is_debit {
1638                        line.debit_amount
1639                    } else {
1640                        line.credit_amount
1641                    };
1642
1643                    let new_amount =
1644                        (original_amount / Decimal::new(100, 0)).round() * Decimal::new(100, 0);
1645                    let impact = new_amount - original_amount;
1646
1647                    // Apply to the modified line
1648                    if is_debit {
1649                        entry.lines[0].debit_amount = new_amount;
1650                    } else {
1651                        entry.lines[0].credit_amount = new_amount;
1652                    }
1653
1654                    // Rebalance the entry
1655                    Self::rebalance_entry(entry, is_debit, impact);
1656
1657                    entry.header.header_text = Some(
1658                        entry.header.header_text.clone().unwrap_or_default()
1659                            + " [HUMAN_ERROR:ROUNDED]",
1660                    );
1661                }
1662            }
1663            // Late posting marker (document date much earlier than posting
1664            // date). Doesn't create an imbalance.
1665            4 if entry.header.document_date == entry.header.posting_date => {
1666                let days_late = self.rng.random_range(5..15);
1667                entry.header.document_date =
1668                    entry.header.posting_date - chrono::Duration::days(days_late);
1669                entry.header.header_text = Some(
1670                    entry.header.header_text.clone().unwrap_or_default()
1671                        + " [HUMAN_ERROR:LATE_POSTING]",
1672                );
1673            }
1674            _ => {}
1675        }
1676    }
1677
1678    /// Apply approval workflow for high-value transactions.
1679    ///
1680    /// If the entry amount exceeds the approval threshold, simulate an
1681    /// approval workflow with appropriate approvers based on amount.
1682    fn maybe_apply_approval_workflow(
1683        &mut self,
1684        entry: &mut JournalEntry,
1685        _posting_date: NaiveDate,
1686    ) {
1687        use rust_decimal::Decimal;
1688
1689        let amount = entry.total_debit();
1690
1691        // Skip if amount is below threshold
1692        if amount <= self.approval_threshold {
1693            // Auto-approved below threshold
1694            let workflow = ApprovalWorkflow::auto_approved(
1695                entry.header.created_by.clone(),
1696                entry.header.user_persona.clone(),
1697                amount,
1698                entry.header.created_at,
1699            );
1700            entry.header.approval_workflow = Some(workflow);
1701            return;
1702        }
1703
1704        // Mark as SOX relevant for high-value transactions
1705        entry.header.sox_relevant = true;
1706
1707        // Determine required approval levels based on amount
1708        let required_levels = if amount > Decimal::new(100000, 0) {
1709            3 // Executive approval required
1710        } else if amount > Decimal::new(50000, 0) {
1711            2 // Senior management approval
1712        } else {
1713            1 // Manager approval
1714        };
1715
1716        // Create the approval workflow
1717        let mut workflow = ApprovalWorkflow::new(
1718            entry.header.created_by.clone(),
1719            entry.header.user_persona.clone(),
1720            amount,
1721        );
1722        workflow.required_levels = required_levels;
1723
1724        // Simulate submission
1725        let submit_time = entry.header.created_at;
1726        let submit_action = ApprovalAction::new(
1727            entry.header.created_by.clone(),
1728            entry.header.user_persona.clone(),
1729            self.parse_persona(&entry.header.user_persona),
1730            ApprovalActionType::Submit,
1731            0,
1732        )
1733        .with_timestamp(submit_time);
1734
1735        workflow.actions.push(submit_action);
1736        workflow.status = ApprovalStatus::Pending;
1737        workflow.submitted_at = Some(submit_time);
1738
1739        // Simulate approvals with realistic delays
1740        let mut current_time = submit_time;
1741        for level in 1..=required_levels {
1742            // Add delay for approval (1-3 business hours per level)
1743            let delay_hours = self.rng.random_range(1..4);
1744            current_time += chrono::Duration::hours(delay_hours);
1745
1746            // Skip weekends
1747            while current_time.weekday() == chrono::Weekday::Sat
1748                || current_time.weekday() == chrono::Weekday::Sun
1749            {
1750                current_time += chrono::Duration::days(1);
1751            }
1752
1753            // Generate approver based on level
1754            let (approver_id, approver_role) = self.select_approver(level);
1755
1756            let approve_action = ApprovalAction::new(
1757                approver_id.clone(),
1758                approver_role.to_string(),
1759                approver_role,
1760                ApprovalActionType::Approve,
1761                level,
1762            )
1763            .with_timestamp(current_time);
1764
1765            workflow.actions.push(approve_action);
1766            workflow.current_level = level;
1767        }
1768
1769        // Mark as approved
1770        workflow.status = ApprovalStatus::Approved;
1771        workflow.approved_at = Some(current_time);
1772
1773        entry.header.approval_workflow = Some(workflow);
1774    }
1775
1776    /// Select an approver based on the required level.
1777    fn select_approver(&mut self, level: u8) -> (String, UserPersona) {
1778        let persona = match level {
1779            1 => UserPersona::Manager,
1780            2 => UserPersona::Controller,
1781            _ => UserPersona::Executive,
1782        };
1783
1784        // Try to get from user pool first
1785        if let Some(ref pool) = self.user_pool {
1786            if let Some(user) = pool.get_random_user(persona, &mut self.rng) {
1787                return (user.user_id.clone(), persona);
1788            }
1789        }
1790
1791        // Fallback to generated approver
1792        let approver_id = match persona {
1793            UserPersona::Manager => format!("MGR{:04}", self.rng.random_range(1..100)),
1794            UserPersona::Controller => format!("CTRL{:04}", self.rng.random_range(1..20)),
1795            UserPersona::Executive => format!("EXEC{:04}", self.rng.random_range(1..10)),
1796            _ => format!("USR{:04}", self.rng.random_range(1..1000)),
1797        };
1798
1799        (approver_id, persona)
1800    }
1801
1802    /// Parse user persona from string.
1803    fn parse_persona(&self, persona_str: &str) -> UserPersona {
1804        match persona_str.to_lowercase().as_str() {
1805            s if s.contains("junior") => UserPersona::JuniorAccountant,
1806            s if s.contains("senior") => UserPersona::SeniorAccountant,
1807            s if s.contains("controller") => UserPersona::Controller,
1808            s if s.contains("manager") => UserPersona::Manager,
1809            s if s.contains("executive") => UserPersona::Executive,
1810            s if s.contains("automated") || s.contains("system") => UserPersona::AutomatedSystem,
1811            _ => UserPersona::JuniorAccountant, // Default
1812        }
1813    }
1814
1815    /// Enable or disable approval workflow.
1816    pub fn with_approval(mut self, enabled: bool) -> Self {
1817        self.approval_enabled = enabled;
1818        self
1819    }
1820
1821    /// Set the approval threshold amount.
1822    pub fn with_approval_threshold(mut self, threshold: rust_decimal::Decimal) -> Self {
1823        self.approval_threshold = threshold;
1824        self
1825    }
1826
1827    /// Set the SOD violation rate for approval tracking.
1828    ///
1829    /// When a transaction is approved, there is a `rate` probability (0.0 to 1.0)
1830    /// that the approver is the same as the creator, which constitutes a SOD violation.
1831    /// Default is 0.10 (10%).
1832    pub fn with_sod_violation_rate(mut self, rate: f64) -> Self {
1833        self.sod_violation_rate = rate;
1834        self
1835    }
1836
1837    /// Populate `approved_by` and `approval_date` from the approval workflow,
1838    /// and flag SOD violations when the approver matches the creator.
1839    fn populate_approval_fields(&mut self, entry: &mut JournalEntry, posting_date: NaiveDate) {
1840        if let Some(ref workflow) = entry.header.approval_workflow {
1841            // Extract the last approver from the workflow actions
1842            let last_approver = workflow
1843                .actions
1844                .iter()
1845                .rev()
1846                .find(|a| matches!(a.action, ApprovalActionType::Approve));
1847
1848            if let Some(approver_action) = last_approver {
1849                entry.header.approved_by = Some(approver_action.actor_id.clone());
1850                entry.header.approval_date = Some(approver_action.action_timestamp.date_naive());
1851            } else {
1852                // No explicit approver (auto-approved); use the preparer
1853                entry.header.approved_by = Some(workflow.preparer_id.clone());
1854                entry.header.approval_date = Some(posting_date);
1855            }
1856
1857            // Inject SOD violation: with configured probability, set approver = creator
1858            if self.rng.random::<f64>() < self.sod_violation_rate {
1859                let creator = entry.header.created_by.clone();
1860                entry.header.approved_by = Some(creator);
1861                entry.header.sod_violation = true;
1862                entry.header.sod_conflict_type = Some(SodConflictType::PreparerApprover);
1863            }
1864        }
1865    }
1866
1867    /// Set the temporal drift controller for simulating distribution changes over time.
1868    ///
1869    /// When drift is enabled, amounts and other distributions will shift based on
1870    /// the period (month) to simulate realistic temporal evolution like inflation
1871    /// or increasing fraud rates.
1872    pub fn with_drift_controller(mut self, controller: DriftController) -> Self {
1873        self.drift_controller = Some(controller);
1874        self
1875    }
1876
1877    /// Set drift configuration directly.
1878    ///
1879    /// Creates a drift controller from the config. Total periods is calculated
1880    /// from the date range.
1881    pub fn with_drift_config(mut self, config: DriftConfig, seed: u64) -> Self {
1882        if config.enabled {
1883            let total_periods = self.calculate_total_periods();
1884            self.drift_controller = Some(DriftController::new(config, seed, total_periods));
1885        }
1886        self
1887    }
1888
1889    /// Calculate total periods (months) in the date range.
1890    fn calculate_total_periods(&self) -> u32 {
1891        let start_year = self.start_date.year();
1892        let start_month = self.start_date.month();
1893        let end_year = self.end_date.year();
1894        let end_month = self.end_date.month();
1895
1896        ((end_year - start_year) * 12 + (end_month as i32 - start_month as i32) + 1).max(1) as u32
1897    }
1898
1899    /// Calculate the period number (0-indexed) for a given date.
1900    fn date_to_period(&self, date: NaiveDate) -> u32 {
1901        let start_year = self.start_date.year();
1902        let start_month = self.start_date.month() as i32;
1903        let date_year = date.year();
1904        let date_month = date.month() as i32;
1905
1906        ((date_year - start_year) * 12 + (date_month - start_month)).max(0) as u32
1907    }
1908
1909    /// Get drift adjustments for a given date.
1910    fn get_drift_adjustments(&self, date: NaiveDate) -> DriftAdjustments {
1911        if let Some(ref controller) = self.drift_controller {
1912            let period = self.date_to_period(date);
1913            controller.compute_adjustments(period)
1914        } else {
1915            DriftAdjustments::none()
1916        }
1917    }
1918
1919    /// Select a user from the pool or generate a generic user ID.
1920    #[inline]
1921    fn select_user(&mut self, is_automated: bool) -> (String, String) {
1922        if let Some(ref pool) = self.user_pool {
1923            let persona = if is_automated {
1924                UserPersona::AutomatedSystem
1925            } else {
1926                // Random distribution among human personas
1927                let roll: f64 = self.rng.random();
1928                if roll < 0.4 {
1929                    UserPersona::JuniorAccountant
1930                } else if roll < 0.7 {
1931                    UserPersona::SeniorAccountant
1932                } else if roll < 0.85 {
1933                    UserPersona::Controller
1934                } else {
1935                    UserPersona::Manager
1936                }
1937            };
1938
1939            if let Some(user) = pool.get_random_user(persona, &mut self.rng) {
1940                return (user.user_id.clone(), user.persona.to_string());
1941            }
1942        }
1943
1944        // Fallback to generic format
1945        if is_automated {
1946            (
1947                format!("BATCH{:04}", self.rng.random_range(1..=20)),
1948                "automated_system".to_string(),
1949            )
1950        } else {
1951            (
1952                format!("USER{:04}", self.rng.random_range(1..=40)),
1953                "senior_accountant".to_string(),
1954            )
1955        }
1956    }
1957
1958    /// Select transaction source based on configuration weights.
1959    #[inline]
1960    fn select_source(&mut self) -> TransactionSource {
1961        let roll: f64 = self.rng.random();
1962        let dist = &self.config.source_distribution;
1963
1964        if roll < dist.manual {
1965            TransactionSource::Manual
1966        } else if roll < dist.manual + dist.automated {
1967            TransactionSource::Automated
1968        } else if roll < dist.manual + dist.automated + dist.recurring {
1969            TransactionSource::Recurring
1970        } else {
1971            TransactionSource::Adjustment
1972        }
1973    }
1974
1975    /// Select a business process based on configuration weights.
1976    #[inline]
1977    /// Map a business process to a SAP-style document type code.
1978    ///
1979    /// - P2P → "KR" (vendor invoice)
1980    /// - O2C → "DR" (customer invoice)
1981    /// - R2R → "SA" (general journal)
1982    /// - H2R → "HR" (HR posting)
1983    /// - A2R → "AA" (asset posting)
1984    /// - others → "SA"
1985    fn document_type_for_process(process: BusinessProcess) -> &'static str {
1986        match process {
1987            BusinessProcess::P2P => "KR",
1988            BusinessProcess::O2C => "DR",
1989            BusinessProcess::R2R => "SA",
1990            BusinessProcess::H2R => "HR",
1991            BusinessProcess::A2R => "AA",
1992            _ => "SA",
1993        }
1994    }
1995
1996    fn select_business_process(&mut self) -> BusinessProcess {
1997        *datasynth_core::utils::weighted_select(&mut self.rng, &self.business_process_weights)
1998    }
1999
2000    #[inline]
2001    fn select_debit_account(&mut self) -> &GLAccount {
2002        let accounts = self.coa.get_accounts_by_type(AccountType::Asset);
2003        let expense_accounts = self.coa.get_accounts_by_type(AccountType::Expense);
2004
2005        // 60% asset, 40% expense for debits
2006        let all: Vec<_> = if self.rng.random::<f64>() < 0.6 {
2007            accounts
2008        } else {
2009            expense_accounts
2010        };
2011
2012        all.choose(&mut self.rng).copied().unwrap_or_else(|| {
2013            tracing::warn!(
2014                "Account selection returned empty list, falling back to first COA account"
2015            );
2016            &self.coa.accounts[0]
2017        })
2018    }
2019
2020    #[inline]
2021    fn select_credit_account(&mut self) -> &GLAccount {
2022        let liability_accounts = self.coa.get_accounts_by_type(AccountType::Liability);
2023        let revenue_accounts = self.coa.get_accounts_by_type(AccountType::Revenue);
2024
2025        // 60% liability, 40% revenue for credits
2026        let all: Vec<_> = if self.rng.random::<f64>() < 0.6 {
2027            liability_accounts
2028        } else {
2029            revenue_accounts
2030        };
2031
2032        all.choose(&mut self.rng).copied().unwrap_or_else(|| {
2033            tracing::warn!(
2034                "Account selection returned empty list, falling back to first COA account"
2035            );
2036            &self.coa.accounts[0]
2037        })
2038    }
2039}
2040
2041impl Generator for JournalEntryGenerator {
2042    type Item = JournalEntry;
2043    type Config = (
2044        TransactionConfig,
2045        Arc<ChartOfAccounts>,
2046        Vec<String>,
2047        NaiveDate,
2048        NaiveDate,
2049    );
2050
2051    fn new(config: Self::Config, seed: u64) -> Self {
2052        Self::new_with_params(config.0, config.1, config.2, config.3, config.4, seed)
2053    }
2054
2055    fn generate_one(&mut self) -> Self::Item {
2056        self.generate()
2057    }
2058
2059    fn reset(&mut self) {
2060        self.rng = seeded_rng(self.seed, 0);
2061        self.line_sampler.reset(self.seed + 1);
2062        self.amount_sampler.reset(self.seed + 2);
2063        self.temporal_sampler.reset(self.seed + 3);
2064        self.count = 0;
2065        self.uuid_factory.reset();
2066
2067        // Reset reference generator by recreating it
2068        let mut ref_gen = ReferenceGenerator::new(
2069            self.start_date.year(),
2070            self.companies
2071                .first()
2072                .map(std::string::String::as_str)
2073                .unwrap_or("1000"),
2074        );
2075        ref_gen.set_prefix(
2076            ReferenceType::Invoice,
2077            &self.template_config.references.invoice_prefix,
2078        );
2079        ref_gen.set_prefix(
2080            ReferenceType::PurchaseOrder,
2081            &self.template_config.references.po_prefix,
2082        );
2083        ref_gen.set_prefix(
2084            ReferenceType::SalesOrder,
2085            &self.template_config.references.so_prefix,
2086        );
2087        self.reference_generator = ref_gen;
2088    }
2089
2090    fn count(&self) -> u64 {
2091        self.count
2092    }
2093
2094    fn seed(&self) -> u64 {
2095        self.seed
2096    }
2097}
2098
2099use datasynth_core::traits::ParallelGenerator;
2100
2101impl ParallelGenerator for JournalEntryGenerator {
2102    /// Split this generator into `parts` independent sub-generators.
2103    ///
2104    /// Each sub-generator gets a deterministic seed derived from the parent seed
2105    /// and its partition index, plus a partitioned UUID factory to avoid contention.
2106    /// The results are deterministic for a given partition count.
2107    fn split(self, parts: usize) -> Vec<Self> {
2108        let parts = parts.max(1);
2109        (0..parts)
2110            .map(|i| {
2111                // Derive a unique seed per partition using a golden-ratio constant
2112                let sub_seed = self
2113                    .seed
2114                    .wrapping_add((i as u64).wrapping_mul(0x9E3779B97F4A7C15));
2115
2116                let mut gen = JournalEntryGenerator::new_with_full_config(
2117                    self.config.clone(),
2118                    Arc::clone(&self.coa),
2119                    self.companies.clone(),
2120                    self.start_date,
2121                    self.end_date,
2122                    sub_seed,
2123                    self.template_config.clone(),
2124                    self.user_pool.clone(),
2125                );
2126
2127                // Copy over configuration state
2128                gen.company_selector = self.company_selector.clone();
2129                gen.vendor_pool = self.vendor_pool.clone();
2130                gen.customer_pool = self.customer_pool.clone();
2131                gen.material_pool = self.material_pool.clone();
2132                gen.using_real_master_data = self.using_real_master_data;
2133                gen.fraud_config = self.fraud_config.clone();
2134                gen.persona_errors_enabled = self.persona_errors_enabled;
2135                gen.approval_enabled = self.approval_enabled;
2136                gen.approval_threshold = self.approval_threshold;
2137                gen.sod_violation_rate = self.sod_violation_rate;
2138
2139                // Use partitioned UUID factory to eliminate atomic contention
2140                gen.uuid_factory = DeterministicUuidFactory::for_partition(
2141                    sub_seed,
2142                    GeneratorType::JournalEntry,
2143                    i as u8,
2144                );
2145
2146                // Copy temporal patterns if configured
2147                if let Some(ref config) = self.temporal_patterns_config {
2148                    gen.temporal_patterns_config = Some(config.clone());
2149                    // Rebuild business day calculator from the stored config
2150                    if config.business_days.enabled {
2151                        if let Some(ref bdc) = self.business_day_calculator {
2152                            gen.business_day_calculator = Some(bdc.clone());
2153                        }
2154                    }
2155                    // Rebuild processing lag calculator with partition seed
2156                    if config.processing_lags.enabled {
2157                        let lag_config =
2158                            Self::convert_processing_lag_config(&config.processing_lags);
2159                        gen.processing_lag_calculator =
2160                            Some(ProcessingLagCalculator::with_config(sub_seed, lag_config));
2161                    }
2162                }
2163
2164                // Copy drift controller if present
2165                if let Some(ref dc) = self.drift_controller {
2166                    gen.drift_controller = Some(dc.clone());
2167                }
2168
2169                gen
2170            })
2171            .collect()
2172    }
2173}
2174
2175#[cfg(test)]
2176#[allow(clippy::unwrap_used)]
2177mod tests {
2178    use super::*;
2179    use crate::ChartOfAccountsGenerator;
2180
2181    #[test]
2182    fn test_generate_balanced_entries() {
2183        let mut coa_gen =
2184            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2185        let coa = Arc::new(coa_gen.generate());
2186
2187        let mut je_gen = JournalEntryGenerator::new_with_params(
2188            TransactionConfig::default(),
2189            coa,
2190            vec!["1000".to_string()],
2191            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2192            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2193            42,
2194        );
2195
2196        let mut balanced_count = 0;
2197        for _ in 0..100 {
2198            let entry = je_gen.generate();
2199
2200            // Skip entries with human errors as they may be intentionally unbalanced
2201            let has_human_error = entry
2202                .header
2203                .header_text
2204                .as_ref()
2205                .map(|t| t.contains("[HUMAN_ERROR:"))
2206                .unwrap_or(false);
2207
2208            if !has_human_error {
2209                assert!(
2210                    entry.is_balanced(),
2211                    "Entry {:?} is not balanced",
2212                    entry.header.document_id
2213                );
2214                balanced_count += 1;
2215            }
2216            assert!(entry.line_count() >= 2, "Entry has fewer than 2 lines");
2217        }
2218
2219        // Ensure most entries are balanced (human errors are rare)
2220        assert!(
2221            balanced_count >= 80,
2222            "Expected at least 80 balanced entries, got {}",
2223            balanced_count
2224        );
2225    }
2226
2227    #[test]
2228    fn test_deterministic_generation() {
2229        let mut coa_gen =
2230            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2231        let coa = Arc::new(coa_gen.generate());
2232
2233        let mut gen1 = JournalEntryGenerator::new_with_params(
2234            TransactionConfig::default(),
2235            Arc::clone(&coa),
2236            vec!["1000".to_string()],
2237            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2238            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2239            42,
2240        );
2241
2242        let mut gen2 = JournalEntryGenerator::new_with_params(
2243            TransactionConfig::default(),
2244            coa,
2245            vec!["1000".to_string()],
2246            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2247            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2248            42,
2249        );
2250
2251        for _ in 0..50 {
2252            let e1 = gen1.generate();
2253            let e2 = gen2.generate();
2254            assert_eq!(e1.header.document_id, e2.header.document_id);
2255            assert_eq!(e1.total_debit(), e2.total_debit());
2256        }
2257    }
2258
2259    #[test]
2260    fn test_templates_generate_descriptions() {
2261        let mut coa_gen =
2262            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2263        let coa = Arc::new(coa_gen.generate());
2264
2265        // Enable all template features
2266        let template_config = TemplateConfig {
2267            names: datasynth_config::schema::NameTemplateConfig {
2268                generate_realistic_names: true,
2269                email_domain: "test.com".to_string(),
2270                culture_distribution: datasynth_config::schema::CultureDistribution::default(),
2271            },
2272            descriptions: datasynth_config::schema::DescriptionTemplateConfig {
2273                generate_header_text: true,
2274                generate_line_text: true,
2275            },
2276            references: datasynth_config::schema::ReferenceTemplateConfig {
2277                generate_references: true,
2278                invoice_prefix: "TEST-INV".to_string(),
2279                po_prefix: "TEST-PO".to_string(),
2280                so_prefix: "TEST-SO".to_string(),
2281            },
2282        };
2283
2284        let mut je_gen = JournalEntryGenerator::new_with_full_config(
2285            TransactionConfig::default(),
2286            coa,
2287            vec!["1000".to_string()],
2288            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2289            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2290            42,
2291            template_config,
2292            None,
2293        )
2294        .with_persona_errors(false); // Disable for template testing
2295
2296        for _ in 0..10 {
2297            let entry = je_gen.generate();
2298
2299            // Verify header text is populated
2300            assert!(
2301                entry.header.header_text.is_some(),
2302                "Header text should be populated"
2303            );
2304
2305            // Verify reference is populated
2306            assert!(
2307                entry.header.reference.is_some(),
2308                "Reference should be populated"
2309            );
2310
2311            // Verify business process is set
2312            assert!(
2313                entry.header.business_process.is_some(),
2314                "Business process should be set"
2315            );
2316
2317            // Verify line text is populated
2318            for line in &entry.lines {
2319                assert!(line.line_text.is_some(), "Line text should be populated");
2320            }
2321
2322            // Entry should still be balanced
2323            assert!(entry.is_balanced());
2324        }
2325    }
2326
2327    #[test]
2328    fn test_user_pool_integration() {
2329        let mut coa_gen =
2330            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2331        let coa = Arc::new(coa_gen.generate());
2332
2333        let companies = vec!["1000".to_string()];
2334
2335        // Generate user pool
2336        let mut user_gen = crate::UserGenerator::new(42);
2337        let user_pool = user_gen.generate_standard(&companies);
2338
2339        let mut je_gen = JournalEntryGenerator::new_with_full_config(
2340            TransactionConfig::default(),
2341            coa,
2342            companies,
2343            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2344            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2345            42,
2346            TemplateConfig::default(),
2347            Some(user_pool),
2348        );
2349
2350        // Generate entries and verify user IDs are from pool
2351        for _ in 0..20 {
2352            let entry = je_gen.generate();
2353
2354            // User ID should not be generic BATCH/USER format when pool is used
2355            // (though it may still fall back if random selection misses)
2356            assert!(!entry.header.created_by.is_empty());
2357        }
2358    }
2359
2360    #[test]
2361    fn test_master_data_connection() {
2362        let mut coa_gen =
2363            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2364        let coa = Arc::new(coa_gen.generate());
2365
2366        // Create test vendors
2367        let vendors = vec![
2368            Vendor::new("V-TEST-001", "Test Vendor Alpha", VendorType::Supplier),
2369            Vendor::new("V-TEST-002", "Test Vendor Beta", VendorType::Technology),
2370        ];
2371
2372        // Create test customers
2373        let customers = vec![
2374            Customer::new("C-TEST-001", "Test Customer One", CustomerType::Corporate),
2375            Customer::new(
2376                "C-TEST-002",
2377                "Test Customer Two",
2378                CustomerType::SmallBusiness,
2379            ),
2380        ];
2381
2382        // Create test materials
2383        let materials = vec![Material::new(
2384            "MAT-TEST-001",
2385            "Test Material A",
2386            MaterialType::RawMaterial,
2387        )];
2388
2389        // Create generator with master data
2390        let generator = JournalEntryGenerator::new_with_params(
2391            TransactionConfig::default(),
2392            coa,
2393            vec!["1000".to_string()],
2394            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2395            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2396            42,
2397        );
2398
2399        // Without master data
2400        assert!(!generator.is_using_real_master_data());
2401
2402        // Connect master data
2403        let generator_with_data = generator
2404            .with_vendors(&vendors)
2405            .with_customers(&customers)
2406            .with_materials(&materials);
2407
2408        // Should now be using real master data
2409        assert!(generator_with_data.is_using_real_master_data());
2410    }
2411
2412    #[test]
2413    fn test_with_master_data_convenience_method() {
2414        let mut coa_gen =
2415            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2416        let coa = Arc::new(coa_gen.generate());
2417
2418        let vendors = vec![Vendor::new("V-001", "Vendor One", VendorType::Supplier)];
2419        let customers = vec![Customer::new(
2420            "C-001",
2421            "Customer One",
2422            CustomerType::Corporate,
2423        )];
2424        let materials = vec![Material::new(
2425            "MAT-001",
2426            "Material One",
2427            MaterialType::RawMaterial,
2428        )];
2429
2430        let generator = JournalEntryGenerator::new_with_params(
2431            TransactionConfig::default(),
2432            coa,
2433            vec!["1000".to_string()],
2434            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2435            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2436            42,
2437        )
2438        .with_master_data(&vendors, &customers, &materials);
2439
2440        assert!(generator.is_using_real_master_data());
2441    }
2442
2443    #[test]
2444    fn test_stress_factors_increase_error_rate() {
2445        let mut coa_gen =
2446            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2447        let coa = Arc::new(coa_gen.generate());
2448
2449        let generator = JournalEntryGenerator::new_with_params(
2450            TransactionConfig::default(),
2451            coa,
2452            vec!["1000".to_string()],
2453            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2454            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2455            42,
2456        );
2457
2458        let base_rate = 0.1;
2459
2460        // Regular day - no stress factors
2461        let regular_day = NaiveDate::from_ymd_opt(2024, 6, 15).unwrap(); // Mid-June Wednesday
2462        let regular_rate = generator.apply_stress_factors(base_rate, regular_day);
2463        assert!(
2464            (regular_rate - base_rate).abs() < 0.01,
2465            "Regular day should have minimal stress factor adjustment"
2466        );
2467
2468        // Month end - 50% more errors
2469        let month_end = NaiveDate::from_ymd_opt(2024, 6, 29).unwrap(); // June 29 (Saturday)
2470        let month_end_rate = generator.apply_stress_factors(base_rate, month_end);
2471        assert!(
2472            month_end_rate > regular_rate,
2473            "Month end should have higher error rate than regular day"
2474        );
2475
2476        // Year end - double the error rate
2477        let year_end = NaiveDate::from_ymd_opt(2024, 12, 30).unwrap(); // December 30
2478        let year_end_rate = generator.apply_stress_factors(base_rate, year_end);
2479        assert!(
2480            year_end_rate > month_end_rate,
2481            "Year end should have highest error rate"
2482        );
2483
2484        // Friday stress
2485        let friday = NaiveDate::from_ymd_opt(2024, 6, 14).unwrap(); // Friday
2486        let friday_rate = generator.apply_stress_factors(base_rate, friday);
2487        assert!(
2488            friday_rate > regular_rate,
2489            "Friday should have higher error rate than mid-week"
2490        );
2491
2492        // Monday stress
2493        let monday = NaiveDate::from_ymd_opt(2024, 6, 17).unwrap(); // Monday
2494        let monday_rate = generator.apply_stress_factors(base_rate, monday);
2495        assert!(
2496            monday_rate > regular_rate,
2497            "Monday should have higher error rate than mid-week"
2498        );
2499    }
2500
2501    #[test]
2502    fn test_batching_produces_similar_entries() {
2503        let mut coa_gen =
2504            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2505        let coa = Arc::new(coa_gen.generate());
2506
2507        // Use seed 123 which is more likely to trigger batching
2508        let mut je_gen = JournalEntryGenerator::new_with_params(
2509            TransactionConfig::default(),
2510            coa,
2511            vec!["1000".to_string()],
2512            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2513            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2514            123,
2515        )
2516        .with_persona_errors(false); // Disable to ensure balanced entries
2517
2518        // Generate many entries - at 15% batch rate, should see some batches
2519        let entries: Vec<JournalEntry> = (0..200).map(|_| je_gen.generate()).collect();
2520
2521        // Check that all entries are balanced (batched or not)
2522        for entry in &entries {
2523            assert!(
2524                entry.is_balanced(),
2525                "All entries including batched should be balanced"
2526            );
2527        }
2528
2529        // Count entries with same-day posting dates (batch indicator)
2530        let mut date_counts: std::collections::HashMap<NaiveDate, usize> =
2531            std::collections::HashMap::new();
2532        for entry in &entries {
2533            *date_counts.entry(entry.header.posting_date).or_insert(0) += 1;
2534        }
2535
2536        // With batching, some dates should have multiple entries
2537        let dates_with_multiple = date_counts.values().filter(|&&c| c > 1).count();
2538        assert!(
2539            dates_with_multiple > 0,
2540            "With batching, should see some dates with multiple entries"
2541        );
2542    }
2543
2544    #[test]
2545    fn test_temporal_patterns_business_days() {
2546        use datasynth_config::schema::{
2547            BusinessDaySchemaConfig, CalendarSchemaConfig, TemporalPatternsConfig,
2548        };
2549
2550        let mut coa_gen =
2551            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2552        let coa = Arc::new(coa_gen.generate());
2553
2554        // Create temporal patterns config with business days enabled
2555        let temporal_config = TemporalPatternsConfig {
2556            enabled: true,
2557            business_days: BusinessDaySchemaConfig {
2558                enabled: true,
2559                ..Default::default()
2560            },
2561            calendars: CalendarSchemaConfig {
2562                regions: vec!["US".to_string()],
2563                custom_holidays: vec![],
2564            },
2565            ..Default::default()
2566        };
2567
2568        let mut je_gen = JournalEntryGenerator::new_with_params(
2569            TransactionConfig::default(),
2570            coa,
2571            vec!["1000".to_string()],
2572            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2573            NaiveDate::from_ymd_opt(2024, 3, 31).unwrap(), // Q1 2024
2574            42,
2575        )
2576        .with_temporal_patterns(temporal_config, 42)
2577        .with_persona_errors(false);
2578
2579        // Generate entries and verify none fall on weekends
2580        let entries: Vec<JournalEntry> = (0..100).map(|_| je_gen.generate()).collect();
2581
2582        for entry in &entries {
2583            let weekday = entry.header.posting_date.weekday();
2584            assert!(
2585                weekday != chrono::Weekday::Sat && weekday != chrono::Weekday::Sun,
2586                "Posting date {:?} should not be a weekend",
2587                entry.header.posting_date
2588            );
2589        }
2590    }
2591
2592    #[test]
2593    fn test_default_generation_filters_weekends() {
2594        // Verify that weekend entries are <5% even when temporal_patterns is NOT enabled.
2595        // This tests the fix where new_with_full_config always creates a default
2596        // BusinessDayCalculator with US holidays as a fallback.
2597        let mut coa_gen =
2598            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2599        let coa = Arc::new(coa_gen.generate());
2600
2601        let mut je_gen = JournalEntryGenerator::new_with_params(
2602            TransactionConfig::default(),
2603            coa,
2604            vec!["1000".to_string()],
2605            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2606            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2607            42,
2608        )
2609        .with_persona_errors(false);
2610
2611        let total = 500;
2612        let entries: Vec<JournalEntry> = (0..total).map(|_| je_gen.generate()).collect();
2613
2614        let weekend_count = entries
2615            .iter()
2616            .filter(|e| {
2617                let wd = e.header.posting_date.weekday();
2618                wd == chrono::Weekday::Sat || wd == chrono::Weekday::Sun
2619            })
2620            .count();
2621
2622        let weekend_pct = weekend_count as f64 / total as f64;
2623        assert!(
2624            weekend_pct < 0.05,
2625            "Expected weekend entries <5% of total without temporal_patterns enabled, \
2626             but got {:.1}% ({}/{})",
2627            weekend_pct * 100.0,
2628            weekend_count,
2629            total
2630        );
2631    }
2632
2633    #[test]
2634    fn test_document_type_derived_from_business_process() {
2635        let mut coa_gen =
2636            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2637        let coa = Arc::new(coa_gen.generate());
2638
2639        let mut je_gen = JournalEntryGenerator::new_with_params(
2640            TransactionConfig::default(),
2641            coa,
2642            vec!["1000".to_string()],
2643            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2644            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2645            99,
2646        )
2647        .with_persona_errors(false)
2648        .with_batching(false);
2649
2650        let total = 200;
2651        let mut doc_types = std::collections::HashSet::new();
2652        let mut sa_count = 0_usize;
2653
2654        for _ in 0..total {
2655            let entry = je_gen.generate();
2656            let dt = &entry.header.document_type;
2657            doc_types.insert(dt.clone());
2658            if dt == "SA" {
2659                sa_count += 1;
2660            }
2661        }
2662
2663        // Should have more than 3 distinct document types
2664        assert!(
2665            doc_types.len() > 3,
2666            "Expected >3 distinct document types, got {} ({:?})",
2667            doc_types.len(),
2668            doc_types,
2669        );
2670
2671        // "SA" should be less than 50% (R2R is 20% of the weight)
2672        let sa_pct = sa_count as f64 / total as f64;
2673        assert!(
2674            sa_pct < 0.50,
2675            "Expected SA <50%, got {:.1}% ({}/{})",
2676            sa_pct * 100.0,
2677            sa_count,
2678            total,
2679        );
2680    }
2681
2682    #[test]
2683    fn test_enrich_line_items_account_description() {
2684        let mut coa_gen =
2685            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2686        let coa = Arc::new(coa_gen.generate());
2687
2688        let mut je_gen = JournalEntryGenerator::new_with_params(
2689            TransactionConfig::default(),
2690            coa,
2691            vec!["1000".to_string()],
2692            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2693            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2694            42,
2695        )
2696        .with_persona_errors(false);
2697
2698        let total = 200;
2699        let entries: Vec<JournalEntry> = (0..total).map(|_| je_gen.generate()).collect();
2700
2701        // Count lines with account_description populated
2702        let total_lines: usize = entries.iter().map(|e| e.lines.len()).sum();
2703        let lines_with_desc: usize = entries
2704            .iter()
2705            .flat_map(|e| &e.lines)
2706            .filter(|l| l.account_description.is_some())
2707            .count();
2708
2709        let desc_pct = lines_with_desc as f64 / total_lines as f64;
2710        assert!(
2711            desc_pct > 0.95,
2712            "Expected >95% of lines to have account_description, got {:.1}% ({}/{})",
2713            desc_pct * 100.0,
2714            lines_with_desc,
2715            total_lines,
2716        );
2717    }
2718
2719    #[test]
2720    fn test_enrich_line_items_cost_center_for_expense_accounts() {
2721        let mut coa_gen =
2722            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2723        let coa = Arc::new(coa_gen.generate());
2724
2725        let mut je_gen = JournalEntryGenerator::new_with_params(
2726            TransactionConfig::default(),
2727            coa,
2728            vec!["1000".to_string()],
2729            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2730            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2731            42,
2732        )
2733        .with_persona_errors(false);
2734
2735        let total = 300;
2736        let entries: Vec<JournalEntry> = (0..total).map(|_| je_gen.generate()).collect();
2737
2738        // Count expense account lines (5xxx/6xxx) with cost_center populated
2739        let expense_lines: Vec<&JournalEntryLine> = entries
2740            .iter()
2741            .flat_map(|e| &e.lines)
2742            .filter(|l| {
2743                let first = l.gl_account.chars().next().unwrap_or('0');
2744                first == '5' || first == '6'
2745            })
2746            .collect();
2747
2748        if !expense_lines.is_empty() {
2749            let with_cc = expense_lines
2750                .iter()
2751                .filter(|l| l.cost_center.is_some())
2752                .count();
2753            let cc_pct = with_cc as f64 / expense_lines.len() as f64;
2754            assert!(
2755                cc_pct > 0.80,
2756                "Expected >80% of expense lines to have cost_center, got {:.1}% ({}/{})",
2757                cc_pct * 100.0,
2758                with_cc,
2759                expense_lines.len(),
2760            );
2761        }
2762    }
2763
2764    #[test]
2765    fn test_enrich_line_items_profit_center_and_line_text() {
2766        let mut coa_gen =
2767            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2768        let coa = Arc::new(coa_gen.generate());
2769
2770        let mut je_gen = JournalEntryGenerator::new_with_params(
2771            TransactionConfig::default(),
2772            coa,
2773            vec!["1000".to_string()],
2774            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2775            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2776            42,
2777        )
2778        .with_persona_errors(false);
2779
2780        let total = 100;
2781        let entries: Vec<JournalEntry> = (0..total).map(|_| je_gen.generate()).collect();
2782
2783        let total_lines: usize = entries.iter().map(|e| e.lines.len()).sum();
2784
2785        // All lines should have profit_center
2786        let with_pc = entries
2787            .iter()
2788            .flat_map(|e| &e.lines)
2789            .filter(|l| l.profit_center.is_some())
2790            .count();
2791        let pc_pct = with_pc as f64 / total_lines as f64;
2792        assert!(
2793            pc_pct > 0.95,
2794            "Expected >95% of lines to have profit_center, got {:.1}% ({}/{})",
2795            pc_pct * 100.0,
2796            with_pc,
2797            total_lines,
2798        );
2799
2800        // All lines should have line_text (either from template or header fallback)
2801        let with_text = entries
2802            .iter()
2803            .flat_map(|e| &e.lines)
2804            .filter(|l| l.line_text.is_some())
2805            .count();
2806        let text_pct = with_text as f64 / total_lines as f64;
2807        assert!(
2808            text_pct > 0.95,
2809            "Expected >95% of lines to have line_text, got {:.1}% ({}/{})",
2810            text_pct * 100.0,
2811            with_text,
2812            total_lines,
2813        );
2814    }
2815
2816    // --- ISA 240 audit flag tests ---
2817
2818    #[test]
2819    fn test_je_has_audit_flags() {
2820        let mut coa_gen =
2821            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2822        let coa = Arc::new(coa_gen.generate());
2823
2824        let mut je_gen = JournalEntryGenerator::new_with_params(
2825            TransactionConfig::default(),
2826            coa,
2827            vec!["1000".to_string()],
2828            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2829            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2830            42,
2831        )
2832        .with_persona_errors(false);
2833
2834        for _ in 0..100 {
2835            let entry = je_gen.generate();
2836
2837            // source_system should always be non-empty
2838            assert!(
2839                !entry.header.source_system.is_empty(),
2840                "source_system should be populated, got empty string"
2841            );
2842
2843            // created_by should always be non-empty (already tested elsewhere, but confirm)
2844            assert!(
2845                !entry.header.created_by.is_empty(),
2846                "created_by should be populated"
2847            );
2848
2849            // created_date should always be populated
2850            assert!(
2851                entry.header.created_date.is_some(),
2852                "created_date should be populated"
2853            );
2854        }
2855    }
2856
2857    #[test]
2858    fn test_manual_entry_rate() {
2859        let mut coa_gen =
2860            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2861        let coa = Arc::new(coa_gen.generate());
2862
2863        let mut je_gen = JournalEntryGenerator::new_with_params(
2864            TransactionConfig::default(),
2865            coa,
2866            vec!["1000".to_string()],
2867            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2868            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2869            42,
2870        )
2871        .with_persona_errors(false)
2872        .with_batching(false);
2873
2874        let total = 1000;
2875        let entries: Vec<JournalEntry> = (0..total).map(|_| je_gen.generate()).collect();
2876
2877        let manual_count = entries.iter().filter(|e| e.header.is_manual).count();
2878        let manual_rate = manual_count as f64 / total as f64;
2879
2880        // Default source_distribution.manual is typically around 0.05-0.15
2881        // Allow a wide tolerance for statistical variation
2882        assert!(
2883            manual_rate > 0.01 && manual_rate < 0.50,
2884            "Manual entry rate should be reasonable (1%-50%), got {:.1}% ({}/{})",
2885            manual_rate * 100.0,
2886            manual_count,
2887            total,
2888        );
2889
2890        // is_manual should match TransactionSource::Manual
2891        for entry in &entries {
2892            let source_is_manual = entry.header.source == TransactionSource::Manual;
2893            assert_eq!(
2894                entry.header.is_manual, source_is_manual,
2895                "is_manual should match source == Manual"
2896            );
2897        }
2898    }
2899
2900    #[test]
2901    fn test_manual_source_consistency() {
2902        let mut coa_gen =
2903            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2904        let coa = Arc::new(coa_gen.generate());
2905
2906        let mut je_gen = JournalEntryGenerator::new_with_params(
2907            TransactionConfig::default(),
2908            coa,
2909            vec!["1000".to_string()],
2910            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2911            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2912            42,
2913        )
2914        .with_persona_errors(false)
2915        .with_batching(false);
2916
2917        for _ in 0..500 {
2918            let entry = je_gen.generate();
2919
2920            if entry.header.is_manual {
2921                // Manual entries must have source_system "manual" or "spreadsheet"
2922                assert!(
2923                    entry.header.source_system == "manual"
2924                        || entry.header.source_system == "spreadsheet",
2925                    "Manual entry should have source_system 'manual' or 'spreadsheet', got '{}'",
2926                    entry.header.source_system,
2927                );
2928            } else {
2929                // Non-manual entries must NOT have source_system "manual" or "spreadsheet"
2930                assert!(
2931                    entry.header.source_system != "manual"
2932                        && entry.header.source_system != "spreadsheet",
2933                    "Non-manual entry should not have source_system 'manual' or 'spreadsheet', got '{}'",
2934                    entry.header.source_system,
2935                );
2936            }
2937        }
2938    }
2939
2940    #[test]
2941    fn test_created_date_before_posting() {
2942        let mut coa_gen =
2943            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2944        let coa = Arc::new(coa_gen.generate());
2945
2946        let mut je_gen = JournalEntryGenerator::new_with_params(
2947            TransactionConfig::default(),
2948            coa,
2949            vec!["1000".to_string()],
2950            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2951            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2952            42,
2953        )
2954        .with_persona_errors(false);
2955
2956        for _ in 0..500 {
2957            let entry = je_gen.generate();
2958
2959            if let Some(created_date) = entry.header.created_date {
2960                let created_naive_date = created_date.date();
2961                assert!(
2962                    created_naive_date <= entry.header.posting_date,
2963                    "created_date ({}) should be <= posting_date ({})",
2964                    created_naive_date,
2965                    entry.header.posting_date,
2966                );
2967            }
2968        }
2969    }
2970}