Skip to main content

datasynth_generators/
je_generator.rs

1//! Journal Entry generator with statistical distributions.
2
3use chrono::{Datelike, NaiveDate, Timelike};
4use datasynth_core::utils::seeded_rng;
5use rand::prelude::*;
6use rand_chacha::ChaCha8Rng;
7use rust_decimal::prelude::*;
8use rust_decimal::Decimal;
9use std::sync::Arc;
10
11use tracing::debug;
12
13use datasynth_config::schema::{
14    AdvancedDistributionConfig, FraudConfig, GeneratorConfig, MixtureDistributionType,
15    TemplateConfig, TemporalPatternsConfig, TransactionConfig,
16};
17use datasynth_core::distributions::{
18    AdvancedAmountSampler, BusinessDayCalculator, CrossDayConfig, DriftAdjustments, DriftConfig,
19    DriftController, EventType, IndustryAmountProfile, IndustryType, LagDistribution,
20    PeriodEndConfig, PeriodEndDynamics, PeriodEndModel, ProcessingLagCalculator,
21    ProcessingLagConfig, *,
22};
23use datasynth_core::models::*;
24use datasynth_core::templates::{
25    descriptions::DescriptionContext, DescriptionGenerator, ReferenceGenerator, ReferenceType,
26};
27use datasynth_core::traits::Generator;
28use datasynth_core::uuid_factory::{DeterministicUuidFactory, GeneratorType};
29use datasynth_core::CountryPack;
30
31use crate::company_selector::WeightedCompanySelector;
32use crate::user_generator::{UserGenerator, UserGeneratorConfig};
33
34/// Generator for realistic journal entries.
35pub struct JournalEntryGenerator {
36    rng: ChaCha8Rng,
37    seed: u64,
38    config: TransactionConfig,
39    coa: Arc<ChartOfAccounts>,
40    companies: Vec<String>,
41    company_selector: WeightedCompanySelector,
42    line_sampler: LineItemSampler,
43    amount_sampler: AmountSampler,
44    temporal_sampler: TemporalSampler,
45    start_date: NaiveDate,
46    end_date: NaiveDate,
47    count: u64,
48    uuid_factory: DeterministicUuidFactory,
49    // Enhanced features
50    user_pool: Option<UserPool>,
51    description_generator: DescriptionGenerator,
52    reference_generator: ReferenceGenerator,
53    template_config: TemplateConfig,
54    vendor_pool: VendorPool,
55    customer_pool: CustomerPool,
56    // Material pool for realistic material references
57    material_pool: Option<MaterialPool>,
58    // Cost-center IDs sourced from the generated cost-centers master so
59    // `JE.cost_center` joins back to `cost_centers.id`.  Populated via
60    // [`with_cost_center_pool`] from the orchestrator after master-data
61    // generation; falls back to the hardcoded `COST_CENTER_POOL` const
62    // when empty (configs that skip master-data generation).
63    cost_center_pool: Vec<String>,
64    // Profit-center IDs sourced from the generated profit-centers master
65    // so `JE.profit_center` joins back to `profit_centers.id`.  Same
66    // population semantics as `cost_center_pool`.
67    profit_center_pool: Vec<String>,
68    // Flag indicating whether we're using real master data vs defaults
69    using_real_master_data: bool,
70    // Fraud generation
71    fraud_config: FraudConfig,
72    // Persona-based error injection
73    persona_errors_enabled: bool,
74    // Approval threshold enforcement
75    approval_enabled: bool,
76    approval_threshold: rust_decimal::Decimal,
77    // SOD violation rate for approval tracking (0.0 to 1.0)
78    sod_violation_rate: f64,
79    // Batching behavior - humans often process similar items together
80    batch_state: Option<BatchState>,
81    // Temporal drift controller for simulating distribution changes over time
82    drift_controller: Option<DriftController>,
83    // Temporal patterns components
84    business_day_calculator: Option<BusinessDayCalculator>,
85    processing_lag_calculator: Option<ProcessingLagCalculator>,
86    temporal_patterns_config: Option<TemporalPatternsConfig>,
87    // Business-process weights for the O2C/P2P/R2R/H2R/A2R volume mix. Must
88    // sum to 1.0 (validated by config schema). Default matches the legacy
89    // hard-coded 0.35/0.30/0.20/0.10/0.05 distribution.
90    business_process_weights: [(BusinessProcess, f64); 5],
91    // v3.4.0 advanced distributions (mixture models + industry profiles).
92    // None preserves v3.3.2 byte-for-byte behavior; populated only when the
93    // caller opts in via [`set_advanced_distributions`].
94    advanced_amount_sampler: Option<AdvancedAmountSampler>,
95    // v3.5.3+ conditional amount override. Populated when
96    // `config.distributions.conditional` contains an entry where
97    // `output_field == "amount"` and `input_field ∈ {"month",
98    // "quarter", "constant"}`. Applied *after* the fraud-pattern /
99    // advanced-sampler / legacy-sampler cascade on non-fraud entries
100    // so it can steer amounts by calendar context without disturbing
101    // fraud semantics.
102    conditional_amount_override: Option<datasynth_core::distributions::ConditionalSampler>,
103    // v3.5.4+ Gaussian copula for amount↔line_count correlation. When
104    // populated, each non-fraud JE draws a (u, v) pair; u nudges amount
105    // via a `(0.75 + 0.5*u)` multiplier and v biases line_count toward
106    // the upper/lower end of its range. Produces observable Spearman
107    // correlation without rewiring existing samplers for inverse-CDF.
108    correlation_copula: Option<datasynth_core::distributions::BivariateCopulaSampler>,
109}
110
/// Default business-process volume mix, in O2C / P2P / R2R / H2R / A2R
/// order. The five weights sum to 1.0. Used as the initial value of the
/// generator's `business_process_weights` field.
const DEFAULT_BUSINESS_PROCESS_WEIGHTS: [(BusinessProcess, f64); 5] = [
    (BusinessProcess::O2C, 0.35),
    (BusinessProcess::P2P, 0.30),
    (BusinessProcess::R2R, 0.20),
    (BusinessProcess::H2R, 0.10),
    (BusinessProcess::A2R, 0.05),
];
118
119/// Map the schema-level [`datasynth_config::schema::IndustryProfileType`]
120/// onto the distributions-layer [`IndustryType`], then return that industry's
121/// pre-configured `sales_amounts` mixture. Used as a fallback when the
122/// caller enables `distributions.amounts` but supplies no components.
123/// Per-entry context channels for conditional-distribution overrides.
124///
125/// v4.1.0+ supported `input_field` values:
126///
127///   - `"month"` — posting-date month (1..=12)
128///   - `"quarter"` — posting-date quarter (1..=4)
129///   - `"year"` — posting-date year (e.g. 2026.0)
130///   - `"day_of_week"` — 1 (Mon) .. 7 (Sun)
131///   - `"day_of_month"` — 1..=31
132///   - `"day_of_year"` — 1..=366
133///   - `"week_of_year"` — 1..=53
134///   - `"is_period_end"` — 1.0 when posting_date is the last business
135///     day of the month, else 0.0
136///   - `"is_quarter_end"` — 1.0 when posting_date is in a quarter-end
137///     month AND is the last business day, else 0.0
138///   - `"is_year_end"` — 1.0 when posting_date is in December AND is
139///     the last business day, else 0.0
140///   - `"constant"` / empty — always 0.0 (treats as unconditional)
141///
142/// Unsupported values cause the conditional rule to be silently ignored
143/// to keep runtime robust against user typos.
144impl JournalEntryGenerator {
145    fn supported_conditional_input(field: &str) -> bool {
146        matches!(
147            field,
148            "month"
149                | "quarter"
150                | "year"
151                | "day_of_week"
152                | "day_of_month"
153                | "day_of_year"
154                | "week_of_year"
155                | "is_period_end"
156                | "is_quarter_end"
157                | "is_year_end"
158                | "constant"
159                | ""
160        )
161    }
162
163    fn conditional_input_value(&self, posting_date: chrono::NaiveDate) -> f64 {
164        let input_field = match self
165            .conditional_amount_override
166            .as_ref()
167            .map(|s| s.config().input_field.as_str())
168        {
169            Some(f) => f,
170            None => return 0.0,
171        };
172
173        let is_last_business_day = |d: chrono::NaiveDate| -> bool {
174            // Last day-of-month → is_period_end. Handles Feb/leap-year
175            // via chrono's num_days_from_ce roundabout; simpler path:
176            // if adding 1 day moves to a different month, this is EOM.
177            let next = d.succ_opt();
178            match next {
179                Some(n) => n.month() != d.month(),
180                None => true,
181            }
182        };
183
184        match input_field {
185            "month" => posting_date.month() as f64,
186            "quarter" => ((posting_date.month() - 1) / 3 + 1) as f64,
187            "year" => posting_date.year() as f64,
188            "day_of_week" => posting_date.weekday().number_from_monday() as f64,
189            "day_of_month" => posting_date.day() as f64,
190            "day_of_year" => posting_date.ordinal() as f64,
191            "week_of_year" => posting_date.iso_week().week() as f64,
192            "is_period_end" => f64::from(u8::from(is_last_business_day(posting_date))),
193            "is_quarter_end" => {
194                let m = posting_date.month();
195                let is_q_month = matches!(m, 3 | 6 | 9 | 12);
196                f64::from(u8::from(is_q_month && is_last_business_day(posting_date)))
197            }
198            "is_year_end" => f64::from(u8::from(
199                posting_date.month() == 12 && is_last_business_day(posting_date),
200            )),
201            _ => 0.0,
202        }
203    }
204}
205
206fn industry_profile_to_log_normal(
207    p: datasynth_config::schema::IndustryProfileType,
208) -> datasynth_core::distributions::LogNormalMixtureConfig {
209    use datasynth_config::schema::IndustryProfileType as P;
210    let industry = match p {
211        P::Retail => IndustryType::Retail,
212        P::Manufacturing => IndustryType::Manufacturing,
213        P::FinancialServices => IndustryType::FinancialServices,
214        P::Healthcare => IndustryType::Healthcare,
215        P::Technology => IndustryType::Technology,
216    };
217    IndustryAmountProfile::for_industry(industry).sales_amounts
218}
219
220/// State for tracking batch processing behavior.
221///
222/// When humans process transactions, they often batch similar items together
223/// (e.g., processing all invoices from one vendor, entering similar expenses).
224#[derive(Clone)]
225struct BatchState {
226    /// The base entry template to vary
227    base_account_number: String,
228    base_amount: rust_decimal::Decimal,
229    base_business_process: Option<BusinessProcess>,
230    base_posting_date: NaiveDate,
231    /// Remaining entries in this batch
232    remaining: u8,
233}
234
235impl JournalEntryGenerator {
236    /// Create a new journal entry generator.
237    pub fn new_with_params(
238        config: TransactionConfig,
239        coa: Arc<ChartOfAccounts>,
240        companies: Vec<String>,
241        start_date: NaiveDate,
242        end_date: NaiveDate,
243        seed: u64,
244    ) -> Self {
245        Self::new_with_full_config(
246            config,
247            coa,
248            companies,
249            start_date,
250            end_date,
251            seed,
252            TemplateConfig::default(),
253            None,
254        )
255    }
256
257    /// Create a new journal entry generator with full configuration.
258    #[allow(clippy::too_many_arguments)]
259    pub fn new_with_full_config(
260        config: TransactionConfig,
261        coa: Arc<ChartOfAccounts>,
262        companies: Vec<String>,
263        start_date: NaiveDate,
264        end_date: NaiveDate,
265        seed: u64,
266        template_config: TemplateConfig,
267        user_pool: Option<UserPool>,
268    ) -> Self {
269        // Initialize user pool if not provided
270        let user_pool = user_pool.or_else(|| {
271            if template_config.names.generate_realistic_names {
272                let user_gen_config = UserGeneratorConfig {
273                    culture_distribution: vec![
274                        (
275                            datasynth_core::templates::NameCulture::WesternUs,
276                            template_config.names.culture_distribution.western_us,
277                        ),
278                        (
279                            datasynth_core::templates::NameCulture::Hispanic,
280                            template_config.names.culture_distribution.hispanic,
281                        ),
282                        (
283                            datasynth_core::templates::NameCulture::German,
284                            template_config.names.culture_distribution.german,
285                        ),
286                        (
287                            datasynth_core::templates::NameCulture::French,
288                            template_config.names.culture_distribution.french,
289                        ),
290                        (
291                            datasynth_core::templates::NameCulture::Chinese,
292                            template_config.names.culture_distribution.chinese,
293                        ),
294                        (
295                            datasynth_core::templates::NameCulture::Japanese,
296                            template_config.names.culture_distribution.japanese,
297                        ),
298                        (
299                            datasynth_core::templates::NameCulture::Indian,
300                            template_config.names.culture_distribution.indian,
301                        ),
302                    ],
303                    email_domain: template_config.names.email_domain.clone(),
304                    generate_realistic_names: true,
305                };
306                let mut user_gen = UserGenerator::with_config(seed + 100, user_gen_config);
307                Some(user_gen.generate_standard(&companies))
308            } else {
309                None
310            }
311        });
312
313        // Initialize reference generator
314        let mut ref_gen = ReferenceGenerator::new(
315            start_date.year(),
316            companies
317                .first()
318                .map(std::string::String::as_str)
319                .unwrap_or("1000"),
320        );
321        ref_gen.set_prefix(
322            ReferenceType::Invoice,
323            &template_config.references.invoice_prefix,
324        );
325        ref_gen.set_prefix(
326            ReferenceType::PurchaseOrder,
327            &template_config.references.po_prefix,
328        );
329        ref_gen.set_prefix(
330            ReferenceType::SalesOrder,
331            &template_config.references.so_prefix,
332        );
333
334        // Create weighted company selector (uniform weights for this constructor)
335        let company_selector = WeightedCompanySelector::uniform(companies.clone());
336
337        Self {
338            rng: seeded_rng(seed, 0),
339            seed,
340            config: config.clone(),
341            coa,
342            companies,
343            company_selector,
344            line_sampler: LineItemSampler::with_config(
345                seed + 1,
346                config.line_item_distribution.clone(),
347                config.even_odd_distribution.clone(),
348                config.debit_credit_distribution.clone(),
349            ),
350            amount_sampler: AmountSampler::with_config(seed + 2, config.amounts.clone()),
351            temporal_sampler: TemporalSampler::with_config(
352                seed + 3,
353                config.seasonality.clone(),
354                WorkingHoursConfig::default(),
355                Vec::new(),
356            ),
357            start_date,
358            end_date,
359            count: 0,
360            uuid_factory: DeterministicUuidFactory::new(seed, GeneratorType::JournalEntry),
361            user_pool,
362            description_generator: DescriptionGenerator::new(),
363            reference_generator: ref_gen,
364            template_config,
365            vendor_pool: VendorPool::standard(),
366            customer_pool: CustomerPool::standard(),
367            material_pool: None,
368            cost_center_pool: Vec::new(),
369            profit_center_pool: Vec::new(),
370            using_real_master_data: false,
371            fraud_config: FraudConfig::default(),
372            persona_errors_enabled: true, // Enable by default for realism
373            approval_enabled: true,       // Enable by default for realism
374            approval_threshold: rust_decimal::Decimal::new(10000, 0), // $10,000 default threshold
375            sod_violation_rate: 0.10,     // 10% default SOD violation rate
376            batch_state: None,
377            drift_controller: None,
378            // Always provide a basic BusinessDayCalculator so that weekend/holiday
379            // filtering is active even when temporal_patterns is not explicitly enabled.
380            business_day_calculator: Some(BusinessDayCalculator::new(HolidayCalendar::new(
381                Region::US,
382                start_date.year(),
383            ))),
384            processing_lag_calculator: None,
385            temporal_patterns_config: None,
386            business_process_weights: DEFAULT_BUSINESS_PROCESS_WEIGHTS,
387            advanced_amount_sampler: None,
388            conditional_amount_override: None,
389            correlation_copula: None,
390        }
391    }
392
393    /// Wire v3.4.0 advanced distributions. When the caller's config has
394    /// `distributions.enabled = true` AND `distributions.amounts.enabled =
395    /// true`, the journal-entry generator routes non-fraud amount sampling
396    /// through an [`AdvancedAmountSampler`] (log-normal or Gaussian mixture).
397    ///
398    /// When `distributions.industry_profile` is `Some`, the caller's
399    /// explicitly configured components override nothing — if the component
400    /// list is empty, the industry profile's `sales_amounts` mixture is used
401    /// instead. Explicit components always win.
402    ///
403    /// Returning `Ok(())` with no side effect is intentional for the
404    /// following no-op cases, so callers can unconditionally invoke this:
405    ///   - `config.enabled = false`
406    ///   - `config.amounts.enabled = false`
407    ///   - empty component list with no industry profile
408    ///
409    /// Errors propagate from mixture validation (e.g. weights not summing
410    /// to 1.0, non-positive sigma).
411    pub fn set_advanced_distributions(
412        &mut self,
413        config: &AdvancedDistributionConfig,
414        seed: u64,
415    ) -> Result<(), String> {
416        if !config.enabled {
417            return Ok(());
418        }
419
420        // v3.5.3+: build a conditional-amount override when the config
421        // declares a rule with `output_field == "amount"` and a supported
422        // input field. The override is applied *after* the standard
423        // cascade so it doesn't disturb fraud-path sampling. Unsupported
424        // input fields are ignored with a trace log.
425        self.conditional_amount_override = config
426            .conditional
427            .iter()
428            .find(|c| {
429                c.output_field == "amount" && Self::supported_conditional_input(&c.input_field)
430            })
431            .and_then(|c| {
432                datasynth_core::distributions::ConditionalSampler::new(
433                    seed.wrapping_add(17),
434                    c.to_core_config(),
435                )
436                .ok()
437            });
438
439        // v4.1.0+: all 5 copula types wired (Gaussian / Clayton /
440        // Gumbel / Frank / Student-t). The `BivariateCopulaSampler`
441        // already implements each; v3.5.4 had a filter limiting to
442        // Gaussian only — lifted here now that the smoke test matrix
443        // covers all types.
444        self.correlation_copula = config
445            .correlations
446            .to_core_config_for_pair("amount", "line_count")
447            .and_then(|copula_cfg| {
448                datasynth_core::distributions::BivariateCopulaSampler::new(
449                    seed.wrapping_add(31),
450                    copula_cfg,
451                )
452                .ok()
453            });
454
455        // v3.4.4+: Pareto takes precedence over mixture models when set.
456        // This supports heavy-tailed amount distributions (capex, strategic
457        // contracts, fraud) that log-normal/Gaussian mixtures can't model
458        // as sharply.
459        if let Some(pareto) = &config.pareto {
460            if pareto.enabled {
461                let core_cfg = pareto.to_core_config();
462                self.advanced_amount_sampler =
463                    Some(AdvancedAmountSampler::new_pareto(seed, core_cfg)?);
464                return Ok(());
465            }
466        }
467
468        if !config.amounts.enabled {
469            return Ok(());
470        }
471
472        match config.amounts.distribution_type {
473            MixtureDistributionType::LogNormal => {
474                let lognormal_cfg = config
475                    .amounts
476                    .to_log_normal_config()
477                    .or_else(|| config.industry_profile.map(industry_profile_to_log_normal));
478                if let Some(cfg) = lognormal_cfg {
479                    self.advanced_amount_sampler =
480                        Some(AdvancedAmountSampler::new_log_normal(seed, cfg)?);
481                }
482            }
483            MixtureDistributionType::Gaussian => {
484                if let Some(cfg) = config.amounts.to_gaussian_config() {
485                    self.advanced_amount_sampler =
486                        Some(AdvancedAmountSampler::new_gaussian(seed, cfg)?);
487                }
488            }
489        }
490
491        Ok(())
492    }
493
494    /// Override the business-process volume mix. Weights map directly to the
495    /// `business_processes.*_weight` YAML config; they do not have to sum to
496    /// exactly 1.0 (they're normalized via `weighted_select`).
497    pub fn set_business_process_weights(
498        &mut self,
499        o2c: f64,
500        p2p: f64,
501        r2r: f64,
502        h2r: f64,
503        a2r: f64,
504    ) {
505        self.business_process_weights = [
506            (BusinessProcess::O2C, o2c),
507            (BusinessProcess::P2P, p2p),
508            (BusinessProcess::R2R, r2r),
509            (BusinessProcess::H2R, h2r),
510            (BusinessProcess::A2R, a2r),
511        ];
512    }
513
514    /// Create from a full GeneratorConfig.
515    ///
516    /// This constructor uses the volume_weight from company configs
517    /// for weighted company selection, and fraud config from GeneratorConfig.
518    pub fn from_generator_config(
519        full_config: &GeneratorConfig,
520        coa: Arc<ChartOfAccounts>,
521        start_date: NaiveDate,
522        end_date: NaiveDate,
523        seed: u64,
524    ) -> Self {
525        let companies: Vec<String> = full_config
526            .companies
527            .iter()
528            .map(|c| c.code.clone())
529            .collect();
530
531        // Create weighted selector using volume_weight from company configs
532        let company_selector = WeightedCompanySelector::from_configs(&full_config.companies);
533
534        let mut generator = Self::new_with_full_config(
535            full_config.transactions.clone(),
536            coa,
537            companies,
538            start_date,
539            end_date,
540            seed,
541            full_config.templates.clone(),
542            None,
543        );
544
545        // Override the uniform selector with weighted selector
546        generator.company_selector = company_selector;
547
548        // Set fraud config
549        generator.fraud_config = full_config.fraud.clone();
550
551        // Configure temporal patterns if enabled
552        let temporal_config = &full_config.temporal_patterns;
553        if temporal_config.enabled {
554            generator = generator.with_temporal_patterns(temporal_config.clone(), seed);
555        }
556
557        generator
558    }
559
560    /// Configure temporal patterns including business day calculations and processing lags.
561    ///
562    /// This enables realistic temporal behavior including:
563    /// - Business day awareness (no postings on weekends/holidays)
564    /// - Processing lag modeling (event-to-posting delays)
565    /// - Period-end dynamics (volume spikes at month/quarter/year end)
566    pub fn with_temporal_patterns(mut self, config: TemporalPatternsConfig, seed: u64) -> Self {
567        // Create business day calculator if enabled
568        if config.business_days.enabled {
569            let region = config
570                .calendars
571                .regions
572                .first()
573                .map(|r| Self::parse_region(r))
574                .unwrap_or(Region::US);
575
576            let calendar = HolidayCalendar::new(region, self.start_date.year());
577            self.business_day_calculator = Some(BusinessDayCalculator::new(calendar));
578        }
579
580        // Create processing lag calculator if enabled
581        if config.processing_lags.enabled {
582            let lag_config = Self::convert_processing_lag_config(&config.processing_lags);
583            self.processing_lag_calculator =
584                Some(ProcessingLagCalculator::with_config(seed, lag_config));
585        }
586
587        // Create period-end dynamics if configured
588        let model = config.period_end.model.as_deref().unwrap_or("flat");
589        if model != "flat"
590            || config
591                .period_end
592                .month_end
593                .as_ref()
594                .is_some_and(|m| m.peak_multiplier.unwrap_or(1.0) != 1.0)
595        {
596            let dynamics = Self::convert_period_end_config(&config.period_end);
597            self.temporal_sampler.set_period_end_dynamics(dynamics);
598        }
599
600        self.temporal_patterns_config = Some(config);
601        self
602    }
603
604    /// Configure temporal patterns using a [`CountryPack`] for the holiday calendar.
605    ///
606    /// This is an alternative to [`with_temporal_patterns`] that derives the
607    /// holiday calendar from a country-pack definition rather than the built-in
608    /// region-based calendars.  All other temporal behaviour (business-day
609    /// adjustment, processing lags, period-end dynamics) is configured
610    /// identically.
611    pub fn with_country_pack_temporal(
612        mut self,
613        config: TemporalPatternsConfig,
614        seed: u64,
615        pack: &CountryPack,
616    ) -> Self {
617        // Create business day calculator using the country pack calendar
618        if config.business_days.enabled {
619            let calendar = HolidayCalendar::from_country_pack(pack, self.start_date.year());
620            self.business_day_calculator = Some(BusinessDayCalculator::new(calendar));
621        }
622
623        // Create processing lag calculator if enabled
624        if config.processing_lags.enabled {
625            let lag_config = Self::convert_processing_lag_config(&config.processing_lags);
626            self.processing_lag_calculator =
627                Some(ProcessingLagCalculator::with_config(seed, lag_config));
628        }
629
630        // Create period-end dynamics if configured
631        let model = config.period_end.model.as_deref().unwrap_or("flat");
632        if model != "flat"
633            || config
634                .period_end
635                .month_end
636                .as_ref()
637                .is_some_and(|m| m.peak_multiplier.unwrap_or(1.0) != 1.0)
638        {
639            let dynamics = Self::convert_period_end_config(&config.period_end);
640            self.temporal_sampler.set_period_end_dynamics(dynamics);
641        }
642
643        self.temporal_patterns_config = Some(config);
644        self
645    }
646
647    /// Convert schema processing lag config to core config.
648    fn convert_processing_lag_config(
649        schema: &datasynth_config::schema::ProcessingLagSchemaConfig,
650    ) -> ProcessingLagConfig {
651        let mut config = ProcessingLagConfig {
652            enabled: schema.enabled,
653            ..Default::default()
654        };
655
656        // Helper to convert lag schema to distribution
657        let convert_lag = |lag: &datasynth_config::schema::LagDistributionSchemaConfig| {
658            let mut dist = LagDistribution::log_normal(lag.mu, lag.sigma);
659            if let Some(min) = lag.min_hours {
660                dist.min_lag_hours = min;
661            }
662            if let Some(max) = lag.max_hours {
663                dist.max_lag_hours = max;
664            }
665            dist
666        };
667
668        // Apply event-specific lags
669        if let Some(ref lag) = schema.sales_order_lag {
670            config
671                .event_lags
672                .insert(EventType::SalesOrder, convert_lag(lag));
673        }
674        if let Some(ref lag) = schema.purchase_order_lag {
675            config
676                .event_lags
677                .insert(EventType::PurchaseOrder, convert_lag(lag));
678        }
679        if let Some(ref lag) = schema.goods_receipt_lag {
680            config
681                .event_lags
682                .insert(EventType::GoodsReceipt, convert_lag(lag));
683        }
684        if let Some(ref lag) = schema.invoice_receipt_lag {
685            config
686                .event_lags
687                .insert(EventType::InvoiceReceipt, convert_lag(lag));
688        }
689        if let Some(ref lag) = schema.invoice_issue_lag {
690            config
691                .event_lags
692                .insert(EventType::InvoiceIssue, convert_lag(lag));
693        }
694        if let Some(ref lag) = schema.payment_lag {
695            config
696                .event_lags
697                .insert(EventType::Payment, convert_lag(lag));
698        }
699        if let Some(ref lag) = schema.journal_entry_lag {
700            config
701                .event_lags
702                .insert(EventType::JournalEntry, convert_lag(lag));
703        }
704
705        // Apply cross-day posting config
706        if let Some(ref cross_day) = schema.cross_day_posting {
707            config.cross_day = CrossDayConfig {
708                enabled: cross_day.enabled,
709                probability_by_hour: cross_day.probability_by_hour.clone(),
710                ..Default::default()
711            };
712        }
713
714        config
715    }
716
717    /// Convert schema period-end config to core PeriodEndDynamics.
718    fn convert_period_end_config(
719        schema: &datasynth_config::schema::PeriodEndSchemaConfig,
720    ) -> PeriodEndDynamics {
721        let model_type = schema.model.as_deref().unwrap_or("exponential");
722
723        // Helper to convert period config
724        let convert_period =
725            |period: Option<&datasynth_config::schema::PeriodEndModelSchemaConfig>,
726             default_peak: f64|
727             -> PeriodEndConfig {
728                if let Some(p) = period {
729                    let model = match model_type {
730                        "flat" => PeriodEndModel::FlatMultiplier {
731                            multiplier: p.peak_multiplier.unwrap_or(default_peak),
732                        },
733                        "extended_crunch" => PeriodEndModel::ExtendedCrunch {
734                            start_day: p.start_day.unwrap_or(-10),
735                            sustained_high_days: p.sustained_high_days.unwrap_or(3),
736                            peak_multiplier: p.peak_multiplier.unwrap_or(default_peak),
737                            ramp_up_days: 3, // Default ramp-up period
738                        },
739                        _ => PeriodEndModel::ExponentialAcceleration {
740                            start_day: p.start_day.unwrap_or(-10),
741                            base_multiplier: p.base_multiplier.unwrap_or(1.0),
742                            peak_multiplier: p.peak_multiplier.unwrap_or(default_peak),
743                            decay_rate: p.decay_rate.unwrap_or(0.3),
744                        },
745                    };
746                    PeriodEndConfig {
747                        enabled: true,
748                        model,
749                        additional_multiplier: p.additional_multiplier.unwrap_or(1.0),
750                    }
751                } else {
752                    PeriodEndConfig {
753                        enabled: true,
754                        model: PeriodEndModel::ExponentialAcceleration {
755                            start_day: -10,
756                            base_multiplier: 1.0,
757                            peak_multiplier: default_peak,
758                            decay_rate: 0.3,
759                        },
760                        additional_multiplier: 1.0,
761                    }
762                }
763            };
764
765        PeriodEndDynamics::new(
766            convert_period(schema.month_end.as_ref(), 2.0),
767            convert_period(schema.quarter_end.as_ref(), 3.5),
768            convert_period(schema.year_end.as_ref(), 5.0),
769        )
770    }
771
772    /// Parse a region string into a Region enum.
773    fn parse_region(region_str: &str) -> Region {
774        match region_str.to_uppercase().as_str() {
775            "US" => Region::US,
776            "DE" => Region::DE,
777            "GB" => Region::GB,
778            "CN" => Region::CN,
779            "JP" => Region::JP,
780            "IN" => Region::IN,
781            "BR" => Region::BR,
782            "MX" => Region::MX,
783            "AU" => Region::AU,
784            "SG" => Region::SG,
785            "KR" => Region::KR,
786            "FR" => Region::FR,
787            "IT" => Region::IT,
788            "ES" => Region::ES,
789            "CA" => Region::CA,
790            _ => Region::US,
791        }
792    }
793
794    /// Set a custom company selector.
795    pub fn set_company_selector(&mut self, selector: WeightedCompanySelector) {
796        self.company_selector = selector;
797    }
798
799    /// Get the current company selector.
800    pub fn company_selector(&self) -> &WeightedCompanySelector {
801        &self.company_selector
802    }
803
804    /// Set fraud configuration.
805    pub fn set_fraud_config(&mut self, config: FraudConfig) {
806        self.fraud_config = config;
807    }
808
809    /// Set vendors from generated master data.
810    ///
811    /// This replaces the default vendor pool with actual generated vendors,
812    /// ensuring JEs reference real master data entities.
813    pub fn with_vendors(mut self, vendors: &[Vendor]) -> Self {
814        if !vendors.is_empty() {
815            self.vendor_pool = VendorPool::from_vendors(vendors.to_vec());
816            self.using_real_master_data = true;
817        }
818        self
819    }
820
821    /// Set customers from generated master data.
822    ///
823    /// This replaces the default customer pool with actual generated customers,
824    /// ensuring JEs reference real master data entities.
825    pub fn with_customers(mut self, customers: &[Customer]) -> Self {
826        if !customers.is_empty() {
827            self.customer_pool = CustomerPool::from_customers(customers.to_vec());
828            self.using_real_master_data = true;
829        }
830        self
831    }
832
833    /// Set materials from generated master data.
834    ///
835    /// This provides material references for JEs that involve inventory movements.
836    pub fn with_materials(mut self, materials: &[Material]) -> Self {
837        if !materials.is_empty() {
838            self.material_pool = Some(MaterialPool::from_materials(materials.to_vec()));
839            self.using_real_master_data = true;
840        }
841        self
842    }
843
844    /// Set all master data at once for convenience.
845    ///
846    /// This is the recommended way to configure the JE generator with
847    /// generated master data to ensure data coherence.
848    pub fn with_master_data(
849        self,
850        vendors: &[Vendor],
851        customers: &[Customer],
852        materials: &[Material],
853    ) -> Self {
854        self.with_vendors(vendors)
855            .with_customers(customers)
856            .with_materials(materials)
857    }
858
859    /// Set the cost-center pool used by line-item enrichment.
860    ///
861    /// The orchestrator wires this from the generated cost-centers
862    /// master so `JE.cost_center` joins back to `cost_centers.id`.
863    /// When the pool is non-empty `enrich_line_items` picks
864    /// deterministically from it; the hardcoded fallback
865    /// `COST_CENTER_POOL` const is only used when the pool is empty
866    /// (configs that don't generate cost-center master data).
867    pub fn with_cost_center_pool(mut self, ids: Vec<String>) -> Self {
868        self.cost_center_pool = ids;
869        self
870    }
871
872    /// Set the profit-center pool used by line-item enrichment.
873    ///
874    /// Same semantics as [`with_cost_center_pool`] but for the
875    /// profit-centers master.  Without this, the legacy
876    /// `PC-{company_code}-{P2P|O2C|R2R|H2R}` derivation is used —
877    /// which is consistent within a generation run but does not
878    /// match the format the master data generator emits.
879    pub fn with_profit_center_pool(mut self, ids: Vec<String>) -> Self {
880        self.profit_center_pool = ids;
881        self
882    }
883
884    /// Replace the auto-generated user pool with an externally-built one.
885    ///
886    /// The orchestrator builds a [`UserPool`] from the generated
887    /// employee master ([`UserPool::from_employees`]) and passes it
888    /// here, so `JE.created_by` joins back to `employees.user_id`.
889    /// Without this call, [`with_country_pack_names`] generates its
890    /// own user pool whose ids are disjoint from the employee
891    /// master.
892    pub fn with_user_pool(mut self, pool: UserPool) -> Self {
893        self.user_pool = Some(pool);
894        self
895    }
896
897    /// Replace the user pool with one generated from a [`CountryPack`].
898    ///
899    /// This is an alternative to the default name-culture distribution that
900    /// derives name pools and weights from the country-pack's `names` section.
901    /// The existing user pool (if any) is discarded and regenerated using
902    /// [`MultiCultureNameGenerator::from_country_pack`].
903    pub fn with_country_pack_names(mut self, pack: &CountryPack) -> Self {
904        let name_gen =
905            datasynth_core::templates::MultiCultureNameGenerator::from_country_pack(pack);
906        let config = UserGeneratorConfig {
907            // The culture distribution is embedded in the name generator
908            // itself, so we use an empty list here.
909            culture_distribution: Vec::new(),
910            email_domain: name_gen.email_domain().to_string(),
911            generate_realistic_names: true,
912        };
913        let mut user_gen = UserGenerator::with_name_generator(self.seed + 100, config, name_gen);
914        self.user_pool = Some(user_gen.generate_standard(&self.companies));
915        self
916    }
917
918    /// Check if the generator is using real master data.
919    pub fn is_using_real_master_data(&self) -> bool {
920        self.using_real_master_data
921    }
922
923    /// Determine if this transaction should be fraudulent.
924    /// Pick a realistic ERP `source_system` provenance code.
925    ///
926    /// Returns a string like `"SAP-FI/AP"`, `"manual/adjustment"`,
927    /// `"Interface/EDI"`. Uses the business process to bias toward
928    /// process-appropriate sub-modules (e.g. P2P → SAP-MM/IV, O2C →
929    /// SAP-SD/IV, H2R → SAP-HR/PR). The legacy 7-code shape
930    /// (`SAP-FI`, `SAP-MM`, etc.) is preserved as a prefix so existing
931    /// `starts_with` filters keep working.
932    ///
933    /// **Manual contract**: when `is_manual` is true the returned value
934    /// always starts with `"manual"` or `"spreadsheet"`. This is asserted
935    /// in `test_isa240_audit_flags_populated`.
936    fn pick_source_system(rng: &mut ChaCha8Rng, is_manual: bool, bp: BusinessProcess) -> String {
937        if is_manual {
938            // 8 manual provenance codes — all share a `manual/` or
939            // `spreadsheet/` prefix.
940            const MANUAL: &[&str] = &[
941                "manual/standard",
942                "manual/adjustment",
943                "manual/reclassification",
944                "manual/accrual",
945                "manual/reversal",
946                "manual/correction",
947                "spreadsheet/upload",
948                "spreadsheet/journal",
949            ];
950            let idx = (rng.random::<u32>() as usize) % MANUAL.len();
951            return MANUAL[idx].to_string();
952        }
953
954        // Process-aware automated provenance. Each process has a small
955        // primary set; we also mix in cross-process codes ~20% of the
956        // time so the taxonomy stays diverse without losing coherence.
957        let primary: &[&str] = match bp {
958            BusinessProcess::P2P => &[
959                "SAP-MM/PO",
960                "SAP-MM/IV",
961                "SAP-MM/IM",
962                "SAP-FI/AP",
963                "Interface/EDI",
964            ],
965            BusinessProcess::O2C => &[
966                "SAP-SD/ORD",
967                "SAP-SD/DEL",
968                "SAP-SD/IV",
969                "SAP-FI/AR",
970                "Interface/Lockbox",
971            ],
972            BusinessProcess::H2R => &["SAP-HR/PR", "SAP-HR/TIME", "Interface/PayRun"],
973            BusinessProcess::A2R => &["SAP-FI/AA", "SAP-FI/GL"],
974            BusinessProcess::Treasury => &["Treasury/CM", "Treasury/HD", "Interface/Bank"],
975            BusinessProcess::Tax => &["Tax/RPT", "SAP-FI/GL"],
976            BusinessProcess::Mfg => &["SAP-MM/IM", "SAP-FI/GL"],
977            // R2R, S2C, Bank, Audit, Intercompany, ProjectAccounting, Esg
978            // → fall through to a generic mix.
979            _ => &[
980                "SAP-FI/GL",
981                "SAP-FI/AP",
982                "SAP-FI/AR",
983                "SAP-FI/AA",
984                "External/SubL",
985            ],
986        };
987
988        // 80% process-appropriate, 20% cross-process (pulled from a
989        // generic pool) so the categorical distribution has long tails.
990        const CROSS: &[&str] = &[
991            "SAP-FI/GL",
992            "SAP-FI/AP",
993            "SAP-FI/AR",
994            "Interface/EDI",
995            "Interface/Bank",
996            "External/SubL",
997        ];
998        let pool = if rng.random::<f64>() < 0.80 {
999            primary
1000        } else {
1001            CROSS
1002        };
1003        let idx = (rng.random::<u32>() as usize) % pool.len();
1004        pool[idx].to_string()
1005    }
1006
1007    fn determine_fraud(&mut self) -> Option<FraudType> {
1008        if !self.fraud_config.enabled {
1009            return None;
1010        }
1011
1012        // Roll for fraud based on fraud rate
1013        if self.rng.random::<f64>() >= self.fraud_config.fraud_rate {
1014            return None;
1015        }
1016
1017        // Select fraud type based on distribution
1018        Some(self.select_fraud_type())
1019    }
1020
1021    /// Select a fraud type based on the configured distribution.
1022    fn select_fraud_type(&mut self) -> FraudType {
1023        let dist = &self.fraud_config.fraud_type_distribution;
1024        let roll: f64 = self.rng.random();
1025
1026        let mut cumulative = 0.0;
1027
1028        cumulative += dist.suspense_account_abuse;
1029        if roll < cumulative {
1030            return FraudType::SuspenseAccountAbuse;
1031        }
1032
1033        cumulative += dist.fictitious_transaction;
1034        if roll < cumulative {
1035            return FraudType::FictitiousTransaction;
1036        }
1037
1038        cumulative += dist.revenue_manipulation;
1039        if roll < cumulative {
1040            return FraudType::RevenueManipulation;
1041        }
1042
1043        cumulative += dist.expense_capitalization;
1044        if roll < cumulative {
1045            return FraudType::ExpenseCapitalization;
1046        }
1047
1048        cumulative += dist.split_transaction;
1049        if roll < cumulative {
1050            return FraudType::SplitTransaction;
1051        }
1052
1053        cumulative += dist.timing_anomaly;
1054        if roll < cumulative {
1055            return FraudType::TimingAnomaly;
1056        }
1057
1058        cumulative += dist.unauthorized_access;
1059        if roll < cumulative {
1060            return FraudType::UnauthorizedAccess;
1061        }
1062
1063        cumulative += dist.duplicate_payment;
1064        if roll < cumulative {
1065            return FraudType::DuplicatePayment;
1066        }
1067
1068        cumulative += dist.kickback_scheme;
1069        if roll < cumulative {
1070            return FraudType::KickbackScheme;
1071        }
1072
1073        cumulative += dist.round_tripping;
1074        if roll < cumulative {
1075            return FraudType::RoundTripping;
1076        }
1077
1078        cumulative += dist.unauthorized_discount;
1079        if roll < cumulative {
1080            return FraudType::UnauthorizedDiscount;
1081        }
1082
1083        // Fallback when distribution is sub-1.0 (validator allows tolerance)
1084        FraudType::DuplicatePayment
1085    }
1086
1087    /// Map a fraud type to an amount pattern for suspicious amounts.
1088    fn fraud_type_to_amount_pattern(&self, fraud_type: FraudType) -> FraudAmountPattern {
1089        match fraud_type {
1090            FraudType::SplitTransaction | FraudType::JustBelowThreshold => {
1091                FraudAmountPattern::ThresholdAdjacent
1092            }
1093            FraudType::FictitiousTransaction
1094            | FraudType::FictitiousEntry
1095            | FraudType::SuspenseAccountAbuse
1096            | FraudType::RoundDollarManipulation => FraudAmountPattern::ObviousRoundNumbers,
1097            FraudType::RevenueManipulation
1098            | FraudType::ExpenseCapitalization
1099            | FraudType::ImproperCapitalization
1100            | FraudType::ReserveManipulation
1101            | FraudType::UnauthorizedAccess
1102            | FraudType::PrematureRevenue
1103            | FraudType::UnderstatedLiabilities
1104            | FraudType::OverstatedAssets
1105            | FraudType::ChannelStuffing => FraudAmountPattern::StatisticallyImprobable,
1106            FraudType::DuplicatePayment
1107            | FraudType::TimingAnomaly
1108            | FraudType::SelfApproval
1109            | FraudType::ExceededApprovalLimit
1110            | FraudType::SegregationOfDutiesViolation
1111            | FraudType::UnauthorizedApproval
1112            | FraudType::CollusiveApproval
1113            | FraudType::FictitiousVendor
1114            | FraudType::ShellCompanyPayment
1115            | FraudType::Kickback
1116            | FraudType::KickbackScheme
1117            | FraudType::UnauthorizedDiscount
1118            | FraudType::RoundTripping
1119            | FraudType::InvoiceManipulation
1120            | FraudType::AssetMisappropriation
1121            | FraudType::InventoryTheft
1122            | FraudType::GhostEmployee => FraudAmountPattern::Normal,
1123            // Accounting Standards Fraud Types (ASC 606/IFRS 15 - Revenue)
1124            FraudType::ImproperRevenueRecognition
1125            | FraudType::ImproperPoAllocation
1126            | FraudType::VariableConsiderationManipulation
1127            | FraudType::ContractModificationMisstatement => {
1128                FraudAmountPattern::StatisticallyImprobable
1129            }
1130            // Accounting Standards Fraud Types (ASC 842/IFRS 16 - Leases)
1131            FraudType::LeaseClassificationManipulation
1132            | FraudType::OffBalanceSheetLease
1133            | FraudType::LeaseLiabilityUnderstatement
1134            | FraudType::RouAssetMisstatement => FraudAmountPattern::StatisticallyImprobable,
1135            // Accounting Standards Fraud Types (ASC 820/IFRS 13 - Fair Value)
1136            FraudType::FairValueHierarchyManipulation
1137            | FraudType::Level3InputManipulation
1138            | FraudType::ValuationTechniqueManipulation => {
1139                FraudAmountPattern::StatisticallyImprobable
1140            }
1141            // Accounting Standards Fraud Types (ASC 360/IAS 36 - Impairment)
1142            FraudType::DelayedImpairment
1143            | FraudType::ImpairmentTestAvoidance
1144            | FraudType::CashFlowProjectionManipulation
1145            | FraudType::ImproperImpairmentReversal => FraudAmountPattern::StatisticallyImprobable,
1146            // Sourcing/Procurement Fraud
1147            FraudType::BidRigging
1148            | FraudType::PhantomVendorContract
1149            | FraudType::ConflictOfInterestSourcing => FraudAmountPattern::Normal,
1150            FraudType::SplitContractThreshold => FraudAmountPattern::ThresholdAdjacent,
1151            // HR/Payroll Fraud
1152            FraudType::GhostEmployeePayroll
1153            | FraudType::PayrollInflation
1154            | FraudType::DuplicateExpenseReport
1155            | FraudType::FictitiousExpense => FraudAmountPattern::Normal,
1156            FraudType::SplitExpenseToAvoidApproval => FraudAmountPattern::ThresholdAdjacent,
1157            // O2C Fraud
1158            FraudType::RevenueTimingManipulation => FraudAmountPattern::StatisticallyImprobable,
1159            FraudType::QuotePriceOverride => FraudAmountPattern::Normal,
1160        }
1161    }
1162
1163    /// Generate a deterministic UUID using the factory.
1164    #[inline]
1165    fn generate_deterministic_uuid(&self) -> uuid::Uuid {
1166        self.uuid_factory.next()
1167    }
1168
/// Hardcoded cost-center IDs used to enrich expense-account lines when no
/// master-data cost-center pool has been supplied.
const COST_CENTER_POOL: &'static [&'static str] = &[
    "CC1000",
    "CC2000",
    "CC3000",
    "CC4000",
    "CC5000",
];
1172
1173    /// Enrich journal entry line items with account descriptions, cost centers,
1174    /// profit centers, value dates, line text, and assignment fields.
1175    ///
1176    /// This populates the sparse optional fields that `JournalEntryLine::debit()`
1177    /// and `::credit()` leave as `None`.
1178    fn enrich_line_items(&self, entry: &mut JournalEntry) {
1179        let posting_date = entry.header.posting_date;
1180        let company_code = &entry.header.company_code;
1181        let header_text = entry.header.header_text.clone();
1182        let business_process = entry.header.business_process;
1183
1184        // Derive a deterministic index from the document_id for cost center selection
1185        let doc_id_bytes = entry.header.document_id.as_bytes();
1186        let mut cc_seed: usize = 0;
1187        for &b in doc_id_bytes {
1188            cc_seed = cc_seed.wrapping_add(b as usize);
1189        }
1190
1191        for (i, line) in entry.lines.iter_mut().enumerate() {
1192            // 1. account_description: look up from CoA
1193            if line.account_description.is_none() {
1194                line.account_description = self
1195                    .coa
1196                    .get_account(&line.gl_account)
1197                    .map(|a| a.short_description.clone());
1198            }
1199
1200            // 2. cost_center: assign to expense accounts (5xxx/6xxx)
1201            //
1202            // When the orchestrator has provided a master-data-sourced
1203            // pool (`with_cost_center_pool`), pick from it so the value
1204            // joins back to `cost_centers.id`.  Otherwise fall back to
1205            // the legacy hardcoded `COST_CENTER_POOL` const.
1206            //
1207            // Selection within the pool is filtered to entries that
1208            // mention the entry's `company_code` (master IDs follow
1209            // the `CC-{company}-...` convention) so cross-company
1210            // contamination is avoided; if no pool entry matches the
1211            // company we fall through to the full pool.
1212            if line.cost_center.is_none() {
1213                let first_char = line.gl_account.chars().next().unwrap_or('0');
1214                if first_char == '5' || first_char == '6' {
1215                    if !self.cost_center_pool.is_empty() {
1216                        let needle = format!("-{company_code}-");
1217                        let candidates: Vec<&String> = self
1218                            .cost_center_pool
1219                            .iter()
1220                            .filter(|id| id.contains(&needle))
1221                            .collect();
1222                        let pool: Vec<&String> = if candidates.is_empty() {
1223                            self.cost_center_pool.iter().collect()
1224                        } else {
1225                            candidates
1226                        };
1227                        let idx = cc_seed.wrapping_add(i) % pool.len();
1228                        line.cost_center = Some(pool[idx].clone());
1229                    } else {
1230                        let idx = cc_seed.wrapping_add(i) % Self::COST_CENTER_POOL.len();
1231                        line.cost_center = Some(Self::COST_CENTER_POOL[idx].to_string());
1232                    }
1233                }
1234            }
1235
1236            // 3. profit_center: assign from master pool when available
1237            // (`with_profit_center_pool`); otherwise derive from
1238            // company code + business process (legacy behaviour, which
1239            // does not match the master-data PC ID format).
1240            if line.profit_center.is_none() {
1241                if !self.profit_center_pool.is_empty() {
1242                    let needle = format!("-{company_code}-");
1243                    let candidates: Vec<&String> = self
1244                        .profit_center_pool
1245                        .iter()
1246                        .filter(|id| id.contains(&needle))
1247                        .collect();
1248                    let pool: Vec<&String> = if candidates.is_empty() {
1249                        self.profit_center_pool.iter().collect()
1250                    } else {
1251                        candidates
1252                    };
1253                    let idx = cc_seed.wrapping_add(i) % pool.len();
1254                    line.profit_center = Some(pool[idx].clone());
1255                } else {
1256                    let suffix = match business_process {
1257                        Some(BusinessProcess::P2P) => "-P2P",
1258                        Some(BusinessProcess::O2C) => "-O2C",
1259                        Some(BusinessProcess::R2R) => "-R2R",
1260                        Some(BusinessProcess::H2R) => "-H2R",
1261                        _ => "",
1262                    };
1263                    line.profit_center = Some(format!("PC-{company_code}{suffix}"));
1264                }
1265            }
1266
1267            // 4. line_text: fall back to header_text if not already set
1268            if line.line_text.is_none() {
1269                line.line_text = header_text.clone();
1270            }
1271
1272            // 5. value_date: set to posting_date for AR/AP accounts
1273            if line.value_date.is_none()
1274                && (line.gl_account.starts_with("1100") || line.gl_account.starts_with("2000"))
1275            {
1276                line.value_date = Some(posting_date);
1277            }
1278
1279            // 6. assignment: set to vendor/customer reference for AP/AR lines
1280            if line.assignment.is_none() {
1281                if line.gl_account.starts_with("2000") {
1282                    // AP line - use vendor reference from header
1283                    if let Some(ref ht) = header_text {
1284                        // Try to extract vendor ID from header text patterns like "... - V-001"
1285                        if let Some(vendor_part) = ht.rsplit(" - ").next() {
1286                            if vendor_part.starts_with("V-")
1287                                || vendor_part.starts_with("VENDOR")
1288                                || vendor_part.starts_with("Vendor")
1289                            {
1290                                line.assignment = Some(vendor_part.to_string());
1291                            }
1292                        }
1293                    }
1294                } else if line.gl_account.starts_with("1100") {
1295                    // AR line - use customer reference from header
1296                    if let Some(ref ht) = header_text {
1297                        if let Some(customer_part) = ht.rsplit(" - ").next() {
1298                            if customer_part.starts_with("C-")
1299                                || customer_part.starts_with("CUST")
1300                                || customer_part.starts_with("Customer")
1301                            {
1302                                line.assignment = Some(customer_part.to_string());
1303                            }
1304                        }
1305                    }
1306                }
1307            }
1308        }
1309    }
1310
1311    /// Generate a single journal entry.
1312    pub fn generate(&mut self) -> JournalEntry {
1313        debug!(
1314            count = self.count,
1315            companies = self.companies.len(),
1316            start_date = %self.start_date,
1317            end_date = %self.end_date,
1318            "Generating journal entry"
1319        );
1320
1321        // Check if we're in a batch - if so, generate a batched entry
1322        if let Some(ref state) = self.batch_state {
1323            if state.remaining > 0 {
1324                return self.generate_batched_entry();
1325            }
1326        }
1327
1328        self.count += 1;
1329
1330        // Generate deterministic document ID
1331        let document_id = self.generate_deterministic_uuid();
1332
1333        // Sample posting date
1334        let mut posting_date = self
1335            .temporal_sampler
1336            .sample_date(self.start_date, self.end_date);
1337
1338        // Adjust posting date to be a business day if business day calculator is configured
1339        if let Some(ref calc) = self.business_day_calculator {
1340            if !calc.is_business_day(posting_date) {
1341                // Move to next business day
1342                posting_date = calc.next_business_day(posting_date, false);
1343                // Ensure we don't exceed end_date
1344                if posting_date > self.end_date {
1345                    posting_date = calc.prev_business_day(self.end_date, true);
1346                }
1347            }
1348        }
1349
1350        // Select company using weighted selector
1351        let company_code = self.company_selector.select(&mut self.rng).to_string();
1352
1353        // v4.1.0+: draw a single (u, v) pair from the copula — cached for
1354        // both the amount adjustment (u) and the line-count shift (v).
1355        // None when no copula is configured.
1356        let copula_uv: Option<(f64, f64)> =
1357            self.correlation_copula.as_mut().map(|cop| cop.sample());
1358
1359        // Sample line item specification. When a copula is configured,
1360        // v drives line-count via a quantile-preserving map: integer
1361        // count `2 + floor(v * 10)` gives range [2, 11] evenly spaced
1362        // in v, so rank(v) == rank(line_count).
1363        //
1364        // v4.1.6+: upgraded from the v3.5.4 nudge (shift around
1365        // independently-drawn count) to true rank-preserving quantile
1366        // inversion, so empirical Kendall-τ now matches copula theory.
1367        let mut line_spec = self.line_sampler.sample();
1368        if let Some((_u, v)) = copula_uv {
1369            let new_total = 2 + ((v * 10.0).floor() as usize).min(9);
1370            let old_debit = line_spec.debit_count.max(1);
1371            let old_credit = line_spec.credit_count.max(1);
1372            let new_debit = (new_total as f64 * old_debit as f64 / (old_debit + old_credit) as f64)
1373                .round() as usize;
1374            let new_debit = new_debit.clamp(1, new_total - 1);
1375            let new_credit = new_total - new_debit;
1376            line_spec.total_count = new_total;
1377            line_spec.debit_count = new_debit;
1378            line_spec.credit_count = new_credit;
1379        }
1380
1381        // Determine source type using full 4-way distribution
1382        let source = self.select_source();
1383        let is_automated = matches!(
1384            source,
1385            TransactionSource::Automated | TransactionSource::Recurring
1386        );
1387
1388        // Select business process
1389        let business_process = self.select_business_process();
1390
1391        // Determine if this is a fraudulent transaction
1392        let fraud_type = self.determine_fraud();
1393        let is_fraud = fraud_type.is_some();
1394
1395        // Sample time based on source
1396        let time = self.temporal_sampler.sample_time(!is_automated);
1397        let created_at = posting_date.and_time(time).and_utc();
1398
1399        // Select user from pool or generate generic
1400        let (created_by, user_persona) = self.select_user(is_automated);
1401
1402        // Create header with deterministic UUID
1403        let mut header =
1404            JournalEntryHeader::with_deterministic_id(company_code, posting_date, document_id);
1405        header.created_at = created_at;
1406        header.source = source;
1407        header.created_by = created_by;
1408        header.user_persona = user_persona;
1409        header.business_process = Some(business_process);
1410        header.document_type = Self::document_type_for_process(business_process).to_string();
1411        header.is_fraud = is_fraud;
1412        header.fraud_type = fraud_type;
1413
1414        // --- ISA 240 audit flags ---
1415        let is_manual = matches!(source, TransactionSource::Manual);
1416        header.is_manual = is_manual;
1417
1418        // Determine source_system based on manual vs automated.
1419        //
1420        // Real ERPs typically expose 20+ distinct provenance codes per
1421        // company (one per module + sub-module + interface). The taxonomy
1422        // below is a strict superset of the legacy {manual, spreadsheet,
1423        // SAP-FI, SAP-MM, SAP-SD, interface, SAP-HR} codes so downstream
1424        // consumers that filter by prefix (e.g. `starts_with("SAP-")`)
1425        // continue to work.
1426        //
1427        // Contract preserved by the generator-level audit assertion in
1428        // `test_isa240_audit_flags_populated`:
1429        //   - manual entries → starts_with("manual") || starts_with("spreadsheet")
1430        //   - automated entries → does NOT start with "manual"/"spreadsheet"
1431        header.source_system = Self::pick_source_system(&mut self.rng, is_manual, business_process);
1432
1433        // is_post_close: entry is in the last month of the configured period
1434        // and the posting date falls after the 25th (simulating close cutoff)
1435        let is_post_close = posting_date.month() == self.end_date.month()
1436            && posting_date.year() == self.end_date.year()
1437            && posting_date.day() > 25;
1438        header.is_post_close = is_post_close;
1439
1440        // created_date: for manual entries, same day as posting; for automated,
1441        // 0-3 days before posting_date
1442        let created_date = if is_manual {
1443            posting_date.and_hms_opt(time.hour().min(23), time.minute(), time.second())
1444        } else {
1445            let lag_days = self.rng.random_range(0i64..=3);
1446            let created_naive_date = posting_date
1447                .checked_sub_signed(chrono::Duration::days(lag_days))
1448                .unwrap_or(posting_date);
1449            created_naive_date.and_hms_opt(
1450                self.rng.random_range(8u32..=17),
1451                self.rng.random_range(0u32..=59),
1452                self.rng.random_range(0u32..=59),
1453            )
1454        };
1455        header.created_date = created_date;
1456
1457        // Generate description context
1458        let mut context =
1459            DescriptionContext::with_period(posting_date.month(), posting_date.year());
1460
1461        // Add vendor/customer context based on business process
1462        match business_process {
1463            BusinessProcess::P2P => {
1464                if let Some(vendor) = self.vendor_pool.random_vendor(&mut self.rng) {
1465                    context.vendor_name = Some(vendor.name.clone());
1466                }
1467            }
1468            BusinessProcess::O2C => {
1469                if let Some(customer) = self.customer_pool.random_customer(&mut self.rng) {
1470                    context.customer_name = Some(customer.name.clone());
1471                }
1472            }
1473            _ => {}
1474        }
1475
1476        // Generate header text if enabled
1477        if self.template_config.descriptions.generate_header_text {
1478            header.header_text = Some(self.description_generator.generate_header_text(
1479                business_process,
1480                &context,
1481                &mut self.rng,
1482            ));
1483        }
1484
1485        // Generate reference if enabled
1486        if self.template_config.references.generate_references {
1487            header.reference = Some(
1488                self.reference_generator
1489                    .generate_for_process_year(business_process, posting_date.year()),
1490            );
1491        }
1492
1493        // Derive typed source document from reference prefix
1494        header.source_document = header
1495            .reference
1496            .as_deref()
1497            .and_then(DocumentRef::parse)
1498            .or_else(|| {
1499                if header.source == TransactionSource::Manual {
1500                    Some(DocumentRef::Manual)
1501                } else {
1502                    None
1503                }
1504            });
1505
1506        // Generate line items
1507        let mut entry = JournalEntry::new(header);
1508
1509        // Generate amount - use fraud pattern if this is a fraudulent transaction.
1510        // Non-fraud path prefers the v3.4.0 advanced sampler when configured; fraud
1511        // patterns always use the legacy sampler because they target specific
1512        // thresholds (round numbers, just-under-approval amounts) that are
1513        // orthogonal to mixture models.
1514        let base_amount = if let Some(ft) = fraud_type {
1515            let pattern = self.fraud_type_to_amount_pattern(ft);
1516            self.amount_sampler.sample_fraud(pattern)
1517        } else if let Some(ref mut adv) = self.advanced_amount_sampler {
1518            adv.sample_decimal()
1519        } else {
1520            self.amount_sampler.sample()
1521        };
1522        // v3.5.3+: if a conditional-amount override is configured and
1523        // the JE is non-fraud, re-sample the amount from the conditional
1524        // distribution using the computed context. Fraud entries bypass
1525        // this path to preserve fraud-pattern semantics (as with the
1526        // advanced sampler cascade above).
1527        let base_amount = if fraud_type.is_none() {
1528            // Compute input context BEFORE taking &mut on the sampler
1529            // to avoid borrow-checker conflict with the immutable
1530            // `conditional_input_value` call.
1531            let input = self.conditional_input_value(posting_date);
1532            if let Some(ref mut cond) = self.conditional_amount_override {
1533                cond.sample_decimal(input)
1534            } else {
1535                base_amount
1536            }
1537        } else {
1538            base_amount
1539        };
1540
1541        // v4.1.6+: if a copula is configured AND an advanced amount
1542        // sampler with a ppf is available, use true rank-preserving
1543        // inverse-CDF sampling — amount is drawn DIRECTLY from the
1544        // sampler's quantile at `u`, replacing (not nudging) the
1545        // independently-drawn base_amount. This makes empirical
1546        // Kendall-τ match the copula's theoretical τ.
1547        //
1548        // Fallback for copula-without-advanced-sampler: keep the
1549        // v4.1.0 log-scale multiplier nudge (observable correlation,
1550        // diluted magnitude).
1551        let base_amount = if fraud_type.is_none() {
1552            if let Some((u, _v)) = copula_uv {
1553                if let Some(ref adv) = self.advanced_amount_sampler {
1554                    adv.ppf_decimal(u)
1555                } else {
1556                    let log_mult = 4.0 * (u - 0.5);
1557                    let adjusted = base_amount.to_f64().unwrap_or(1.0) * log_mult.exp();
1558                    Decimal::from_f64_retain(adjusted).unwrap_or(base_amount)
1559                }
1560            } else {
1561                base_amount
1562            }
1563        } else {
1564            base_amount
1565        };
1566
1567        // Apply temporal drift if configured
1568        let drift_adjusted_amount = {
1569            let drift = self.get_drift_adjustments(posting_date);
1570            if drift.amount_mean_multiplier != 1.0 {
1571                // Apply drift multiplier (includes seasonal factor if enabled)
1572                let multiplier = drift.amount_mean_multiplier * drift.seasonal_factor;
1573                let adjusted = base_amount.to_f64().unwrap_or(1.0) * multiplier;
1574                Decimal::from_f64_retain(adjusted).unwrap_or(base_amount)
1575            } else {
1576                base_amount
1577            }
1578        };
1579
1580        // Apply human variation to amounts for non-automated transactions
1581        let total_amount = if is_automated {
1582            drift_adjusted_amount // Automated systems use exact amounts
1583        } else {
1584            self.apply_human_variation(drift_adjusted_amount)
1585        };
1586
1587        // Generate debit lines
1588        let debit_amounts = self
1589            .amount_sampler
1590            .sample_summing_to(line_spec.debit_count, total_amount);
1591        for (i, amount) in debit_amounts.into_iter().enumerate() {
1592            let account_number = self.select_debit_account().account_number.clone();
1593            let mut line = JournalEntryLine::debit(
1594                entry.header.document_id,
1595                (i + 1) as u32,
1596                account_number.clone(),
1597                amount,
1598            );
1599
1600            // Generate line text if enabled
1601            if self.template_config.descriptions.generate_line_text {
1602                line.line_text = Some(self.description_generator.generate_line_text(
1603                    &account_number,
1604                    &context,
1605                    &mut self.rng,
1606                ));
1607            }
1608
1609            entry.add_line(line);
1610        }
1611
1612        // Generate credit lines - use the SAME amounts to ensure balance
1613        let credit_amounts = self
1614            .amount_sampler
1615            .sample_summing_to(line_spec.credit_count, total_amount);
1616        for (i, amount) in credit_amounts.into_iter().enumerate() {
1617            let account_number = self.select_credit_account().account_number.clone();
1618            let mut line = JournalEntryLine::credit(
1619                entry.header.document_id,
1620                (line_spec.debit_count + i + 1) as u32,
1621                account_number.clone(),
1622                amount,
1623            );
1624
1625            // Generate line text if enabled
1626            if self.template_config.descriptions.generate_line_text {
1627                line.line_text = Some(self.description_generator.generate_line_text(
1628                    &account_number,
1629                    &context,
1630                    &mut self.rng,
1631                ));
1632            }
1633
1634            entry.add_line(line);
1635        }
1636
1637        // Enrich line items with account descriptions, cost centers, etc.
1638        self.enrich_line_items(&mut entry);
1639
1640        // Apply persona-based errors if enabled and it's a human user
1641        if self.persona_errors_enabled && !is_automated {
1642            self.maybe_inject_persona_error(&mut entry);
1643        }
1644
1645        // Apply approval workflow if enabled and amount exceeds threshold
1646        if self.approval_enabled {
1647            self.maybe_apply_approval_workflow(&mut entry, posting_date);
1648        }
1649
1650        // Populate approved_by / approval_date from the approval workflow
1651        self.populate_approval_fields(&mut entry, posting_date);
1652
1653        // Maybe start a batch of similar entries for realism
1654        self.maybe_start_batch(&entry);
1655
1656        entry
1657    }
1658
1659    /// Enable or disable persona-based error injection.
1660    ///
1661    /// When enabled, entries created by human personas have a chance
1662    /// to contain realistic human errors based on their experience level.
1663    pub fn with_persona_errors(mut self, enabled: bool) -> Self {
1664        self.persona_errors_enabled = enabled;
1665        self
1666    }
1667
1668    /// Set fraud configuration for fraud injection.
1669    ///
1670    /// When fraud is enabled in the config, transactions have a chance
1671    /// to be marked as fraudulent based on the configured fraud rate.
1672    pub fn with_fraud_config(mut self, config: FraudConfig) -> Self {
1673        self.fraud_config = config;
1674        self
1675    }
1676
1677    /// Check if persona errors are enabled.
1678    pub fn persona_errors_enabled(&self) -> bool {
1679        self.persona_errors_enabled
1680    }
1681
1682    /// Enable or disable batch processing behavior.
1683    ///
1684    /// When enabled (default), the generator will occasionally produce batches
1685    /// of similar entries, simulating how humans batch similar work together.
1686    pub fn with_batching(mut self, enabled: bool) -> Self {
1687        if !enabled {
1688            self.batch_state = None;
1689        }
1690        self
1691    }
1692
1693    /// Check if batch processing is enabled.
1694    pub fn batching_enabled(&self) -> bool {
1695        // Batching is implicitly enabled when not explicitly disabled
1696        true
1697    }
1698
1699    /// Maybe start a batch based on the current entry.
1700    ///
1701    /// Humans often batch similar work: processing invoices from one vendor,
1702    /// entering expense reports for a trip, reconciling similar items.
1703    fn maybe_start_batch(&mut self, entry: &JournalEntry) {
1704        // Only start batch for non-automated, non-fraud entries
1705        if entry.header.source == TransactionSource::Automated || entry.header.is_fraud {
1706            return;
1707        }
1708
1709        // 15% chance to start a batch (most work is not batched)
1710        if self.rng.random::<f64>() > 0.15 {
1711            return;
1712        }
1713
1714        // Extract key attributes for batching
1715        let base_account = entry
1716            .lines
1717            .first()
1718            .map(|l| l.gl_account.clone())
1719            .unwrap_or_default();
1720
1721        let base_amount = entry.total_debit();
1722
1723        self.batch_state = Some(BatchState {
1724            base_account_number: base_account,
1725            base_amount,
1726            base_business_process: entry.header.business_process,
1727            base_posting_date: entry.header.posting_date,
1728            remaining: self.rng.random_range(2..7), // 2-6 more similar entries
1729        });
1730    }
1731
1732    /// Generate an entry that's part of the current batch.
1733    ///
1734    /// Batched entries have:
1735    /// - Same or very similar business process
1736    /// - Same posting date (batched work done together)
1737    /// - Similar amounts (within ±15%)
1738    /// - Same debit account (processing similar items)
1739    fn generate_batched_entry(&mut self) -> JournalEntry {
1740        use rust_decimal::Decimal;
1741
1742        // Decrement batch counter
1743        if let Some(ref mut state) = self.batch_state {
1744            state.remaining = state.remaining.saturating_sub(1);
1745        }
1746
1747        let Some(batch) = self.batch_state.clone() else {
1748            // This is a programming error - batch_state should be set before calling this method.
1749            // Clear state and fall back to generating a standard entry instead of panicking.
1750            tracing::warn!(
1751                "generate_batched_entry called without batch_state; generating standard entry"
1752            );
1753            self.batch_state = None;
1754            return self.generate();
1755        };
1756
1757        // Use the batch's posting date (work done on same day)
1758        let posting_date = batch.base_posting_date;
1759
1760        self.count += 1;
1761        let document_id = self.generate_deterministic_uuid();
1762
1763        // Select same company (batched work is usually same company)
1764        let company_code = self.company_selector.select(&mut self.rng).to_string();
1765
1766        // Use simplified line spec for batched entries (usually 2-line)
1767        let _line_spec = LineItemSpec {
1768            total_count: 2,
1769            debit_count: 1,
1770            credit_count: 1,
1771            split_type: DebitCreditSplit::Equal,
1772        };
1773
1774        // Batched entries are always manual
1775        let source = TransactionSource::Manual;
1776
1777        // Use the batch's business process
1778        let business_process = batch.base_business_process.unwrap_or(BusinessProcess::R2R);
1779
1780        // Sample time
1781        let time = self.temporal_sampler.sample_time(true);
1782        let created_at = posting_date.and_time(time).and_utc();
1783
1784        // Same user for batched work
1785        let (created_by, user_persona) = self.select_user(false);
1786
1787        // Create header
1788        let mut header =
1789            JournalEntryHeader::with_deterministic_id(company_code, posting_date, document_id);
1790        header.created_at = created_at;
1791        header.source = source;
1792        header.created_by = created_by;
1793        header.user_persona = user_persona;
1794        header.business_process = Some(business_process);
1795        header.document_type = Self::document_type_for_process(business_process).to_string();
1796
1797        // Batched manual entries have Manual source document
1798        header.source_document = Some(DocumentRef::Manual);
1799
1800        // ISA 240 audit flags for batched entries (always manual)
1801        header.is_manual = true;
1802        header.source_system = if self.rng.random::<f64>() < 0.70 {
1803            "manual".to_string()
1804        } else {
1805            "spreadsheet".to_string()
1806        };
1807        header.is_post_close = posting_date.month() == self.end_date.month()
1808            && posting_date.year() == self.end_date.year()
1809            && posting_date.day() > 25;
1810        header.created_date =
1811            posting_date.and_hms_opt(time.hour().min(23), time.minute(), time.second());
1812
1813        // Generate similar amount (within ±15% of base)
1814        let variation = self.rng.random_range(-0.15..0.15);
1815        let varied_amount =
1816            batch.base_amount * (Decimal::ONE + Decimal::try_from(variation).unwrap_or_default());
1817        let total_amount = varied_amount.round_dp(2).max(Decimal::from(1));
1818
1819        // Create the entry
1820        let mut entry = JournalEntry::new(header);
1821
1822        // Use same debit account as batch base
1823        let debit_line = JournalEntryLine::debit(
1824            entry.header.document_id,
1825            1,
1826            batch.base_account_number.clone(),
1827            total_amount,
1828        );
1829        entry.add_line(debit_line);
1830
1831        // Select a credit account
1832        let credit_account = self.select_credit_account().account_number.clone();
1833        let credit_line =
1834            JournalEntryLine::credit(entry.header.document_id, 2, credit_account, total_amount);
1835        entry.add_line(credit_line);
1836
1837        // Enrich line items with account descriptions, cost centers, etc.
1838        self.enrich_line_items(&mut entry);
1839
1840        // Apply persona-based errors if enabled
1841        if self.persona_errors_enabled {
1842            self.maybe_inject_persona_error(&mut entry);
1843        }
1844
1845        // Apply approval workflow if enabled
1846        if self.approval_enabled {
1847            self.maybe_apply_approval_workflow(&mut entry, posting_date);
1848        }
1849
1850        // Populate approved_by / approval_date from the approval workflow
1851        self.populate_approval_fields(&mut entry, posting_date);
1852
1853        // Clear batch state if no more entries remaining
1854        if batch.remaining <= 1 {
1855            self.batch_state = None;
1856        }
1857
1858        entry
1859    }
1860
1861    /// Maybe inject a persona-appropriate error based on the persona's error rate.
1862    fn maybe_inject_persona_error(&mut self, entry: &mut JournalEntry) {
1863        // Parse persona from the entry header
1864        let persona_str = &entry.header.user_persona;
1865        let persona = match persona_str.to_lowercase().as_str() {
1866            s if s.contains("junior") => UserPersona::JuniorAccountant,
1867            s if s.contains("senior") => UserPersona::SeniorAccountant,
1868            s if s.contains("controller") => UserPersona::Controller,
1869            s if s.contains("manager") => UserPersona::Manager,
1870            s if s.contains("executive") => UserPersona::Executive,
1871            _ => return, // Don't inject errors for unknown personas
1872        };
1873
1874        // Get base error rate from persona
1875        let base_error_rate = persona.error_rate();
1876
1877        // Apply stress factors based on posting date
1878        let adjusted_rate = self.apply_stress_factors(base_error_rate, entry.header.posting_date);
1879
1880        // Check if error should occur based on adjusted rate
1881        if self.rng.random::<f64>() >= adjusted_rate {
1882            return; // No error this time
1883        }
1884
1885        // Select and inject persona-appropriate error
1886        self.inject_human_error(entry, persona);
1887    }
1888
1889    /// Apply contextual stress factors to the base error rate.
1890    ///
1891    /// Stress factors increase error likelihood during:
1892    /// - Month-end (day >= 28): 1.5x more errors due to deadline pressure
1893    /// - Quarter-end (Mar, Jun, Sep, Dec): additional 25% boost
1894    /// - Year-end (December 28-31): 2.0x more errors due to audit pressure
1895    /// - Monday morning (catch-up work): 20% more errors
1896    /// - Friday afternoon (rushing to leave): 30% more errors
1897    fn apply_stress_factors(&self, base_rate: f64, posting_date: chrono::NaiveDate) -> f64 {
1898        use chrono::Datelike;
1899
1900        let mut rate = base_rate;
1901        let day = posting_date.day();
1902        let month = posting_date.month();
1903
1904        // Year-end stress (December 28-31): double the error rate
1905        if month == 12 && day >= 28 {
1906            rate *= 2.0;
1907            return rate.min(0.5); // Cap at 50% to keep it realistic
1908        }
1909
1910        // Quarter-end stress (last days of Mar, Jun, Sep, Dec)
1911        if matches!(month, 3 | 6 | 9 | 12) && day >= 28 {
1912            rate *= 1.75; // 75% more errors at quarter end
1913            return rate.min(0.4);
1914        }
1915
1916        // Month-end stress (last 3 days of month)
1917        if day >= 28 {
1918            rate *= 1.5; // 50% more errors at month end
1919        }
1920
1921        // Day-of-week stress effects
1922        let weekday = posting_date.weekday();
1923        match weekday {
1924            chrono::Weekday::Mon => {
1925                // Monday: catching up, often rushed
1926                rate *= 1.2;
1927            }
1928            chrono::Weekday::Fri => {
1929                // Friday: rushing to finish before weekend
1930                rate *= 1.3;
1931            }
1932            _ => {}
1933        }
1934
1935        // Cap at 40% to keep it realistic
1936        rate.min(0.4)
1937    }
1938
1939    /// Apply human-like variation to an amount.
1940    ///
1941    /// Humans don't enter perfectly calculated amounts - they:
1942    /// - Round amounts differently
1943    /// - Estimate instead of calculating exactly
1944    /// - Make small input variations
1945    ///
1946    /// This applies small variations (typically ±2%) to make amounts more realistic.
1947    fn apply_human_variation(&mut self, amount: rust_decimal::Decimal) -> rust_decimal::Decimal {
1948        use rust_decimal::Decimal;
1949
1950        // Automated transactions or very small amounts don't get variation
1951        if amount < Decimal::from(10) {
1952            return amount;
1953        }
1954
1955        // 70% chance of human variation being applied
1956        if self.rng.random::<f64>() > 0.70 {
1957            return amount;
1958        }
1959
1960        // Decide which type of human variation to apply
1961        let variation_type: u8 = self.rng.random_range(0..4);
1962
1963        match variation_type {
1964            0 => {
1965                // ±2% variation (common for estimated amounts)
1966                let variation_pct = self.rng.random_range(-0.02..0.02);
1967                let variation = amount * Decimal::try_from(variation_pct).unwrap_or_default();
1968                (amount + variation).round_dp(2)
1969            }
1970            1 => {
1971                // Round to nearest $10
1972                let ten = Decimal::from(10);
1973                (amount / ten).round() * ten
1974            }
1975            2 => {
1976                // Round to nearest $100 (for larger amounts)
1977                if amount >= Decimal::from(500) {
1978                    let hundred = Decimal::from(100);
1979                    (amount / hundred).round() * hundred
1980                } else {
1981                    amount
1982                }
1983            }
1984            3 => {
1985                // Slight under/over payment (±$0.01 to ±$1.00)
1986                let cents = Decimal::new(self.rng.random_range(-100..100), 2);
1987                (amount + cents).max(Decimal::ZERO).round_dp(2)
1988            }
1989            _ => amount,
1990        }
1991    }
1992
1993    /// Rebalance an entry after a one-sided amount modification.
1994    ///
1995    /// When an error modifies one line's amount, this finds a line on the opposite
1996    /// side (credit if modified was debit, or vice versa) and adjusts it by the
1997    /// same impact to maintain balance.
1998    fn rebalance_entry(entry: &mut JournalEntry, modified_was_debit: bool, impact: Decimal) {
1999        // Find a line on the opposite side to adjust
2000        let balancing_idx = entry.lines.iter().position(|l| {
2001            if modified_was_debit {
2002                l.credit_amount > Decimal::ZERO
2003            } else {
2004                l.debit_amount > Decimal::ZERO
2005            }
2006        });
2007
2008        if let Some(idx) = balancing_idx {
2009            if modified_was_debit {
2010                entry.lines[idx].credit_amount += impact;
2011            } else {
2012                entry.lines[idx].debit_amount += impact;
2013            }
2014        }
2015    }
2016
2017    /// Inject a human-like error based on the persona.
2018    ///
2019    /// All error types maintain balance - amount modifications are applied to both sides.
2020    /// Entries are marked with [HUMAN_ERROR:*] tags in header_text for ML detection.
2021    fn inject_human_error(&mut self, entry: &mut JournalEntry, persona: UserPersona) {
2022        use rust_decimal::Decimal;
2023
2024        // Different personas make different types of errors
2025        let error_type: u8 = match persona {
2026            UserPersona::JuniorAccountant => {
2027                // Junior accountants make more varied errors
2028                self.rng.random_range(0..5)
2029            }
2030            UserPersona::SeniorAccountant => {
2031                // Senior accountants mainly make transposition errors
2032                self.rng.random_range(0..3)
2033            }
2034            UserPersona::Controller | UserPersona::Manager => {
2035                // Controllers/managers mainly make rounding or cutoff errors
2036                self.rng.random_range(3..5)
2037            }
2038            _ => return,
2039        };
2040
2041        match error_type {
2042            0 => {
2043                // Transposed digits in an amount
2044                if let Some(line) = entry.lines.get_mut(0) {
2045                    let is_debit = line.debit_amount > Decimal::ZERO;
2046                    let original_amount = if is_debit {
2047                        line.debit_amount
2048                    } else {
2049                        line.credit_amount
2050                    };
2051
2052                    // Simple digit swap in the string representation
2053                    let s = original_amount.to_string();
2054                    if s.len() >= 2 {
2055                        let chars: Vec<char> = s.chars().collect();
2056                        let pos = self.rng.random_range(0..chars.len().saturating_sub(1));
2057                        if chars[pos].is_ascii_digit()
2058                            && chars.get(pos + 1).is_some_and(char::is_ascii_digit)
2059                        {
2060                            let mut new_chars = chars;
2061                            new_chars.swap(pos, pos + 1);
2062                            if let Ok(new_amount) =
2063                                new_chars.into_iter().collect::<String>().parse::<Decimal>()
2064                            {
2065                                let impact = new_amount - original_amount;
2066
2067                                // Apply to the modified line
2068                                if is_debit {
2069                                    entry.lines[0].debit_amount = new_amount;
2070                                } else {
2071                                    entry.lines[0].credit_amount = new_amount;
2072                                }
2073
2074                                // Rebalance the entry
2075                                Self::rebalance_entry(entry, is_debit, impact);
2076
2077                                entry.header.header_text = Some(
2078                                    entry.header.header_text.clone().unwrap_or_default()
2079                                        + " [HUMAN_ERROR:TRANSPOSITION]",
2080                                );
2081                            }
2082                        }
2083                    }
2084                }
2085            }
2086            1 => {
2087                // Wrong decimal place (off by factor of 10)
2088                if let Some(line) = entry.lines.get_mut(0) {
2089                    let is_debit = line.debit_amount > Decimal::ZERO;
2090                    let original_amount = if is_debit {
2091                        line.debit_amount
2092                    } else {
2093                        line.credit_amount
2094                    };
2095
2096                    let new_amount = original_amount * Decimal::new(10, 0);
2097                    let impact = new_amount - original_amount;
2098
2099                    // Apply to the modified line
2100                    if is_debit {
2101                        entry.lines[0].debit_amount = new_amount;
2102                    } else {
2103                        entry.lines[0].credit_amount = new_amount;
2104                    }
2105
2106                    // Rebalance the entry
2107                    Self::rebalance_entry(entry, is_debit, impact);
2108
2109                    entry.header.header_text = Some(
2110                        entry.header.header_text.clone().unwrap_or_default()
2111                            + " [HUMAN_ERROR:DECIMAL_SHIFT]",
2112                    );
2113                }
2114            }
2115            2 => {
2116                // Typo in description (doesn't affect balance)
2117                if let Some(ref mut text) = entry.header.header_text {
2118                    let typos = ["teh", "adn", "wiht", "taht", "recieve"];
2119                    let correct = ["the", "and", "with", "that", "receive"];
2120                    let idx = self.rng.random_range(0..typos.len());
2121                    if text.to_lowercase().contains(correct[idx]) {
2122                        *text = text.replace(correct[idx], typos[idx]);
2123                        *text = format!("{text} [HUMAN_ERROR:TYPO]");
2124                    }
2125                }
2126            }
2127            3 => {
2128                // Rounding to round number
2129                if let Some(line) = entry.lines.get_mut(0) {
2130                    let is_debit = line.debit_amount > Decimal::ZERO;
2131                    let original_amount = if is_debit {
2132                        line.debit_amount
2133                    } else {
2134                        line.credit_amount
2135                    };
2136
2137                    let new_amount =
2138                        (original_amount / Decimal::new(100, 0)).round() * Decimal::new(100, 0);
2139                    let impact = new_amount - original_amount;
2140
2141                    // Apply to the modified line
2142                    if is_debit {
2143                        entry.lines[0].debit_amount = new_amount;
2144                    } else {
2145                        entry.lines[0].credit_amount = new_amount;
2146                    }
2147
2148                    // Rebalance the entry
2149                    Self::rebalance_entry(entry, is_debit, impact);
2150
2151                    entry.header.header_text = Some(
2152                        entry.header.header_text.clone().unwrap_or_default()
2153                            + " [HUMAN_ERROR:ROUNDED]",
2154                    );
2155                }
2156            }
2157            // Late posting marker (document date much earlier than posting
2158            // date). Doesn't create an imbalance.
2159            4 if entry.header.document_date == entry.header.posting_date => {
2160                let days_late = self.rng.random_range(5..15);
2161                entry.header.document_date =
2162                    entry.header.posting_date - chrono::Duration::days(days_late);
2163                entry.header.header_text = Some(
2164                    entry.header.header_text.clone().unwrap_or_default()
2165                        + " [HUMAN_ERROR:LATE_POSTING]",
2166                );
2167            }
2168            _ => {}
2169        }
2170    }
2171
2172    /// Apply approval workflow for high-value transactions.
2173    ///
2174    /// If the entry amount exceeds the approval threshold, simulate an
2175    /// approval workflow with appropriate approvers based on amount.
2176    fn maybe_apply_approval_workflow(
2177        &mut self,
2178        entry: &mut JournalEntry,
2179        _posting_date: NaiveDate,
2180    ) {
2181        use rust_decimal::Decimal;
2182
2183        let amount = entry.total_debit();
2184
2185        // Skip if amount is below threshold
2186        if amount <= self.approval_threshold {
2187            // Auto-approved below threshold
2188            let workflow = ApprovalWorkflow::auto_approved(
2189                entry.header.created_by.clone(),
2190                entry.header.user_persona.clone(),
2191                amount,
2192                entry.header.created_at,
2193            );
2194            entry.header.approval_workflow = Some(workflow);
2195            return;
2196        }
2197
2198        // Mark as SOX relevant for high-value transactions
2199        entry.header.sox_relevant = true;
2200
2201        // Determine required approval levels based on amount
2202        let required_levels = if amount > Decimal::new(100000, 0) {
2203            3 // Executive approval required
2204        } else if amount > Decimal::new(50000, 0) {
2205            2 // Senior management approval
2206        } else {
2207            1 // Manager approval
2208        };
2209
2210        // Create the approval workflow
2211        let mut workflow = ApprovalWorkflow::new(
2212            entry.header.created_by.clone(),
2213            entry.header.user_persona.clone(),
2214            amount,
2215        );
2216        workflow.required_levels = required_levels;
2217
2218        // Simulate submission
2219        let submit_time = entry.header.created_at;
2220        let submit_action = ApprovalAction::new(
2221            entry.header.created_by.clone(),
2222            entry.header.user_persona.clone(),
2223            self.parse_persona(&entry.header.user_persona),
2224            ApprovalActionType::Submit,
2225            0,
2226        )
2227        .with_timestamp(submit_time);
2228
2229        workflow.actions.push(submit_action);
2230        workflow.status = ApprovalStatus::Pending;
2231        workflow.submitted_at = Some(submit_time);
2232
2233        // Simulate approvals with realistic delays
2234        let mut current_time = submit_time;
2235        for level in 1..=required_levels {
2236            // Add delay for approval (1-3 business hours per level)
2237            let delay_hours = self.rng.random_range(1..4);
2238            current_time += chrono::Duration::hours(delay_hours);
2239
2240            // Skip weekends
2241            while current_time.weekday() == chrono::Weekday::Sat
2242                || current_time.weekday() == chrono::Weekday::Sun
2243            {
2244                current_time += chrono::Duration::days(1);
2245            }
2246
2247            // Generate approver based on level
2248            let (approver_id, approver_role) = self.select_approver(level);
2249
2250            let approve_action = ApprovalAction::new(
2251                approver_id.clone(),
2252                approver_role.to_string(),
2253                approver_role,
2254                ApprovalActionType::Approve,
2255                level,
2256            )
2257            .with_timestamp(current_time);
2258
2259            workflow.actions.push(approve_action);
2260            workflow.current_level = level;
2261        }
2262
2263        // Mark as approved
2264        workflow.status = ApprovalStatus::Approved;
2265        workflow.approved_at = Some(current_time);
2266
2267        entry.header.approval_workflow = Some(workflow);
2268    }
2269
2270    /// Select an approver based on the required level.
2271    fn select_approver(&mut self, level: u8) -> (String, UserPersona) {
2272        let persona = match level {
2273            1 => UserPersona::Manager,
2274            2 => UserPersona::Controller,
2275            _ => UserPersona::Executive,
2276        };
2277
2278        // Try to get from user pool first
2279        if let Some(ref pool) = self.user_pool {
2280            if let Some(user) = pool.get_random_user(persona, &mut self.rng) {
2281                return (user.user_id.clone(), persona);
2282            }
2283        }
2284
2285        // Fallback to generated approver
2286        let approver_id = match persona {
2287            UserPersona::Manager => format!("MGR{:04}", self.rng.random_range(1..100)),
2288            UserPersona::Controller => format!("CTRL{:04}", self.rng.random_range(1..20)),
2289            UserPersona::Executive => format!("EXEC{:04}", self.rng.random_range(1..10)),
2290            _ => format!("USR{:04}", self.rng.random_range(1..1000)),
2291        };
2292
2293        (approver_id, persona)
2294    }
2295
2296    /// Parse user persona from string.
2297    fn parse_persona(&self, persona_str: &str) -> UserPersona {
2298        match persona_str.to_lowercase().as_str() {
2299            s if s.contains("junior") => UserPersona::JuniorAccountant,
2300            s if s.contains("senior") => UserPersona::SeniorAccountant,
2301            s if s.contains("controller") => UserPersona::Controller,
2302            s if s.contains("manager") => UserPersona::Manager,
2303            s if s.contains("executive") => UserPersona::Executive,
2304            s if s.contains("automated") || s.contains("system") => UserPersona::AutomatedSystem,
2305            _ => UserPersona::JuniorAccountant, // Default
2306        }
2307    }
2308
2309    /// Enable or disable approval workflow.
2310    pub fn with_approval(mut self, enabled: bool) -> Self {
2311        self.approval_enabled = enabled;
2312        self
2313    }
2314
2315    /// Set the approval threshold amount.
2316    pub fn with_approval_threshold(mut self, threshold: rust_decimal::Decimal) -> Self {
2317        self.approval_threshold = threshold;
2318        self
2319    }
2320
2321    /// Set the SOD violation rate for approval tracking.
2322    ///
2323    /// When a transaction is approved, there is a `rate` probability (0.0 to 1.0)
2324    /// that the approver is the same as the creator, which constitutes a SOD violation.
2325    /// Default is 0.10 (10%).
2326    pub fn with_sod_violation_rate(mut self, rate: f64) -> Self {
2327        self.sod_violation_rate = rate;
2328        self
2329    }
2330
2331    /// Populate `approved_by` and `approval_date` from the approval workflow,
2332    /// and flag SOD violations when the approver matches the creator.
2333    fn populate_approval_fields(&mut self, entry: &mut JournalEntry, posting_date: NaiveDate) {
2334        if let Some(ref workflow) = entry.header.approval_workflow {
2335            // Extract the last approver from the workflow actions
2336            let last_approver = workflow
2337                .actions
2338                .iter()
2339                .rev()
2340                .find(|a| matches!(a.action, ApprovalActionType::Approve));
2341
2342            if let Some(approver_action) = last_approver {
2343                entry.header.approved_by = Some(approver_action.actor_id.clone());
2344                entry.header.approval_date = Some(approver_action.action_timestamp.date_naive());
2345            } else {
2346                // No explicit approver (auto-approved); use the preparer
2347                entry.header.approved_by = Some(workflow.preparer_id.clone());
2348                entry.header.approval_date = Some(posting_date);
2349            }
2350
2351            // Inject SOD violation: with configured probability, set approver = creator
2352            if self.rng.random::<f64>() < self.sod_violation_rate {
2353                let creator = entry.header.created_by.clone();
2354                entry.header.approved_by = Some(creator);
2355                entry.header.sod_violation = true;
2356                entry.header.sod_conflict_type = Some(SodConflictType::PreparerApprover);
2357            }
2358        }
2359    }
2360
2361    /// Set the temporal drift controller for simulating distribution changes over time.
2362    ///
2363    /// When drift is enabled, amounts and other distributions will shift based on
2364    /// the period (month) to simulate realistic temporal evolution like inflation
2365    /// or increasing fraud rates.
2366    pub fn with_drift_controller(mut self, controller: DriftController) -> Self {
2367        self.drift_controller = Some(controller);
2368        self
2369    }
2370
2371    /// Set drift configuration directly.
2372    ///
2373    /// Creates a drift controller from the config. Total periods is calculated
2374    /// from the date range.
2375    pub fn with_drift_config(mut self, config: DriftConfig, seed: u64) -> Self {
2376        if config.enabled {
2377            let total_periods = self.calculate_total_periods();
2378            self.drift_controller = Some(DriftController::new(config, seed, total_periods));
2379        }
2380        self
2381    }
2382
2383    /// Calculate total periods (months) in the date range.
2384    fn calculate_total_periods(&self) -> u32 {
2385        let start_year = self.start_date.year();
2386        let start_month = self.start_date.month();
2387        let end_year = self.end_date.year();
2388        let end_month = self.end_date.month();
2389
2390        ((end_year - start_year) * 12 + (end_month as i32 - start_month as i32) + 1).max(1) as u32
2391    }
2392
2393    /// Calculate the period number (0-indexed) for a given date.
2394    fn date_to_period(&self, date: NaiveDate) -> u32 {
2395        let start_year = self.start_date.year();
2396        let start_month = self.start_date.month() as i32;
2397        let date_year = date.year();
2398        let date_month = date.month() as i32;
2399
2400        ((date_year - start_year) * 12 + (date_month - start_month)).max(0) as u32
2401    }
2402
2403    /// Get drift adjustments for a given date.
2404    fn get_drift_adjustments(&self, date: NaiveDate) -> DriftAdjustments {
2405        if let Some(ref controller) = self.drift_controller {
2406            let period = self.date_to_period(date);
2407            controller.compute_adjustments(period)
2408        } else {
2409            DriftAdjustments::none()
2410        }
2411    }
2412
2413    /// Select a user from the pool or generate a generic user ID.
2414    #[inline]
2415    fn select_user(&mut self, is_automated: bool) -> (String, String) {
2416        if let Some(ref pool) = self.user_pool {
2417            let persona = if is_automated {
2418                UserPersona::AutomatedSystem
2419            } else {
2420                // Random distribution among human personas
2421                let roll: f64 = self.rng.random();
2422                if roll < 0.4 {
2423                    UserPersona::JuniorAccountant
2424                } else if roll < 0.7 {
2425                    UserPersona::SeniorAccountant
2426                } else if roll < 0.85 {
2427                    UserPersona::Controller
2428                } else {
2429                    UserPersona::Manager
2430                }
2431            };
2432
2433            if let Some(user) = pool.get_random_user(persona, &mut self.rng) {
2434                return (user.user_id.clone(), user.persona.to_string());
2435            }
2436        }
2437
2438        // Fallback to generic format
2439        if is_automated {
2440            (
2441                format!("BATCH{:04}", self.rng.random_range(1..=20)),
2442                "automated_system".to_string(),
2443            )
2444        } else {
2445            (
2446                format!("USER{:04}", self.rng.random_range(1..=40)),
2447                "senior_accountant".to_string(),
2448            )
2449        }
2450    }
2451
2452    /// Select transaction source based on configuration weights.
2453    #[inline]
2454    fn select_source(&mut self) -> TransactionSource {
2455        let roll: f64 = self.rng.random();
2456        let dist = &self.config.source_distribution;
2457
2458        if roll < dist.manual {
2459            TransactionSource::Manual
2460        } else if roll < dist.manual + dist.automated {
2461            TransactionSource::Automated
2462        } else if roll < dist.manual + dist.automated + dist.recurring {
2463            TransactionSource::Recurring
2464        } else {
2465            TransactionSource::Adjustment
2466        }
2467    }
2468
2469    /// Select a business process based on configuration weights.
2470    #[inline]
2471    /// Map a business process to a SAP-style document type code.
2472    ///
2473    /// - P2P → "KR" (vendor invoice)
2474    /// - O2C → "DR" (customer invoice)
2475    /// - R2R → "SA" (general journal)
2476    /// - H2R → "HR" (HR posting)
2477    /// - A2R → "AA" (asset posting)
2478    /// - others → "SA"
2479    fn document_type_for_process(process: BusinessProcess) -> &'static str {
2480        match process {
2481            BusinessProcess::P2P => "KR",
2482            BusinessProcess::O2C => "DR",
2483            BusinessProcess::R2R => "SA",
2484            BusinessProcess::H2R => "HR",
2485            BusinessProcess::A2R => "AA",
2486            _ => "SA",
2487        }
2488    }
2489
2490    fn select_business_process(&mut self) -> BusinessProcess {
2491        *datasynth_core::utils::weighted_select(&mut self.rng, &self.business_process_weights)
2492    }
2493
2494    #[inline]
2495    fn select_debit_account(&mut self) -> &GLAccount {
2496        let accounts = self.coa.get_accounts_by_type(AccountType::Asset);
2497        let expense_accounts = self.coa.get_accounts_by_type(AccountType::Expense);
2498
2499        // 60% asset, 40% expense for debits
2500        let all: Vec<_> = if self.rng.random::<f64>() < 0.6 {
2501            accounts
2502        } else {
2503            expense_accounts
2504        };
2505
2506        all.choose(&mut self.rng).copied().unwrap_or_else(|| {
2507            tracing::warn!(
2508                "Account selection returned empty list, falling back to first COA account"
2509            );
2510            &self.coa.accounts[0]
2511        })
2512    }
2513
2514    #[inline]
2515    fn select_credit_account(&mut self) -> &GLAccount {
2516        let liability_accounts = self.coa.get_accounts_by_type(AccountType::Liability);
2517        let revenue_accounts = self.coa.get_accounts_by_type(AccountType::Revenue);
2518
2519        // 60% liability, 40% revenue for credits
2520        let all: Vec<_> = if self.rng.random::<f64>() < 0.6 {
2521            liability_accounts
2522        } else {
2523            revenue_accounts
2524        };
2525
2526        all.choose(&mut self.rng).copied().unwrap_or_else(|| {
2527            tracing::warn!(
2528                "Account selection returned empty list, falling back to first COA account"
2529            );
2530            &self.coa.accounts[0]
2531        })
2532    }
2533}
2534
2535impl Generator for JournalEntryGenerator {
2536    type Item = JournalEntry;
2537    type Config = (
2538        TransactionConfig,
2539        Arc<ChartOfAccounts>,
2540        Vec<String>,
2541        NaiveDate,
2542        NaiveDate,
2543    );
2544
2545    fn new(config: Self::Config, seed: u64) -> Self {
2546        Self::new_with_params(config.0, config.1, config.2, config.3, config.4, seed)
2547    }
2548
2549    fn generate_one(&mut self) -> Self::Item {
2550        self.generate()
2551    }
2552
2553    fn reset(&mut self) {
2554        self.rng = seeded_rng(self.seed, 0);
2555        self.line_sampler.reset(self.seed + 1);
2556        self.amount_sampler.reset(self.seed + 2);
2557        self.temporal_sampler.reset(self.seed + 3);
2558        if let Some(ref mut adv) = self.advanced_amount_sampler {
2559            adv.reset(self.seed + 2);
2560        }
2561        self.count = 0;
2562        self.uuid_factory.reset();
2563
2564        // Reset reference generator by recreating it
2565        let mut ref_gen = ReferenceGenerator::new(
2566            self.start_date.year(),
2567            self.companies
2568                .first()
2569                .map(std::string::String::as_str)
2570                .unwrap_or("1000"),
2571        );
2572        ref_gen.set_prefix(
2573            ReferenceType::Invoice,
2574            &self.template_config.references.invoice_prefix,
2575        );
2576        ref_gen.set_prefix(
2577            ReferenceType::PurchaseOrder,
2578            &self.template_config.references.po_prefix,
2579        );
2580        ref_gen.set_prefix(
2581            ReferenceType::SalesOrder,
2582            &self.template_config.references.so_prefix,
2583        );
2584        self.reference_generator = ref_gen;
2585    }
2586
2587    fn count(&self) -> u64 {
2588        self.count
2589    }
2590
2591    fn seed(&self) -> u64 {
2592        self.seed
2593    }
2594}
2595
2596use datasynth_core::traits::ParallelGenerator;
2597
2598impl ParallelGenerator for JournalEntryGenerator {
2599    /// Split this generator into `parts` independent sub-generators.
2600    ///
2601    /// Each sub-generator gets a deterministic seed derived from the parent seed
2602    /// and its partition index, plus a partitioned UUID factory to avoid contention.
2603    /// The results are deterministic for a given partition count.
2604    fn split(self, parts: usize) -> Vec<Self> {
2605        let parts = parts.max(1);
2606        (0..parts)
2607            .map(|i| {
2608                // Derive a unique seed per partition using a golden-ratio constant
2609                let sub_seed = self
2610                    .seed
2611                    .wrapping_add((i as u64).wrapping_mul(0x9E3779B97F4A7C15));
2612
2613                let mut gen = JournalEntryGenerator::new_with_full_config(
2614                    self.config.clone(),
2615                    Arc::clone(&self.coa),
2616                    self.companies.clone(),
2617                    self.start_date,
2618                    self.end_date,
2619                    sub_seed,
2620                    self.template_config.clone(),
2621                    self.user_pool.clone(),
2622                );
2623
2624                // Copy over configuration state
2625                gen.company_selector = self.company_selector.clone();
2626                gen.vendor_pool = self.vendor_pool.clone();
2627                gen.customer_pool = self.customer_pool.clone();
2628                gen.material_pool = self.material_pool.clone();
2629                // v5.9.0: master-data pools so sub-generators emit
2630                // CC/PC values that join back to the corresponding
2631                // masters (without these clones, parallel workers
2632                // fell back to the hardcoded `COST_CENTER_POOL` const
2633                // and the legacy `PC-{COMP}-{P2P|O2C|...}` derivation).
2634                gen.cost_center_pool = self.cost_center_pool.clone();
2635                gen.profit_center_pool = self.profit_center_pool.clone();
2636                gen.using_real_master_data = self.using_real_master_data;
2637                gen.fraud_config = self.fraud_config.clone();
2638                gen.persona_errors_enabled = self.persona_errors_enabled;
2639                gen.approval_enabled = self.approval_enabled;
2640                gen.approval_threshold = self.approval_threshold;
2641                gen.sod_violation_rate = self.sod_violation_rate;
2642                // v3.4.0+: advanced amount sampler (mixture / Pareto /
2643                // Gaussian). Clone and reset the internal RNG with the
2644                // partition's sub_seed so each worker explores a unique
2645                // subsequence without repeating the parent stream.
2646                if let Some(mut adv) = self.advanced_amount_sampler.clone() {
2647                    adv.reset(sub_seed.wrapping_add(2));
2648                    gen.advanced_amount_sampler = Some(adv);
2649                }
2650                // v3.5.3+: conditional amount override — clone + reset
2651                // so each partition gets a fresh deterministic stream.
2652                if let Some(mut cond) = self.conditional_amount_override.clone() {
2653                    cond.reset(sub_seed.wrapping_add(17));
2654                    gen.conditional_amount_override = Some(cond);
2655                }
2656                // v3.5.4+: copula sampler — clone + reset per partition.
2657                if let Some(mut cop) = self.correlation_copula.clone() {
2658                    cop.reset(sub_seed.wrapping_add(31));
2659                    gen.correlation_copula = Some(cop);
2660                }
2661
2662                // Use partitioned UUID factory to eliminate atomic contention
2663                gen.uuid_factory = DeterministicUuidFactory::for_partition(
2664                    sub_seed,
2665                    GeneratorType::JournalEntry,
2666                    i as u8,
2667                );
2668
2669                // Copy temporal patterns if configured
2670                if let Some(ref config) = self.temporal_patterns_config {
2671                    gen.temporal_patterns_config = Some(config.clone());
2672                    // Rebuild business day calculator from the stored config
2673                    if config.business_days.enabled {
2674                        if let Some(ref bdc) = self.business_day_calculator {
2675                            gen.business_day_calculator = Some(bdc.clone());
2676                        }
2677                    }
2678                    // Rebuild processing lag calculator with partition seed
2679                    if config.processing_lags.enabled {
2680                        let lag_config =
2681                            Self::convert_processing_lag_config(&config.processing_lags);
2682                        gen.processing_lag_calculator =
2683                            Some(ProcessingLagCalculator::with_config(sub_seed, lag_config));
2684                    }
2685                }
2686
2687                // Copy drift controller if present
2688                if let Some(ref dc) = self.drift_controller {
2689                    gen.drift_controller = Some(dc.clone());
2690                }
2691
2692                gen
2693            })
2694            .collect()
2695    }
2696}
2697
2698#[cfg(test)]
2699#[allow(clippy::unwrap_used)]
2700mod tests {
2701    use super::*;
2702    use crate::ChartOfAccountsGenerator;
2703
2704    #[test]
2705    fn test_generate_balanced_entries() {
2706        let mut coa_gen =
2707            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2708        let coa = Arc::new(coa_gen.generate());
2709
2710        let mut je_gen = JournalEntryGenerator::new_with_params(
2711            TransactionConfig::default(),
2712            coa,
2713            vec!["1000".to_string()],
2714            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2715            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2716            42,
2717        );
2718
2719        let mut balanced_count = 0;
2720        for _ in 0..100 {
2721            let entry = je_gen.generate();
2722
2723            // Skip entries with human errors as they may be intentionally unbalanced
2724            let has_human_error = entry
2725                .header
2726                .header_text
2727                .as_ref()
2728                .map(|t| t.contains("[HUMAN_ERROR:"))
2729                .unwrap_or(false);
2730
2731            if !has_human_error {
2732                assert!(
2733                    entry.is_balanced(),
2734                    "Entry {:?} is not balanced",
2735                    entry.header.document_id
2736                );
2737                balanced_count += 1;
2738            }
2739            assert!(entry.line_count() >= 2, "Entry has fewer than 2 lines");
2740        }
2741
2742        // Ensure most entries are balanced (human errors are rare)
2743        assert!(
2744            balanced_count >= 80,
2745            "Expected at least 80 balanced entries, got {}",
2746            balanced_count
2747        );
2748    }
2749
2750    #[test]
2751    fn test_deterministic_generation() {
2752        let mut coa_gen =
2753            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2754        let coa = Arc::new(coa_gen.generate());
2755
2756        let mut gen1 = JournalEntryGenerator::new_with_params(
2757            TransactionConfig::default(),
2758            Arc::clone(&coa),
2759            vec!["1000".to_string()],
2760            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2761            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2762            42,
2763        );
2764
2765        let mut gen2 = JournalEntryGenerator::new_with_params(
2766            TransactionConfig::default(),
2767            coa,
2768            vec!["1000".to_string()],
2769            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2770            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2771            42,
2772        );
2773
2774        for _ in 0..50 {
2775            let e1 = gen1.generate();
2776            let e2 = gen2.generate();
2777            assert_eq!(e1.header.document_id, e2.header.document_id);
2778            assert_eq!(e1.total_debit(), e2.total_debit());
2779        }
2780    }
2781
2782    #[test]
2783    fn test_templates_generate_descriptions() {
2784        let mut coa_gen =
2785            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2786        let coa = Arc::new(coa_gen.generate());
2787
2788        // Enable all template features
2789        let template_config = TemplateConfig {
2790            names: datasynth_config::schema::NameTemplateConfig {
2791                generate_realistic_names: true,
2792                email_domain: "test.com".to_string(),
2793                culture_distribution: datasynth_config::schema::CultureDistribution::default(),
2794            },
2795            descriptions: datasynth_config::schema::DescriptionTemplateConfig {
2796                generate_header_text: true,
2797                generate_line_text: true,
2798            },
2799            references: datasynth_config::schema::ReferenceTemplateConfig {
2800                generate_references: true,
2801                invoice_prefix: "TEST-INV".to_string(),
2802                po_prefix: "TEST-PO".to_string(),
2803                so_prefix: "TEST-SO".to_string(),
2804            },
2805            path: None,
2806            merge_strategy: datasynth_config::TemplateMergeStrategy::default(),
2807        };
2808
2809        let mut je_gen = JournalEntryGenerator::new_with_full_config(
2810            TransactionConfig::default(),
2811            coa,
2812            vec!["1000".to_string()],
2813            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2814            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2815            42,
2816            template_config,
2817            None,
2818        )
2819        .with_persona_errors(false); // Disable for template testing
2820
2821        for _ in 0..10 {
2822            let entry = je_gen.generate();
2823
2824            // Verify header text is populated
2825            assert!(
2826                entry.header.header_text.is_some(),
2827                "Header text should be populated"
2828            );
2829
2830            // Verify reference is populated
2831            assert!(
2832                entry.header.reference.is_some(),
2833                "Reference should be populated"
2834            );
2835
2836            // Verify business process is set
2837            assert!(
2838                entry.header.business_process.is_some(),
2839                "Business process should be set"
2840            );
2841
2842            // Verify line text is populated
2843            for line in &entry.lines {
2844                assert!(line.line_text.is_some(), "Line text should be populated");
2845            }
2846
2847            // Entry should still be balanced
2848            assert!(entry.is_balanced());
2849        }
2850    }
2851
2852    #[test]
2853    fn test_user_pool_integration() {
2854        let mut coa_gen =
2855            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2856        let coa = Arc::new(coa_gen.generate());
2857
2858        let companies = vec!["1000".to_string()];
2859
2860        // Generate user pool
2861        let mut user_gen = crate::UserGenerator::new(42);
2862        let user_pool = user_gen.generate_standard(&companies);
2863
2864        let mut je_gen = JournalEntryGenerator::new_with_full_config(
2865            TransactionConfig::default(),
2866            coa,
2867            companies,
2868            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2869            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2870            42,
2871            TemplateConfig::default(),
2872            Some(user_pool),
2873        );
2874
2875        // Generate entries and verify user IDs are from pool
2876        for _ in 0..20 {
2877            let entry = je_gen.generate();
2878
2879            // User ID should not be generic BATCH/USER format when pool is used
2880            // (though it may still fall back if random selection misses)
2881            assert!(!entry.header.created_by.is_empty());
2882        }
2883    }
2884
2885    #[test]
2886    fn test_master_data_connection() {
2887        let mut coa_gen =
2888            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2889        let coa = Arc::new(coa_gen.generate());
2890
2891        // Create test vendors
2892        let vendors = vec![
2893            Vendor::new("V-TEST-001", "Test Vendor Alpha", VendorType::Supplier),
2894            Vendor::new("V-TEST-002", "Test Vendor Beta", VendorType::Technology),
2895        ];
2896
2897        // Create test customers
2898        let customers = vec![
2899            Customer::new("C-TEST-001", "Test Customer One", CustomerType::Corporate),
2900            Customer::new(
2901                "C-TEST-002",
2902                "Test Customer Two",
2903                CustomerType::SmallBusiness,
2904            ),
2905        ];
2906
2907        // Create test materials
2908        let materials = vec![Material::new(
2909            "MAT-TEST-001",
2910            "Test Material A",
2911            MaterialType::RawMaterial,
2912        )];
2913
2914        // Create generator with master data
2915        let generator = JournalEntryGenerator::new_with_params(
2916            TransactionConfig::default(),
2917            coa,
2918            vec!["1000".to_string()],
2919            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2920            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2921            42,
2922        );
2923
2924        // Without master data
2925        assert!(!generator.is_using_real_master_data());
2926
2927        // Connect master data
2928        let generator_with_data = generator
2929            .with_vendors(&vendors)
2930            .with_customers(&customers)
2931            .with_materials(&materials);
2932
2933        // Should now be using real master data
2934        assert!(generator_with_data.is_using_real_master_data());
2935    }
2936
2937    #[test]
2938    fn test_with_master_data_convenience_method() {
2939        let mut coa_gen =
2940            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2941        let coa = Arc::new(coa_gen.generate());
2942
2943        let vendors = vec![Vendor::new("V-001", "Vendor One", VendorType::Supplier)];
2944        let customers = vec![Customer::new(
2945            "C-001",
2946            "Customer One",
2947            CustomerType::Corporate,
2948        )];
2949        let materials = vec![Material::new(
2950            "MAT-001",
2951            "Material One",
2952            MaterialType::RawMaterial,
2953        )];
2954
2955        let generator = JournalEntryGenerator::new_with_params(
2956            TransactionConfig::default(),
2957            coa,
2958            vec!["1000".to_string()],
2959            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2960            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2961            42,
2962        )
2963        .with_master_data(&vendors, &customers, &materials);
2964
2965        assert!(generator.is_using_real_master_data());
2966    }
2967
2968    #[test]
2969    fn test_stress_factors_increase_error_rate() {
2970        let mut coa_gen =
2971            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2972        let coa = Arc::new(coa_gen.generate());
2973
2974        let generator = JournalEntryGenerator::new_with_params(
2975            TransactionConfig::default(),
2976            coa,
2977            vec!["1000".to_string()],
2978            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2979            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2980            42,
2981        );
2982
2983        let base_rate = 0.1;
2984
2985        // Regular day - no stress factors
2986        let regular_day = NaiveDate::from_ymd_opt(2024, 6, 15).unwrap(); // Mid-June Wednesday
2987        let regular_rate = generator.apply_stress_factors(base_rate, regular_day);
2988        assert!(
2989            (regular_rate - base_rate).abs() < 0.01,
2990            "Regular day should have minimal stress factor adjustment"
2991        );
2992
2993        // Month end - 50% more errors
2994        let month_end = NaiveDate::from_ymd_opt(2024, 6, 29).unwrap(); // June 29 (Saturday)
2995        let month_end_rate = generator.apply_stress_factors(base_rate, month_end);
2996        assert!(
2997            month_end_rate > regular_rate,
2998            "Month end should have higher error rate than regular day"
2999        );
3000
3001        // Year end - double the error rate
3002        let year_end = NaiveDate::from_ymd_opt(2024, 12, 30).unwrap(); // December 30
3003        let year_end_rate = generator.apply_stress_factors(base_rate, year_end);
3004        assert!(
3005            year_end_rate > month_end_rate,
3006            "Year end should have highest error rate"
3007        );
3008
3009        // Friday stress
3010        let friday = NaiveDate::from_ymd_opt(2024, 6, 14).unwrap(); // Friday
3011        let friday_rate = generator.apply_stress_factors(base_rate, friday);
3012        assert!(
3013            friday_rate > regular_rate,
3014            "Friday should have higher error rate than mid-week"
3015        );
3016
3017        // Monday stress
3018        let monday = NaiveDate::from_ymd_opt(2024, 6, 17).unwrap(); // Monday
3019        let monday_rate = generator.apply_stress_factors(base_rate, monday);
3020        assert!(
3021            monday_rate > regular_rate,
3022            "Monday should have higher error rate than mid-week"
3023        );
3024    }
3025
3026    #[test]
3027    fn test_batching_produces_similar_entries() {
3028        let mut coa_gen =
3029            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
3030        let coa = Arc::new(coa_gen.generate());
3031
3032        // Use seed 123 which is more likely to trigger batching
3033        let mut je_gen = JournalEntryGenerator::new_with_params(
3034            TransactionConfig::default(),
3035            coa,
3036            vec!["1000".to_string()],
3037            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
3038            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
3039            123,
3040        )
3041        .with_persona_errors(false); // Disable to ensure balanced entries
3042
3043        // Generate many entries - at 15% batch rate, should see some batches
3044        let entries: Vec<JournalEntry> = (0..200).map(|_| je_gen.generate()).collect();
3045
3046        // Check that all entries are balanced (batched or not)
3047        for entry in &entries {
3048            assert!(
3049                entry.is_balanced(),
3050                "All entries including batched should be balanced"
3051            );
3052        }
3053
3054        // Count entries with same-day posting dates (batch indicator)
3055        let mut date_counts: std::collections::HashMap<NaiveDate, usize> =
3056            std::collections::HashMap::new();
3057        for entry in &entries {
3058            *date_counts.entry(entry.header.posting_date).or_insert(0) += 1;
3059        }
3060
3061        // With batching, some dates should have multiple entries
3062        let dates_with_multiple = date_counts.values().filter(|&&c| c > 1).count();
3063        assert!(
3064            dates_with_multiple > 0,
3065            "With batching, should see some dates with multiple entries"
3066        );
3067    }
3068
3069    #[test]
3070    fn test_temporal_patterns_business_days() {
3071        use datasynth_config::schema::{
3072            BusinessDaySchemaConfig, CalendarSchemaConfig, TemporalPatternsConfig,
3073        };
3074
3075        let mut coa_gen =
3076            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
3077        let coa = Arc::new(coa_gen.generate());
3078
3079        // Create temporal patterns config with business days enabled
3080        let temporal_config = TemporalPatternsConfig {
3081            enabled: true,
3082            business_days: BusinessDaySchemaConfig {
3083                enabled: true,
3084                ..Default::default()
3085            },
3086            calendars: CalendarSchemaConfig {
3087                regions: vec!["US".to_string()],
3088                custom_holidays: vec![],
3089            },
3090            ..Default::default()
3091        };
3092
3093        let mut je_gen = JournalEntryGenerator::new_with_params(
3094            TransactionConfig::default(),
3095            coa,
3096            vec!["1000".to_string()],
3097            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
3098            NaiveDate::from_ymd_opt(2024, 3, 31).unwrap(), // Q1 2024
3099            42,
3100        )
3101        .with_temporal_patterns(temporal_config, 42)
3102        .with_persona_errors(false);
3103
3104        // Generate entries and verify none fall on weekends
3105        let entries: Vec<JournalEntry> = (0..100).map(|_| je_gen.generate()).collect();
3106
3107        for entry in &entries {
3108            let weekday = entry.header.posting_date.weekday();
3109            assert!(
3110                weekday != chrono::Weekday::Sat && weekday != chrono::Weekday::Sun,
3111                "Posting date {:?} should not be a weekend",
3112                entry.header.posting_date
3113            );
3114        }
3115    }
3116
3117    #[test]
3118    fn test_default_generation_filters_weekends() {
3119        // Verify that weekend entries are <5% even when temporal_patterns is NOT enabled.
3120        // This tests the fix where new_with_full_config always creates a default
3121        // BusinessDayCalculator with US holidays as a fallback.
3122        let mut coa_gen =
3123            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
3124        let coa = Arc::new(coa_gen.generate());
3125
3126        let mut je_gen = JournalEntryGenerator::new_with_params(
3127            TransactionConfig::default(),
3128            coa,
3129            vec!["1000".to_string()],
3130            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
3131            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
3132            42,
3133        )
3134        .with_persona_errors(false);
3135
3136        let total = 500;
3137        let entries: Vec<JournalEntry> = (0..total).map(|_| je_gen.generate()).collect();
3138
3139        let weekend_count = entries
3140            .iter()
3141            .filter(|e| {
3142                let wd = e.header.posting_date.weekday();
3143                wd == chrono::Weekday::Sat || wd == chrono::Weekday::Sun
3144            })
3145            .count();
3146
3147        let weekend_pct = weekend_count as f64 / total as f64;
3148        assert!(
3149            weekend_pct < 0.05,
3150            "Expected weekend entries <5% of total without temporal_patterns enabled, \
3151             but got {:.1}% ({}/{})",
3152            weekend_pct * 100.0,
3153            weekend_count,
3154            total
3155        );
3156    }
3157
3158    #[test]
3159    fn test_document_type_derived_from_business_process() {
3160        let mut coa_gen =
3161            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
3162        let coa = Arc::new(coa_gen.generate());
3163
3164        let mut je_gen = JournalEntryGenerator::new_with_params(
3165            TransactionConfig::default(),
3166            coa,
3167            vec!["1000".to_string()],
3168            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
3169            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
3170            99,
3171        )
3172        .with_persona_errors(false)
3173        .with_batching(false);
3174
3175        let total = 200;
3176        let mut doc_types = std::collections::HashSet::new();
3177        let mut sa_count = 0_usize;
3178
3179        for _ in 0..total {
3180            let entry = je_gen.generate();
3181            let dt = &entry.header.document_type;
3182            doc_types.insert(dt.clone());
3183            if dt == "SA" {
3184                sa_count += 1;
3185            }
3186        }
3187
3188        // Should have more than 3 distinct document types
3189        assert!(
3190            doc_types.len() > 3,
3191            "Expected >3 distinct document types, got {} ({:?})",
3192            doc_types.len(),
3193            doc_types,
3194        );
3195
3196        // "SA" should be less than 50% (R2R is 20% of the weight)
3197        let sa_pct = sa_count as f64 / total as f64;
3198        assert!(
3199            sa_pct < 0.50,
3200            "Expected SA <50%, got {:.1}% ({}/{})",
3201            sa_pct * 100.0,
3202            sa_count,
3203            total,
3204        );
3205    }
3206
3207    #[test]
3208    fn test_enrich_line_items_account_description() {
3209        let mut coa_gen =
3210            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
3211        let coa = Arc::new(coa_gen.generate());
3212
3213        let mut je_gen = JournalEntryGenerator::new_with_params(
3214            TransactionConfig::default(),
3215            coa,
3216            vec!["1000".to_string()],
3217            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
3218            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
3219            42,
3220        )
3221        .with_persona_errors(false);
3222
3223        let total = 200;
3224        let entries: Vec<JournalEntry> = (0..total).map(|_| je_gen.generate()).collect();
3225
3226        // Count lines with account_description populated
3227        let total_lines: usize = entries.iter().map(|e| e.lines.len()).sum();
3228        let lines_with_desc: usize = entries
3229            .iter()
3230            .flat_map(|e| &e.lines)
3231            .filter(|l| l.account_description.is_some())
3232            .count();
3233
3234        let desc_pct = lines_with_desc as f64 / total_lines as f64;
3235        assert!(
3236            desc_pct > 0.95,
3237            "Expected >95% of lines to have account_description, got {:.1}% ({}/{})",
3238            desc_pct * 100.0,
3239            lines_with_desc,
3240            total_lines,
3241        );
3242    }
3243
3244    #[test]
3245    fn test_enrich_line_items_cost_center_for_expense_accounts() {
3246        let mut coa_gen =
3247            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
3248        let coa = Arc::new(coa_gen.generate());
3249
3250        let mut je_gen = JournalEntryGenerator::new_with_params(
3251            TransactionConfig::default(),
3252            coa,
3253            vec!["1000".to_string()],
3254            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
3255            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
3256            42,
3257        )
3258        .with_persona_errors(false);
3259
3260        let total = 300;
3261        let entries: Vec<JournalEntry> = (0..total).map(|_| je_gen.generate()).collect();
3262
3263        // Count expense account lines (5xxx/6xxx) with cost_center populated
3264        let expense_lines: Vec<&JournalEntryLine> = entries
3265            .iter()
3266            .flat_map(|e| &e.lines)
3267            .filter(|l| {
3268                let first = l.gl_account.chars().next().unwrap_or('0');
3269                first == '5' || first == '6'
3270            })
3271            .collect();
3272
3273        if !expense_lines.is_empty() {
3274            let with_cc = expense_lines
3275                .iter()
3276                .filter(|l| l.cost_center.is_some())
3277                .count();
3278            let cc_pct = with_cc as f64 / expense_lines.len() as f64;
3279            assert!(
3280                cc_pct > 0.80,
3281                "Expected >80% of expense lines to have cost_center, got {:.1}% ({}/{})",
3282                cc_pct * 100.0,
3283                with_cc,
3284                expense_lines.len(),
3285            );
3286        }
3287    }
3288
3289    #[test]
3290    fn test_enrich_line_items_profit_center_and_line_text() {
3291        let mut coa_gen =
3292            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
3293        let coa = Arc::new(coa_gen.generate());
3294
3295        let mut je_gen = JournalEntryGenerator::new_with_params(
3296            TransactionConfig::default(),
3297            coa,
3298            vec!["1000".to_string()],
3299            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
3300            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
3301            42,
3302        )
3303        .with_persona_errors(false);
3304
3305        let total = 100;
3306        let entries: Vec<JournalEntry> = (0..total).map(|_| je_gen.generate()).collect();
3307
3308        let total_lines: usize = entries.iter().map(|e| e.lines.len()).sum();
3309
3310        // All lines should have profit_center
3311        let with_pc = entries
3312            .iter()
3313            .flat_map(|e| &e.lines)
3314            .filter(|l| l.profit_center.is_some())
3315            .count();
3316        let pc_pct = with_pc as f64 / total_lines as f64;
3317        assert!(
3318            pc_pct > 0.95,
3319            "Expected >95% of lines to have profit_center, got {:.1}% ({}/{})",
3320            pc_pct * 100.0,
3321            with_pc,
3322            total_lines,
3323        );
3324
3325        // All lines should have line_text (either from template or header fallback)
3326        let with_text = entries
3327            .iter()
3328            .flat_map(|e| &e.lines)
3329            .filter(|l| l.line_text.is_some())
3330            .count();
3331        let text_pct = with_text as f64 / total_lines as f64;
3332        assert!(
3333            text_pct > 0.95,
3334            "Expected >95% of lines to have line_text, got {:.1}% ({}/{})",
3335            text_pct * 100.0,
3336            with_text,
3337            total_lines,
3338        );
3339    }
3340
3341    // --- ISA 240 audit flag tests ---
3342
3343    #[test]
3344    fn test_je_has_audit_flags() {
3345        let mut coa_gen =
3346            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
3347        let coa = Arc::new(coa_gen.generate());
3348
3349        let mut je_gen = JournalEntryGenerator::new_with_params(
3350            TransactionConfig::default(),
3351            coa,
3352            vec!["1000".to_string()],
3353            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
3354            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
3355            42,
3356        )
3357        .with_persona_errors(false);
3358
3359        for _ in 0..100 {
3360            let entry = je_gen.generate();
3361
3362            // source_system should always be non-empty
3363            assert!(
3364                !entry.header.source_system.is_empty(),
3365                "source_system should be populated, got empty string"
3366            );
3367
3368            // created_by should always be non-empty (already tested elsewhere, but confirm)
3369            assert!(
3370                !entry.header.created_by.is_empty(),
3371                "created_by should be populated"
3372            );
3373
3374            // created_date should always be populated
3375            assert!(
3376                entry.header.created_date.is_some(),
3377                "created_date should be populated"
3378            );
3379        }
3380    }
3381
3382    #[test]
3383    fn test_manual_entry_rate() {
3384        let mut coa_gen =
3385            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
3386        let coa = Arc::new(coa_gen.generate());
3387
3388        let mut je_gen = JournalEntryGenerator::new_with_params(
3389            TransactionConfig::default(),
3390            coa,
3391            vec!["1000".to_string()],
3392            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
3393            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
3394            42,
3395        )
3396        .with_persona_errors(false)
3397        .with_batching(false);
3398
3399        let total = 1000;
3400        let entries: Vec<JournalEntry> = (0..total).map(|_| je_gen.generate()).collect();
3401
3402        let manual_count = entries.iter().filter(|e| e.header.is_manual).count();
3403        let manual_rate = manual_count as f64 / total as f64;
3404
3405        // Default source_distribution.manual is typically around 0.05-0.15
3406        // Allow a wide tolerance for statistical variation
3407        assert!(
3408            manual_rate > 0.01 && manual_rate < 0.50,
3409            "Manual entry rate should be reasonable (1%-50%), got {:.1}% ({}/{})",
3410            manual_rate * 100.0,
3411            manual_count,
3412            total,
3413        );
3414
3415        // is_manual should match TransactionSource::Manual
3416        for entry in &entries {
3417            let source_is_manual = entry.header.source == TransactionSource::Manual;
3418            assert_eq!(
3419                entry.header.is_manual, source_is_manual,
3420                "is_manual should match source == Manual"
3421            );
3422        }
3423    }
3424
3425    #[test]
3426    fn test_manual_source_consistency() {
3427        let mut coa_gen =
3428            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
3429        let coa = Arc::new(coa_gen.generate());
3430
3431        let mut je_gen = JournalEntryGenerator::new_with_params(
3432            TransactionConfig::default(),
3433            coa,
3434            vec!["1000".to_string()],
3435            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
3436            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
3437            42,
3438        )
3439        .with_persona_errors(false)
3440        .with_batching(false);
3441
3442        for _ in 0..500 {
3443            let entry = je_gen.generate();
3444
3445            if entry.header.is_manual {
3446                // Manual entries must have a source_system in the
3447                // `manual/...` or `spreadsheet/...` family (the bare
3448                // legacy `manual` and `spreadsheet` values are also
3449                // accepted to keep older fixtures working).
3450                let s = entry.header.source_system.as_str();
3451                assert!(
3452                    s == "manual"
3453                        || s == "spreadsheet"
3454                        || s.starts_with("manual/")
3455                        || s.starts_with("spreadsheet/"),
3456                    "Manual entry should have source_system in `manual` / `spreadsheet` family, got '{s}'",
3457                );
3458            } else {
3459                // Non-manual entries must NOT be in the manual/spreadsheet family.
3460                let s = entry.header.source_system.as_str();
3461                assert!(
3462                    !(s == "manual"
3463                        || s == "spreadsheet"
3464                        || s.starts_with("manual/")
3465                        || s.starts_with("spreadsheet/")),
3466                    "Non-manual entry should not be in `manual` / `spreadsheet` family, got '{s}'",
3467                );
3468            }
3469        }
3470    }
3471
3472    #[test]
3473    fn test_created_date_before_posting() {
3474        let mut coa_gen =
3475            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
3476        let coa = Arc::new(coa_gen.generate());
3477
3478        let mut je_gen = JournalEntryGenerator::new_with_params(
3479            TransactionConfig::default(),
3480            coa,
3481            vec!["1000".to_string()],
3482            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
3483            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
3484            42,
3485        )
3486        .with_persona_errors(false);
3487
3488        for _ in 0..500 {
3489            let entry = je_gen.generate();
3490
3491            if let Some(created_date) = entry.header.created_date {
3492                let created_naive_date = created_date.date();
3493                assert!(
3494                    created_naive_date <= entry.header.posting_date,
3495                    "created_date ({}) should be <= posting_date ({})",
3496                    created_naive_date,
3497                    entry.header.posting_date,
3498                );
3499            }
3500        }
3501    }
3502}