Skip to main content

datasynth_generators/
je_generator.rs

1//! Journal Entry generator with statistical distributions.
2
3use chrono::{Datelike, NaiveDate, Timelike};
4use datasynth_core::utils::seeded_rng;
5use rand::prelude::*;
6use rand_chacha::ChaCha8Rng;
7use rust_decimal::prelude::*;
8use rust_decimal::Decimal;
9use std::sync::Arc;
10
11use tracing::debug;
12
13use datasynth_config::schema::{
14    AdvancedDistributionConfig, FraudConfig, GeneratorConfig, MixtureDistributionType,
15    TemplateConfig, TemporalPatternsConfig, TransactionConfig,
16};
17use datasynth_core::distributions::{
18    AdvancedAmountSampler, BusinessDayCalculator, CrossDayConfig, DriftAdjustments, DriftConfig,
19    DriftController, EventType, IndustryAmountProfile, IndustryType, LagDistribution,
20    PeriodEndConfig, PeriodEndDynamics, PeriodEndModel, ProcessingLagCalculator,
21    ProcessingLagConfig, *,
22};
23use datasynth_core::models::*;
24use datasynth_core::templates::{
25    descriptions::DescriptionContext, DescriptionGenerator, ReferenceGenerator, ReferenceType,
26};
27use datasynth_core::traits::Generator;
28use datasynth_core::uuid_factory::{DeterministicUuidFactory, GeneratorType};
29use datasynth_core::CountryPack;
30
31use crate::company_selector::WeightedCompanySelector;
32use crate::user_generator::{UserGenerator, UserGeneratorConfig};
33
34/// Generator for realistic journal entries.
35pub struct JournalEntryGenerator {
36    rng: ChaCha8Rng,
37    seed: u64,
38    config: TransactionConfig,
39    coa: Arc<ChartOfAccounts>,
40    companies: Vec<String>,
41    company_selector: WeightedCompanySelector,
42    line_sampler: LineItemSampler,
43    amount_sampler: AmountSampler,
44    temporal_sampler: TemporalSampler,
45    start_date: NaiveDate,
46    end_date: NaiveDate,
47    count: u64,
48    uuid_factory: DeterministicUuidFactory,
49    // Enhanced features
50    user_pool: Option<UserPool>,
51    description_generator: DescriptionGenerator,
52    reference_generator: ReferenceGenerator,
53    template_config: TemplateConfig,
54    vendor_pool: VendorPool,
55    customer_pool: CustomerPool,
56    // Material pool for realistic material references
57    material_pool: Option<MaterialPool>,
58    // Flag indicating whether we're using real master data vs defaults
59    using_real_master_data: bool,
60    // Fraud generation
61    fraud_config: FraudConfig,
62    // Persona-based error injection
63    persona_errors_enabled: bool,
64    // Approval threshold enforcement
65    approval_enabled: bool,
66    approval_threshold: rust_decimal::Decimal,
67    // SOD violation rate for approval tracking (0.0 to 1.0)
68    sod_violation_rate: f64,
69    // Batching behavior - humans often process similar items together
70    batch_state: Option<BatchState>,
71    // Temporal drift controller for simulating distribution changes over time
72    drift_controller: Option<DriftController>,
73    // Temporal patterns components
74    business_day_calculator: Option<BusinessDayCalculator>,
75    processing_lag_calculator: Option<ProcessingLagCalculator>,
76    temporal_patterns_config: Option<TemporalPatternsConfig>,
77    // Business-process weights for the O2C/P2P/R2R/H2R/A2R volume mix. Must
78    // sum to 1.0 (validated by config schema). Default matches the legacy
79    // hard-coded 0.35/0.30/0.20/0.10/0.05 distribution.
80    business_process_weights: [(BusinessProcess, f64); 5],
81    // v3.4.0 advanced distributions (mixture models + industry profiles).
82    // None preserves v3.3.2 byte-for-byte behavior; populated only when the
83    // caller opts in via [`set_advanced_distributions`].
84    advanced_amount_sampler: Option<AdvancedAmountSampler>,
85    // v3.5.3+ conditional amount override. Populated when
86    // `config.distributions.conditional` contains an entry where
87    // `output_field == "amount"` and `input_field ∈ {"month",
88    // "quarter", "constant"}`. Applied *after* the fraud-pattern /
89    // advanced-sampler / legacy-sampler cascade on non-fraud entries
90    // so it can steer amounts by calendar context without disturbing
91    // fraud semantics.
92    conditional_amount_override: Option<datasynth_core::distributions::ConditionalSampler>,
93    // v3.5.4+ Gaussian copula for amount↔line_count correlation. When
94    // populated, each non-fraud JE draws a (u, v) pair; u nudges amount
95    // via a `(0.75 + 0.5*u)` multiplier and v biases line_count toward
96    // the upper/lower end of its range. Produces observable Spearman
97    // correlation without rewiring existing samplers for inverse-CDF.
98    correlation_copula: Option<datasynth_core::distributions::BivariateCopulaSampler>,
99}
100
101const DEFAULT_BUSINESS_PROCESS_WEIGHTS: [(BusinessProcess, f64); 5] = [
102    (BusinessProcess::O2C, 0.35),
103    (BusinessProcess::P2P, 0.30),
104    (BusinessProcess::R2R, 0.20),
105    (BusinessProcess::H2R, 0.10),
106    (BusinessProcess::A2R, 0.05),
107];
108
109/// Map the schema-level [`datasynth_config::schema::IndustryProfileType`]
110/// onto the distributions-layer [`IndustryType`], then return that industry's
111/// pre-configured `sales_amounts` mixture. Used as a fallback when the
112/// caller enables `distributions.amounts` but supplies no components.
113/// Per-entry context channels for conditional-distribution overrides.
114///
115/// v4.1.0+ supported `input_field` values:
116///
117///   - `"month"` — posting-date month (1..=12)
118///   - `"quarter"` — posting-date quarter (1..=4)
119///   - `"year"` — posting-date year (e.g. 2026.0)
120///   - `"day_of_week"` — 1 (Mon) .. 7 (Sun)
121///   - `"day_of_month"` — 1..=31
122///   - `"day_of_year"` — 1..=366
123///   - `"week_of_year"` — 1..=53
124///   - `"is_period_end"` — 1.0 when posting_date is the last business
125///     day of the month, else 0.0
126///   - `"is_quarter_end"` — 1.0 when posting_date is in a quarter-end
127///     month AND is the last business day, else 0.0
128///   - `"is_year_end"` — 1.0 when posting_date is in December AND is
129///     the last business day, else 0.0
130///   - `"constant"` / empty — always 0.0 (treats as unconditional)
131///
132/// Unsupported values cause the conditional rule to be silently ignored
133/// to keep runtime robust against user typos.
134impl JournalEntryGenerator {
135    fn supported_conditional_input(field: &str) -> bool {
136        matches!(
137            field,
138            "month"
139                | "quarter"
140                | "year"
141                | "day_of_week"
142                | "day_of_month"
143                | "day_of_year"
144                | "week_of_year"
145                | "is_period_end"
146                | "is_quarter_end"
147                | "is_year_end"
148                | "constant"
149                | ""
150        )
151    }
152
153    fn conditional_input_value(&self, posting_date: chrono::NaiveDate) -> f64 {
154        let input_field = match self
155            .conditional_amount_override
156            .as_ref()
157            .map(|s| s.config().input_field.as_str())
158        {
159            Some(f) => f,
160            None => return 0.0,
161        };
162
163        let is_last_business_day = |d: chrono::NaiveDate| -> bool {
164            // Last day-of-month → is_period_end. Handles Feb/leap-year
165            // via chrono's num_days_from_ce roundabout; simpler path:
166            // if adding 1 day moves to a different month, this is EOM.
167            let next = d.succ_opt();
168            match next {
169                Some(n) => n.month() != d.month(),
170                None => true,
171            }
172        };
173
174        match input_field {
175            "month" => posting_date.month() as f64,
176            "quarter" => ((posting_date.month() - 1) / 3 + 1) as f64,
177            "year" => posting_date.year() as f64,
178            "day_of_week" => posting_date.weekday().number_from_monday() as f64,
179            "day_of_month" => posting_date.day() as f64,
180            "day_of_year" => posting_date.ordinal() as f64,
181            "week_of_year" => posting_date.iso_week().week() as f64,
182            "is_period_end" => f64::from(u8::from(is_last_business_day(posting_date))),
183            "is_quarter_end" => {
184                let m = posting_date.month();
185                let is_q_month = matches!(m, 3 | 6 | 9 | 12);
186                f64::from(u8::from(is_q_month && is_last_business_day(posting_date)))
187            }
188            "is_year_end" => f64::from(u8::from(
189                posting_date.month() == 12 && is_last_business_day(posting_date),
190            )),
191            _ => 0.0,
192        }
193    }
194}
195
196fn industry_profile_to_log_normal(
197    p: datasynth_config::schema::IndustryProfileType,
198) -> datasynth_core::distributions::LogNormalMixtureConfig {
199    use datasynth_config::schema::IndustryProfileType as P;
200    let industry = match p {
201        P::Retail => IndustryType::Retail,
202        P::Manufacturing => IndustryType::Manufacturing,
203        P::FinancialServices => IndustryType::FinancialServices,
204        P::Healthcare => IndustryType::Healthcare,
205        P::Technology => IndustryType::Technology,
206    };
207    IndustryAmountProfile::for_industry(industry).sales_amounts
208}
209
210/// State for tracking batch processing behavior.
211///
212/// When humans process transactions, they often batch similar items together
213/// (e.g., processing all invoices from one vendor, entering similar expenses).
214#[derive(Clone)]
215struct BatchState {
216    /// The base entry template to vary
217    base_account_number: String,
218    base_amount: rust_decimal::Decimal,
219    base_business_process: Option<BusinessProcess>,
220    base_posting_date: NaiveDate,
221    /// Remaining entries in this batch
222    remaining: u8,
223}
224
225impl JournalEntryGenerator {
226    /// Create a new journal entry generator.
227    pub fn new_with_params(
228        config: TransactionConfig,
229        coa: Arc<ChartOfAccounts>,
230        companies: Vec<String>,
231        start_date: NaiveDate,
232        end_date: NaiveDate,
233        seed: u64,
234    ) -> Self {
235        Self::new_with_full_config(
236            config,
237            coa,
238            companies,
239            start_date,
240            end_date,
241            seed,
242            TemplateConfig::default(),
243            None,
244        )
245    }
246
247    /// Create a new journal entry generator with full configuration.
248    #[allow(clippy::too_many_arguments)]
249    pub fn new_with_full_config(
250        config: TransactionConfig,
251        coa: Arc<ChartOfAccounts>,
252        companies: Vec<String>,
253        start_date: NaiveDate,
254        end_date: NaiveDate,
255        seed: u64,
256        template_config: TemplateConfig,
257        user_pool: Option<UserPool>,
258    ) -> Self {
259        // Initialize user pool if not provided
260        let user_pool = user_pool.or_else(|| {
261            if template_config.names.generate_realistic_names {
262                let user_gen_config = UserGeneratorConfig {
263                    culture_distribution: vec![
264                        (
265                            datasynth_core::templates::NameCulture::WesternUs,
266                            template_config.names.culture_distribution.western_us,
267                        ),
268                        (
269                            datasynth_core::templates::NameCulture::Hispanic,
270                            template_config.names.culture_distribution.hispanic,
271                        ),
272                        (
273                            datasynth_core::templates::NameCulture::German,
274                            template_config.names.culture_distribution.german,
275                        ),
276                        (
277                            datasynth_core::templates::NameCulture::French,
278                            template_config.names.culture_distribution.french,
279                        ),
280                        (
281                            datasynth_core::templates::NameCulture::Chinese,
282                            template_config.names.culture_distribution.chinese,
283                        ),
284                        (
285                            datasynth_core::templates::NameCulture::Japanese,
286                            template_config.names.culture_distribution.japanese,
287                        ),
288                        (
289                            datasynth_core::templates::NameCulture::Indian,
290                            template_config.names.culture_distribution.indian,
291                        ),
292                    ],
293                    email_domain: template_config.names.email_domain.clone(),
294                    generate_realistic_names: true,
295                };
296                let mut user_gen = UserGenerator::with_config(seed + 100, user_gen_config);
297                Some(user_gen.generate_standard(&companies))
298            } else {
299                None
300            }
301        });
302
303        // Initialize reference generator
304        let mut ref_gen = ReferenceGenerator::new(
305            start_date.year(),
306            companies
307                .first()
308                .map(std::string::String::as_str)
309                .unwrap_or("1000"),
310        );
311        ref_gen.set_prefix(
312            ReferenceType::Invoice,
313            &template_config.references.invoice_prefix,
314        );
315        ref_gen.set_prefix(
316            ReferenceType::PurchaseOrder,
317            &template_config.references.po_prefix,
318        );
319        ref_gen.set_prefix(
320            ReferenceType::SalesOrder,
321            &template_config.references.so_prefix,
322        );
323
324        // Create weighted company selector (uniform weights for this constructor)
325        let company_selector = WeightedCompanySelector::uniform(companies.clone());
326
327        Self {
328            rng: seeded_rng(seed, 0),
329            seed,
330            config: config.clone(),
331            coa,
332            companies,
333            company_selector,
334            line_sampler: LineItemSampler::with_config(
335                seed + 1,
336                config.line_item_distribution.clone(),
337                config.even_odd_distribution.clone(),
338                config.debit_credit_distribution.clone(),
339            ),
340            amount_sampler: AmountSampler::with_config(seed + 2, config.amounts.clone()),
341            temporal_sampler: TemporalSampler::with_config(
342                seed + 3,
343                config.seasonality.clone(),
344                WorkingHoursConfig::default(),
345                Vec::new(),
346            ),
347            start_date,
348            end_date,
349            count: 0,
350            uuid_factory: DeterministicUuidFactory::new(seed, GeneratorType::JournalEntry),
351            user_pool,
352            description_generator: DescriptionGenerator::new(),
353            reference_generator: ref_gen,
354            template_config,
355            vendor_pool: VendorPool::standard(),
356            customer_pool: CustomerPool::standard(),
357            material_pool: None,
358            using_real_master_data: false,
359            fraud_config: FraudConfig::default(),
360            persona_errors_enabled: true, // Enable by default for realism
361            approval_enabled: true,       // Enable by default for realism
362            approval_threshold: rust_decimal::Decimal::new(10000, 0), // $10,000 default threshold
363            sod_violation_rate: 0.10,     // 10% default SOD violation rate
364            batch_state: None,
365            drift_controller: None,
366            // Always provide a basic BusinessDayCalculator so that weekend/holiday
367            // filtering is active even when temporal_patterns is not explicitly enabled.
368            business_day_calculator: Some(BusinessDayCalculator::new(HolidayCalendar::new(
369                Region::US,
370                start_date.year(),
371            ))),
372            processing_lag_calculator: None,
373            temporal_patterns_config: None,
374            business_process_weights: DEFAULT_BUSINESS_PROCESS_WEIGHTS,
375            advanced_amount_sampler: None,
376            conditional_amount_override: None,
377            correlation_copula: None,
378        }
379    }
380
381    /// Wire v3.4.0 advanced distributions. When the caller's config has
382    /// `distributions.enabled = true` AND `distributions.amounts.enabled =
383    /// true`, the journal-entry generator routes non-fraud amount sampling
384    /// through an [`AdvancedAmountSampler`] (log-normal or Gaussian mixture).
385    ///
386    /// When `distributions.industry_profile` is `Some`, the caller's
387    /// explicitly configured components override nothing — if the component
388    /// list is empty, the industry profile's `sales_amounts` mixture is used
389    /// instead. Explicit components always win.
390    ///
391    /// Returning `Ok(())` with no side effect is intentional for the
392    /// following no-op cases, so callers can unconditionally invoke this:
393    ///   - `config.enabled = false`
394    ///   - `config.amounts.enabled = false`
395    ///   - empty component list with no industry profile
396    ///
397    /// Errors propagate from mixture validation (e.g. weights not summing
398    /// to 1.0, non-positive sigma).
399    pub fn set_advanced_distributions(
400        &mut self,
401        config: &AdvancedDistributionConfig,
402        seed: u64,
403    ) -> Result<(), String> {
404        if !config.enabled {
405            return Ok(());
406        }
407
408        // v3.5.3+: build a conditional-amount override when the config
409        // declares a rule with `output_field == "amount"` and a supported
410        // input field. The override is applied *after* the standard
411        // cascade so it doesn't disturb fraud-path sampling. Unsupported
412        // input fields are ignored with a trace log.
413        self.conditional_amount_override = config
414            .conditional
415            .iter()
416            .find(|c| {
417                c.output_field == "amount" && Self::supported_conditional_input(&c.input_field)
418            })
419            .and_then(|c| {
420                datasynth_core::distributions::ConditionalSampler::new(
421                    seed.wrapping_add(17),
422                    c.to_core_config(),
423                )
424                .ok()
425            });
426
427        // v4.1.0+: all 5 copula types wired (Gaussian / Clayton /
428        // Gumbel / Frank / Student-t). The `BivariateCopulaSampler`
429        // already implements each; v3.5.4 had a filter limiting to
430        // Gaussian only — lifted here now that the smoke test matrix
431        // covers all types.
432        self.correlation_copula = config
433            .correlations
434            .to_core_config_for_pair("amount", "line_count")
435            .and_then(|copula_cfg| {
436                datasynth_core::distributions::BivariateCopulaSampler::new(
437                    seed.wrapping_add(31),
438                    copula_cfg,
439                )
440                .ok()
441            });
442
443        // v3.4.4+: Pareto takes precedence over mixture models when set.
444        // This supports heavy-tailed amount distributions (capex, strategic
445        // contracts, fraud) that log-normal/Gaussian mixtures can't model
446        // as sharply.
447        if let Some(pareto) = &config.pareto {
448            if pareto.enabled {
449                let core_cfg = pareto.to_core_config();
450                self.advanced_amount_sampler =
451                    Some(AdvancedAmountSampler::new_pareto(seed, core_cfg)?);
452                return Ok(());
453            }
454        }
455
456        if !config.amounts.enabled {
457            return Ok(());
458        }
459
460        match config.amounts.distribution_type {
461            MixtureDistributionType::LogNormal => {
462                let lognormal_cfg = config
463                    .amounts
464                    .to_log_normal_config()
465                    .or_else(|| config.industry_profile.map(industry_profile_to_log_normal));
466                if let Some(cfg) = lognormal_cfg {
467                    self.advanced_amount_sampler =
468                        Some(AdvancedAmountSampler::new_log_normal(seed, cfg)?);
469                }
470            }
471            MixtureDistributionType::Gaussian => {
472                if let Some(cfg) = config.amounts.to_gaussian_config() {
473                    self.advanced_amount_sampler =
474                        Some(AdvancedAmountSampler::new_gaussian(seed, cfg)?);
475                }
476            }
477        }
478
479        Ok(())
480    }
481
482    /// Override the business-process volume mix. Weights map directly to the
483    /// `business_processes.*_weight` YAML config; they do not have to sum to
484    /// exactly 1.0 (they're normalized via `weighted_select`).
485    pub fn set_business_process_weights(
486        &mut self,
487        o2c: f64,
488        p2p: f64,
489        r2r: f64,
490        h2r: f64,
491        a2r: f64,
492    ) {
493        self.business_process_weights = [
494            (BusinessProcess::O2C, o2c),
495            (BusinessProcess::P2P, p2p),
496            (BusinessProcess::R2R, r2r),
497            (BusinessProcess::H2R, h2r),
498            (BusinessProcess::A2R, a2r),
499        ];
500    }
501
502    /// Create from a full GeneratorConfig.
503    ///
504    /// This constructor uses the volume_weight from company configs
505    /// for weighted company selection, and fraud config from GeneratorConfig.
506    pub fn from_generator_config(
507        full_config: &GeneratorConfig,
508        coa: Arc<ChartOfAccounts>,
509        start_date: NaiveDate,
510        end_date: NaiveDate,
511        seed: u64,
512    ) -> Self {
513        let companies: Vec<String> = full_config
514            .companies
515            .iter()
516            .map(|c| c.code.clone())
517            .collect();
518
519        // Create weighted selector using volume_weight from company configs
520        let company_selector = WeightedCompanySelector::from_configs(&full_config.companies);
521
522        let mut generator = Self::new_with_full_config(
523            full_config.transactions.clone(),
524            coa,
525            companies,
526            start_date,
527            end_date,
528            seed,
529            full_config.templates.clone(),
530            None,
531        );
532
533        // Override the uniform selector with weighted selector
534        generator.company_selector = company_selector;
535
536        // Set fraud config
537        generator.fraud_config = full_config.fraud.clone();
538
539        // Configure temporal patterns if enabled
540        let temporal_config = &full_config.temporal_patterns;
541        if temporal_config.enabled {
542            generator = generator.with_temporal_patterns(temporal_config.clone(), seed);
543        }
544
545        generator
546    }
547
548    /// Configure temporal patterns including business day calculations and processing lags.
549    ///
550    /// This enables realistic temporal behavior including:
551    /// - Business day awareness (no postings on weekends/holidays)
552    /// - Processing lag modeling (event-to-posting delays)
553    /// - Period-end dynamics (volume spikes at month/quarter/year end)
554    pub fn with_temporal_patterns(mut self, config: TemporalPatternsConfig, seed: u64) -> Self {
555        // Create business day calculator if enabled
556        if config.business_days.enabled {
557            let region = config
558                .calendars
559                .regions
560                .first()
561                .map(|r| Self::parse_region(r))
562                .unwrap_or(Region::US);
563
564            let calendar = HolidayCalendar::new(region, self.start_date.year());
565            self.business_day_calculator = Some(BusinessDayCalculator::new(calendar));
566        }
567
568        // Create processing lag calculator if enabled
569        if config.processing_lags.enabled {
570            let lag_config = Self::convert_processing_lag_config(&config.processing_lags);
571            self.processing_lag_calculator =
572                Some(ProcessingLagCalculator::with_config(seed, lag_config));
573        }
574
575        // Create period-end dynamics if configured
576        let model = config.period_end.model.as_deref().unwrap_or("flat");
577        if model != "flat"
578            || config
579                .period_end
580                .month_end
581                .as_ref()
582                .is_some_and(|m| m.peak_multiplier.unwrap_or(1.0) != 1.0)
583        {
584            let dynamics = Self::convert_period_end_config(&config.period_end);
585            self.temporal_sampler.set_period_end_dynamics(dynamics);
586        }
587
588        self.temporal_patterns_config = Some(config);
589        self
590    }
591
592    /// Configure temporal patterns using a [`CountryPack`] for the holiday calendar.
593    ///
594    /// This is an alternative to [`with_temporal_patterns`] that derives the
595    /// holiday calendar from a country-pack definition rather than the built-in
596    /// region-based calendars.  All other temporal behaviour (business-day
597    /// adjustment, processing lags, period-end dynamics) is configured
598    /// identically.
599    pub fn with_country_pack_temporal(
600        mut self,
601        config: TemporalPatternsConfig,
602        seed: u64,
603        pack: &CountryPack,
604    ) -> Self {
605        // Create business day calculator using the country pack calendar
606        if config.business_days.enabled {
607            let calendar = HolidayCalendar::from_country_pack(pack, self.start_date.year());
608            self.business_day_calculator = Some(BusinessDayCalculator::new(calendar));
609        }
610
611        // Create processing lag calculator if enabled
612        if config.processing_lags.enabled {
613            let lag_config = Self::convert_processing_lag_config(&config.processing_lags);
614            self.processing_lag_calculator =
615                Some(ProcessingLagCalculator::with_config(seed, lag_config));
616        }
617
618        // Create period-end dynamics if configured
619        let model = config.period_end.model.as_deref().unwrap_or("flat");
620        if model != "flat"
621            || config
622                .period_end
623                .month_end
624                .as_ref()
625                .is_some_and(|m| m.peak_multiplier.unwrap_or(1.0) != 1.0)
626        {
627            let dynamics = Self::convert_period_end_config(&config.period_end);
628            self.temporal_sampler.set_period_end_dynamics(dynamics);
629        }
630
631        self.temporal_patterns_config = Some(config);
632        self
633    }
634
635    /// Convert schema processing lag config to core config.
636    fn convert_processing_lag_config(
637        schema: &datasynth_config::schema::ProcessingLagSchemaConfig,
638    ) -> ProcessingLagConfig {
639        let mut config = ProcessingLagConfig {
640            enabled: schema.enabled,
641            ..Default::default()
642        };
643
644        // Helper to convert lag schema to distribution
645        let convert_lag = |lag: &datasynth_config::schema::LagDistributionSchemaConfig| {
646            let mut dist = LagDistribution::log_normal(lag.mu, lag.sigma);
647            if let Some(min) = lag.min_hours {
648                dist.min_lag_hours = min;
649            }
650            if let Some(max) = lag.max_hours {
651                dist.max_lag_hours = max;
652            }
653            dist
654        };
655
656        // Apply event-specific lags
657        if let Some(ref lag) = schema.sales_order_lag {
658            config
659                .event_lags
660                .insert(EventType::SalesOrder, convert_lag(lag));
661        }
662        if let Some(ref lag) = schema.purchase_order_lag {
663            config
664                .event_lags
665                .insert(EventType::PurchaseOrder, convert_lag(lag));
666        }
667        if let Some(ref lag) = schema.goods_receipt_lag {
668            config
669                .event_lags
670                .insert(EventType::GoodsReceipt, convert_lag(lag));
671        }
672        if let Some(ref lag) = schema.invoice_receipt_lag {
673            config
674                .event_lags
675                .insert(EventType::InvoiceReceipt, convert_lag(lag));
676        }
677        if let Some(ref lag) = schema.invoice_issue_lag {
678            config
679                .event_lags
680                .insert(EventType::InvoiceIssue, convert_lag(lag));
681        }
682        if let Some(ref lag) = schema.payment_lag {
683            config
684                .event_lags
685                .insert(EventType::Payment, convert_lag(lag));
686        }
687        if let Some(ref lag) = schema.journal_entry_lag {
688            config
689                .event_lags
690                .insert(EventType::JournalEntry, convert_lag(lag));
691        }
692
693        // Apply cross-day posting config
694        if let Some(ref cross_day) = schema.cross_day_posting {
695            config.cross_day = CrossDayConfig {
696                enabled: cross_day.enabled,
697                probability_by_hour: cross_day.probability_by_hour.clone(),
698                ..Default::default()
699            };
700        }
701
702        config
703    }
704
705    /// Convert schema period-end config to core PeriodEndDynamics.
706    fn convert_period_end_config(
707        schema: &datasynth_config::schema::PeriodEndSchemaConfig,
708    ) -> PeriodEndDynamics {
709        let model_type = schema.model.as_deref().unwrap_or("exponential");
710
711        // Helper to convert period config
712        let convert_period =
713            |period: Option<&datasynth_config::schema::PeriodEndModelSchemaConfig>,
714             default_peak: f64|
715             -> PeriodEndConfig {
716                if let Some(p) = period {
717                    let model = match model_type {
718                        "flat" => PeriodEndModel::FlatMultiplier {
719                            multiplier: p.peak_multiplier.unwrap_or(default_peak),
720                        },
721                        "extended_crunch" => PeriodEndModel::ExtendedCrunch {
722                            start_day: p.start_day.unwrap_or(-10),
723                            sustained_high_days: p.sustained_high_days.unwrap_or(3),
724                            peak_multiplier: p.peak_multiplier.unwrap_or(default_peak),
725                            ramp_up_days: 3, // Default ramp-up period
726                        },
727                        _ => PeriodEndModel::ExponentialAcceleration {
728                            start_day: p.start_day.unwrap_or(-10),
729                            base_multiplier: p.base_multiplier.unwrap_or(1.0),
730                            peak_multiplier: p.peak_multiplier.unwrap_or(default_peak),
731                            decay_rate: p.decay_rate.unwrap_or(0.3),
732                        },
733                    };
734                    PeriodEndConfig {
735                        enabled: true,
736                        model,
737                        additional_multiplier: p.additional_multiplier.unwrap_or(1.0),
738                    }
739                } else {
740                    PeriodEndConfig {
741                        enabled: true,
742                        model: PeriodEndModel::ExponentialAcceleration {
743                            start_day: -10,
744                            base_multiplier: 1.0,
745                            peak_multiplier: default_peak,
746                            decay_rate: 0.3,
747                        },
748                        additional_multiplier: 1.0,
749                    }
750                }
751            };
752
753        PeriodEndDynamics::new(
754            convert_period(schema.month_end.as_ref(), 2.0),
755            convert_period(schema.quarter_end.as_ref(), 3.5),
756            convert_period(schema.year_end.as_ref(), 5.0),
757        )
758    }
759
760    /// Parse a region string into a Region enum.
761    fn parse_region(region_str: &str) -> Region {
762        match region_str.to_uppercase().as_str() {
763            "US" => Region::US,
764            "DE" => Region::DE,
765            "GB" => Region::GB,
766            "CN" => Region::CN,
767            "JP" => Region::JP,
768            "IN" => Region::IN,
769            "BR" => Region::BR,
770            "MX" => Region::MX,
771            "AU" => Region::AU,
772            "SG" => Region::SG,
773            "KR" => Region::KR,
774            "FR" => Region::FR,
775            "IT" => Region::IT,
776            "ES" => Region::ES,
777            "CA" => Region::CA,
778            _ => Region::US,
779        }
780    }
781
    /// Set a custom company selector.
    ///
    /// Replaces the selector stored on the generator; `generate` draws the
    /// company code of each new entry from whichever selector is installed.
    pub fn set_company_selector(&mut self, selector: WeightedCompanySelector) {
        self.company_selector = selector;
    }
786
    /// Get the current company selector.
    ///
    /// Returns a shared borrow of the selector stored on the generator.
    pub fn company_selector(&self) -> &WeightedCompanySelector {
        &self.company_selector
    }
791
    /// Set fraud configuration.
    ///
    /// In-place counterpart of the builder-style `with_fraud_config`: the
    /// stored configuration is replaced wholesale. Its `enabled` flag,
    /// `fraud_rate` and `fraud_type_distribution` are read when deciding
    /// whether (and how) an entry is marked as fraudulent.
    pub fn set_fraud_config(&mut self, config: FraudConfig) {
        self.fraud_config = config;
    }
796
797    /// Set vendors from generated master data.
798    ///
799    /// This replaces the default vendor pool with actual generated vendors,
800    /// ensuring JEs reference real master data entities.
801    pub fn with_vendors(mut self, vendors: &[Vendor]) -> Self {
802        if !vendors.is_empty() {
803            self.vendor_pool = VendorPool::from_vendors(vendors.to_vec());
804            self.using_real_master_data = true;
805        }
806        self
807    }
808
809    /// Set customers from generated master data.
810    ///
811    /// This replaces the default customer pool with actual generated customers,
812    /// ensuring JEs reference real master data entities.
813    pub fn with_customers(mut self, customers: &[Customer]) -> Self {
814        if !customers.is_empty() {
815            self.customer_pool = CustomerPool::from_customers(customers.to_vec());
816            self.using_real_master_data = true;
817        }
818        self
819    }
820
821    /// Set materials from generated master data.
822    ///
823    /// This provides material references for JEs that involve inventory movements.
824    pub fn with_materials(mut self, materials: &[Material]) -> Self {
825        if !materials.is_empty() {
826            self.material_pool = Some(MaterialPool::from_materials(materials.to_vec()));
827            self.using_real_master_data = true;
828        }
829        self
830    }
831
832    /// Set all master data at once for convenience.
833    ///
834    /// This is the recommended way to configure the JE generator with
835    /// generated master data to ensure data coherence.
836    pub fn with_master_data(
837        self,
838        vendors: &[Vendor],
839        customers: &[Customer],
840        materials: &[Material],
841    ) -> Self {
842        self.with_vendors(vendors)
843            .with_customers(customers)
844            .with_materials(materials)
845    }
846
847    /// Replace the user pool with one generated from a [`CountryPack`].
848    ///
849    /// This is an alternative to the default name-culture distribution that
850    /// derives name pools and weights from the country-pack's `names` section.
851    /// The existing user pool (if any) is discarded and regenerated using
852    /// [`MultiCultureNameGenerator::from_country_pack`].
853    pub fn with_country_pack_names(mut self, pack: &CountryPack) -> Self {
854        let name_gen =
855            datasynth_core::templates::MultiCultureNameGenerator::from_country_pack(pack);
856        let config = UserGeneratorConfig {
857            // The culture distribution is embedded in the name generator
858            // itself, so we use an empty list here.
859            culture_distribution: Vec::new(),
860            email_domain: name_gen.email_domain().to_string(),
861            generate_realistic_names: true,
862        };
863        let mut user_gen = UserGenerator::with_name_generator(self.seed + 100, config, name_gen);
864        self.user_pool = Some(user_gen.generate_standard(&self.companies));
865        self
866    }
867
    /// Check if the generator is using real master data.
    ///
    /// The flag is raised by `with_vendors`, `with_customers` and
    /// `with_materials` when they are given a non-empty slice.
    pub fn is_using_real_master_data(&self) -> bool {
        self.using_real_master_data
    }
872
873    /// Determine if this transaction should be fraudulent.
874    fn determine_fraud(&mut self) -> Option<FraudType> {
875        if !self.fraud_config.enabled {
876            return None;
877        }
878
879        // Roll for fraud based on fraud rate
880        if self.rng.random::<f64>() >= self.fraud_config.fraud_rate {
881            return None;
882        }
883
884        // Select fraud type based on distribution
885        Some(self.select_fraud_type())
886    }
887
888    /// Select a fraud type based on the configured distribution.
889    fn select_fraud_type(&mut self) -> FraudType {
890        let dist = &self.fraud_config.fraud_type_distribution;
891        let roll: f64 = self.rng.random();
892
893        let mut cumulative = 0.0;
894
895        cumulative += dist.suspense_account_abuse;
896        if roll < cumulative {
897            return FraudType::SuspenseAccountAbuse;
898        }
899
900        cumulative += dist.fictitious_transaction;
901        if roll < cumulative {
902            return FraudType::FictitiousTransaction;
903        }
904
905        cumulative += dist.revenue_manipulation;
906        if roll < cumulative {
907            return FraudType::RevenueManipulation;
908        }
909
910        cumulative += dist.expense_capitalization;
911        if roll < cumulative {
912            return FraudType::ExpenseCapitalization;
913        }
914
915        cumulative += dist.split_transaction;
916        if roll < cumulative {
917            return FraudType::SplitTransaction;
918        }
919
920        cumulative += dist.timing_anomaly;
921        if roll < cumulative {
922            return FraudType::TimingAnomaly;
923        }
924
925        cumulative += dist.unauthorized_access;
926        if roll < cumulative {
927            return FraudType::UnauthorizedAccess;
928        }
929
930        // Default fallback
931        FraudType::DuplicatePayment
932    }
933
934    /// Map a fraud type to an amount pattern for suspicious amounts.
935    fn fraud_type_to_amount_pattern(&self, fraud_type: FraudType) -> FraudAmountPattern {
936        match fraud_type {
937            FraudType::SplitTransaction | FraudType::JustBelowThreshold => {
938                FraudAmountPattern::ThresholdAdjacent
939            }
940            FraudType::FictitiousTransaction
941            | FraudType::FictitiousEntry
942            | FraudType::SuspenseAccountAbuse
943            | FraudType::RoundDollarManipulation => FraudAmountPattern::ObviousRoundNumbers,
944            FraudType::RevenueManipulation
945            | FraudType::ExpenseCapitalization
946            | FraudType::ImproperCapitalization
947            | FraudType::ReserveManipulation
948            | FraudType::UnauthorizedAccess
949            | FraudType::PrematureRevenue
950            | FraudType::UnderstatedLiabilities
951            | FraudType::OverstatedAssets
952            | FraudType::ChannelStuffing => FraudAmountPattern::StatisticallyImprobable,
953            FraudType::DuplicatePayment
954            | FraudType::TimingAnomaly
955            | FraudType::SelfApproval
956            | FraudType::ExceededApprovalLimit
957            | FraudType::SegregationOfDutiesViolation
958            | FraudType::UnauthorizedApproval
959            | FraudType::CollusiveApproval
960            | FraudType::FictitiousVendor
961            | FraudType::ShellCompanyPayment
962            | FraudType::Kickback
963            | FraudType::KickbackScheme
964            | FraudType::InvoiceManipulation
965            | FraudType::AssetMisappropriation
966            | FraudType::InventoryTheft
967            | FraudType::GhostEmployee => FraudAmountPattern::Normal,
968            // Accounting Standards Fraud Types (ASC 606/IFRS 15 - Revenue)
969            FraudType::ImproperRevenueRecognition
970            | FraudType::ImproperPoAllocation
971            | FraudType::VariableConsiderationManipulation
972            | FraudType::ContractModificationMisstatement => {
973                FraudAmountPattern::StatisticallyImprobable
974            }
975            // Accounting Standards Fraud Types (ASC 842/IFRS 16 - Leases)
976            FraudType::LeaseClassificationManipulation
977            | FraudType::OffBalanceSheetLease
978            | FraudType::LeaseLiabilityUnderstatement
979            | FraudType::RouAssetMisstatement => FraudAmountPattern::StatisticallyImprobable,
980            // Accounting Standards Fraud Types (ASC 820/IFRS 13 - Fair Value)
981            FraudType::FairValueHierarchyManipulation
982            | FraudType::Level3InputManipulation
983            | FraudType::ValuationTechniqueManipulation => {
984                FraudAmountPattern::StatisticallyImprobable
985            }
986            // Accounting Standards Fraud Types (ASC 360/IAS 36 - Impairment)
987            FraudType::DelayedImpairment
988            | FraudType::ImpairmentTestAvoidance
989            | FraudType::CashFlowProjectionManipulation
990            | FraudType::ImproperImpairmentReversal => FraudAmountPattern::StatisticallyImprobable,
991            // Sourcing/Procurement Fraud
992            FraudType::BidRigging
993            | FraudType::PhantomVendorContract
994            | FraudType::ConflictOfInterestSourcing => FraudAmountPattern::Normal,
995            FraudType::SplitContractThreshold => FraudAmountPattern::ThresholdAdjacent,
996            // HR/Payroll Fraud
997            FraudType::GhostEmployeePayroll
998            | FraudType::PayrollInflation
999            | FraudType::DuplicateExpenseReport
1000            | FraudType::FictitiousExpense => FraudAmountPattern::Normal,
1001            FraudType::SplitExpenseToAvoidApproval => FraudAmountPattern::ThresholdAdjacent,
1002            // O2C Fraud
1003            FraudType::RevenueTimingManipulation => FraudAmountPattern::StatisticallyImprobable,
1004            FraudType::QuotePriceOverride => FraudAmountPattern::Normal,
1005        }
1006    }
1007
    /// Generate a deterministic UUID using the factory.
    ///
    /// Returns the next identifier from the generator's
    /// [`DeterministicUuidFactory`].
    // NOTE(review): called through `&self`, so the factory presumably advances
    // its state via interior mutability — confirm against `DeterministicUuidFactory`.
    #[inline]
    fn generate_deterministic_uuid(&self) -> uuid::Uuid {
        self.uuid_factory.next()
    }
1013
    /// Cost center pool used for expense account enrichment.
    ///
    /// `enrich_line_items` indexes this slice modulo its length when assigning
    /// a cost center to lines whose GL account starts with `5` or `6`.
    const COST_CENTER_POOL: &'static [&'static str] =
        &["CC1000", "CC2000", "CC3000", "CC4000", "CC5000"];
1017
1018    /// Enrich journal entry line items with account descriptions, cost centers,
1019    /// profit centers, value dates, line text, and assignment fields.
1020    ///
1021    /// This populates the sparse optional fields that `JournalEntryLine::debit()`
1022    /// and `::credit()` leave as `None`.
1023    fn enrich_line_items(&self, entry: &mut JournalEntry) {
1024        let posting_date = entry.header.posting_date;
1025        let company_code = &entry.header.company_code;
1026        let header_text = entry.header.header_text.clone();
1027        let business_process = entry.header.business_process;
1028
1029        // Derive a deterministic index from the document_id for cost center selection
1030        let doc_id_bytes = entry.header.document_id.as_bytes();
1031        let mut cc_seed: usize = 0;
1032        for &b in doc_id_bytes {
1033            cc_seed = cc_seed.wrapping_add(b as usize);
1034        }
1035
1036        for (i, line) in entry.lines.iter_mut().enumerate() {
1037            // 1. account_description: look up from CoA
1038            if line.account_description.is_none() {
1039                line.account_description = self
1040                    .coa
1041                    .get_account(&line.gl_account)
1042                    .map(|a| a.short_description.clone());
1043            }
1044
1045            // 2. cost_center: assign to expense accounts (5xxx/6xxx)
1046            if line.cost_center.is_none() {
1047                let first_char = line.gl_account.chars().next().unwrap_or('0');
1048                if first_char == '5' || first_char == '6' {
1049                    let idx = cc_seed.wrapping_add(i) % Self::COST_CENTER_POOL.len();
1050                    line.cost_center = Some(Self::COST_CENTER_POOL[idx].to_string());
1051                }
1052            }
1053
1054            // 3. profit_center: derive from company code + business process
1055            if line.profit_center.is_none() {
1056                let suffix = match business_process {
1057                    Some(BusinessProcess::P2P) => "-P2P",
1058                    Some(BusinessProcess::O2C) => "-O2C",
1059                    Some(BusinessProcess::R2R) => "-R2R",
1060                    Some(BusinessProcess::H2R) => "-H2R",
1061                    _ => "",
1062                };
1063                line.profit_center = Some(format!("PC-{company_code}{suffix}"));
1064            }
1065
1066            // 4. line_text: fall back to header_text if not already set
1067            if line.line_text.is_none() {
1068                line.line_text = header_text.clone();
1069            }
1070
1071            // 5. value_date: set to posting_date for AR/AP accounts
1072            if line.value_date.is_none()
1073                && (line.gl_account.starts_with("1100") || line.gl_account.starts_with("2000"))
1074            {
1075                line.value_date = Some(posting_date);
1076            }
1077
1078            // 6. assignment: set to vendor/customer reference for AP/AR lines
1079            if line.assignment.is_none() {
1080                if line.gl_account.starts_with("2000") {
1081                    // AP line - use vendor reference from header
1082                    if let Some(ref ht) = header_text {
1083                        // Try to extract vendor ID from header text patterns like "... - V-001"
1084                        if let Some(vendor_part) = ht.rsplit(" - ").next() {
1085                            if vendor_part.starts_with("V-")
1086                                || vendor_part.starts_with("VENDOR")
1087                                || vendor_part.starts_with("Vendor")
1088                            {
1089                                line.assignment = Some(vendor_part.to_string());
1090                            }
1091                        }
1092                    }
1093                } else if line.gl_account.starts_with("1100") {
1094                    // AR line - use customer reference from header
1095                    if let Some(ref ht) = header_text {
1096                        if let Some(customer_part) = ht.rsplit(" - ").next() {
1097                            if customer_part.starts_with("C-")
1098                                || customer_part.starts_with("CUST")
1099                                || customer_part.starts_with("Customer")
1100                            {
1101                                line.assignment = Some(customer_part.to_string());
1102                            }
1103                        }
1104                    }
1105                }
1106            }
1107        }
1108    }
1109
1110    /// Generate a single journal entry.
1111    pub fn generate(&mut self) -> JournalEntry {
1112        debug!(
1113            count = self.count,
1114            companies = self.companies.len(),
1115            start_date = %self.start_date,
1116            end_date = %self.end_date,
1117            "Generating journal entry"
1118        );
1119
1120        // Check if we're in a batch - if so, generate a batched entry
1121        if let Some(ref state) = self.batch_state {
1122            if state.remaining > 0 {
1123                return self.generate_batched_entry();
1124            }
1125        }
1126
1127        self.count += 1;
1128
1129        // Generate deterministic document ID
1130        let document_id = self.generate_deterministic_uuid();
1131
1132        // Sample posting date
1133        let mut posting_date = self
1134            .temporal_sampler
1135            .sample_date(self.start_date, self.end_date);
1136
1137        // Adjust posting date to be a business day if business day calculator is configured
1138        if let Some(ref calc) = self.business_day_calculator {
1139            if !calc.is_business_day(posting_date) {
1140                // Move to next business day
1141                posting_date = calc.next_business_day(posting_date, false);
1142                // Ensure we don't exceed end_date
1143                if posting_date > self.end_date {
1144                    posting_date = calc.prev_business_day(self.end_date, true);
1145                }
1146            }
1147        }
1148
1149        // Select company using weighted selector
1150        let company_code = self.company_selector.select(&mut self.rng).to_string();
1151
1152        // v4.1.0+: draw a single (u, v) pair from the copula — cached for
1153        // both the amount adjustment (u) and the line-count shift (v).
1154        // None when no copula is configured.
1155        let copula_uv: Option<(f64, f64)> =
1156            self.correlation_copula.as_mut().map(|cop| cop.sample());
1157
1158        // Sample line item specification. When a copula is configured,
1159        // v drives line-count via a quantile-preserving map: integer
1160        // count `2 + floor(v * 10)` gives range [2, 11] evenly spaced
1161        // in v, so rank(v) == rank(line_count).
1162        //
1163        // v4.1.6+: upgraded from the v3.5.4 nudge (shift around
1164        // independently-drawn count) to true rank-preserving quantile
1165        // inversion, so empirical Kendall-τ now matches copula theory.
1166        let mut line_spec = self.line_sampler.sample();
1167        if let Some((_u, v)) = copula_uv {
1168            let new_total = 2 + ((v * 10.0).floor() as usize).min(9);
1169            let old_debit = line_spec.debit_count.max(1);
1170            let old_credit = line_spec.credit_count.max(1);
1171            let new_debit = (new_total as f64 * old_debit as f64 / (old_debit + old_credit) as f64)
1172                .round() as usize;
1173            let new_debit = new_debit.clamp(1, new_total - 1);
1174            let new_credit = new_total - new_debit;
1175            line_spec.total_count = new_total;
1176            line_spec.debit_count = new_debit;
1177            line_spec.credit_count = new_credit;
1178        }
1179
1180        // Determine source type using full 4-way distribution
1181        let source = self.select_source();
1182        let is_automated = matches!(
1183            source,
1184            TransactionSource::Automated | TransactionSource::Recurring
1185        );
1186
1187        // Select business process
1188        let business_process = self.select_business_process();
1189
1190        // Determine if this is a fraudulent transaction
1191        let fraud_type = self.determine_fraud();
1192        let is_fraud = fraud_type.is_some();
1193
1194        // Sample time based on source
1195        let time = self.temporal_sampler.sample_time(!is_automated);
1196        let created_at = posting_date.and_time(time).and_utc();
1197
1198        // Select user from pool or generate generic
1199        let (created_by, user_persona) = self.select_user(is_automated);
1200
1201        // Create header with deterministic UUID
1202        let mut header =
1203            JournalEntryHeader::with_deterministic_id(company_code, posting_date, document_id);
1204        header.created_at = created_at;
1205        header.source = source;
1206        header.created_by = created_by;
1207        header.user_persona = user_persona;
1208        header.business_process = Some(business_process);
1209        header.document_type = Self::document_type_for_process(business_process).to_string();
1210        header.is_fraud = is_fraud;
1211        header.fraud_type = fraud_type;
1212
1213        // --- ISA 240 audit flags ---
1214        let is_manual = matches!(source, TransactionSource::Manual);
1215        header.is_manual = is_manual;
1216
1217        // Determine source_system based on manual vs automated
1218        header.source_system = if is_manual {
1219            if self.rng.random::<f64>() < 0.70 {
1220                "manual".to_string()
1221            } else {
1222                "spreadsheet".to_string()
1223            }
1224        } else {
1225            let roll: f64 = self.rng.random();
1226            if roll < 0.40 {
1227                "SAP-FI".to_string()
1228            } else if roll < 0.60 {
1229                "SAP-MM".to_string()
1230            } else if roll < 0.80 {
1231                "SAP-SD".to_string()
1232            } else if roll < 0.95 {
1233                "interface".to_string()
1234            } else {
1235                "SAP-HR".to_string()
1236            }
1237        };
1238
1239        // is_post_close: entry is in the last month of the configured period
1240        // and the posting date falls after the 25th (simulating close cutoff)
1241        let is_post_close = posting_date.month() == self.end_date.month()
1242            && posting_date.year() == self.end_date.year()
1243            && posting_date.day() > 25;
1244        header.is_post_close = is_post_close;
1245
1246        // created_date: for manual entries, same day as posting; for automated,
1247        // 0-3 days before posting_date
1248        let created_date = if is_manual {
1249            posting_date.and_hms_opt(time.hour().min(23), time.minute(), time.second())
1250        } else {
1251            let lag_days = self.rng.random_range(0i64..=3);
1252            let created_naive_date = posting_date
1253                .checked_sub_signed(chrono::Duration::days(lag_days))
1254                .unwrap_or(posting_date);
1255            created_naive_date.and_hms_opt(
1256                self.rng.random_range(8u32..=17),
1257                self.rng.random_range(0u32..=59),
1258                self.rng.random_range(0u32..=59),
1259            )
1260        };
1261        header.created_date = created_date;
1262
1263        // Generate description context
1264        let mut context =
1265            DescriptionContext::with_period(posting_date.month(), posting_date.year());
1266
1267        // Add vendor/customer context based on business process
1268        match business_process {
1269            BusinessProcess::P2P => {
1270                if let Some(vendor) = self.vendor_pool.random_vendor(&mut self.rng) {
1271                    context.vendor_name = Some(vendor.name.clone());
1272                }
1273            }
1274            BusinessProcess::O2C => {
1275                if let Some(customer) = self.customer_pool.random_customer(&mut self.rng) {
1276                    context.customer_name = Some(customer.name.clone());
1277                }
1278            }
1279            _ => {}
1280        }
1281
1282        // Generate header text if enabled
1283        if self.template_config.descriptions.generate_header_text {
1284            header.header_text = Some(self.description_generator.generate_header_text(
1285                business_process,
1286                &context,
1287                &mut self.rng,
1288            ));
1289        }
1290
1291        // Generate reference if enabled
1292        if self.template_config.references.generate_references {
1293            header.reference = Some(
1294                self.reference_generator
1295                    .generate_for_process_year(business_process, posting_date.year()),
1296            );
1297        }
1298
1299        // Derive typed source document from reference prefix
1300        header.source_document = header
1301            .reference
1302            .as_deref()
1303            .and_then(DocumentRef::parse)
1304            .or_else(|| {
1305                if header.source == TransactionSource::Manual {
1306                    Some(DocumentRef::Manual)
1307                } else {
1308                    None
1309                }
1310            });
1311
1312        // Generate line items
1313        let mut entry = JournalEntry::new(header);
1314
1315        // Generate amount - use fraud pattern if this is a fraudulent transaction.
1316        // Non-fraud path prefers the v3.4.0 advanced sampler when configured; fraud
1317        // patterns always use the legacy sampler because they target specific
1318        // thresholds (round numbers, just-under-approval amounts) that are
1319        // orthogonal to mixture models.
1320        let base_amount = if let Some(ft) = fraud_type {
1321            let pattern = self.fraud_type_to_amount_pattern(ft);
1322            self.amount_sampler.sample_fraud(pattern)
1323        } else if let Some(ref mut adv) = self.advanced_amount_sampler {
1324            adv.sample_decimal()
1325        } else {
1326            self.amount_sampler.sample()
1327        };
1328        // v3.5.3+: if a conditional-amount override is configured and
1329        // the JE is non-fraud, re-sample the amount from the conditional
1330        // distribution using the computed context. Fraud entries bypass
1331        // this path to preserve fraud-pattern semantics (as with the
1332        // advanced sampler cascade above).
1333        let base_amount = if fraud_type.is_none() {
1334            // Compute input context BEFORE taking &mut on the sampler
1335            // to avoid borrow-checker conflict with the immutable
1336            // `conditional_input_value` call.
1337            let input = self.conditional_input_value(posting_date);
1338            if let Some(ref mut cond) = self.conditional_amount_override {
1339                cond.sample_decimal(input)
1340            } else {
1341                base_amount
1342            }
1343        } else {
1344            base_amount
1345        };
1346
1347        // v4.1.6+: if a copula is configured AND an advanced amount
1348        // sampler with a ppf is available, use true rank-preserving
1349        // inverse-CDF sampling — amount is drawn DIRECTLY from the
1350        // sampler's quantile at `u`, replacing (not nudging) the
1351        // independently-drawn base_amount. This makes empirical
1352        // Kendall-τ match the copula's theoretical τ.
1353        //
1354        // Fallback for copula-without-advanced-sampler: keep the
1355        // v4.1.0 log-scale multiplier nudge (observable correlation,
1356        // diluted magnitude).
1357        let base_amount = if fraud_type.is_none() {
1358            if let Some((u, _v)) = copula_uv {
1359                if let Some(ref adv) = self.advanced_amount_sampler {
1360                    adv.ppf_decimal(u)
1361                } else {
1362                    let log_mult = 4.0 * (u - 0.5);
1363                    let adjusted = base_amount.to_f64().unwrap_or(1.0) * log_mult.exp();
1364                    Decimal::from_f64_retain(adjusted).unwrap_or(base_amount)
1365                }
1366            } else {
1367                base_amount
1368            }
1369        } else {
1370            base_amount
1371        };
1372
1373        // Apply temporal drift if configured
1374        let drift_adjusted_amount = {
1375            let drift = self.get_drift_adjustments(posting_date);
1376            if drift.amount_mean_multiplier != 1.0 {
1377                // Apply drift multiplier (includes seasonal factor if enabled)
1378                let multiplier = drift.amount_mean_multiplier * drift.seasonal_factor;
1379                let adjusted = base_amount.to_f64().unwrap_or(1.0) * multiplier;
1380                Decimal::from_f64_retain(adjusted).unwrap_or(base_amount)
1381            } else {
1382                base_amount
1383            }
1384        };
1385
1386        // Apply human variation to amounts for non-automated transactions
1387        let total_amount = if is_automated {
1388            drift_adjusted_amount // Automated systems use exact amounts
1389        } else {
1390            self.apply_human_variation(drift_adjusted_amount)
1391        };
1392
1393        // Generate debit lines
1394        let debit_amounts = self
1395            .amount_sampler
1396            .sample_summing_to(line_spec.debit_count, total_amount);
1397        for (i, amount) in debit_amounts.into_iter().enumerate() {
1398            let account_number = self.select_debit_account().account_number.clone();
1399            let mut line = JournalEntryLine::debit(
1400                entry.header.document_id,
1401                (i + 1) as u32,
1402                account_number.clone(),
1403                amount,
1404            );
1405
1406            // Generate line text if enabled
1407            if self.template_config.descriptions.generate_line_text {
1408                line.line_text = Some(self.description_generator.generate_line_text(
1409                    &account_number,
1410                    &context,
1411                    &mut self.rng,
1412                ));
1413            }
1414
1415            entry.add_line(line);
1416        }
1417
1418        // Generate credit lines - use the SAME amounts to ensure balance
1419        let credit_amounts = self
1420            .amount_sampler
1421            .sample_summing_to(line_spec.credit_count, total_amount);
1422        for (i, amount) in credit_amounts.into_iter().enumerate() {
1423            let account_number = self.select_credit_account().account_number.clone();
1424            let mut line = JournalEntryLine::credit(
1425                entry.header.document_id,
1426                (line_spec.debit_count + i + 1) as u32,
1427                account_number.clone(),
1428                amount,
1429            );
1430
1431            // Generate line text if enabled
1432            if self.template_config.descriptions.generate_line_text {
1433                line.line_text = Some(self.description_generator.generate_line_text(
1434                    &account_number,
1435                    &context,
1436                    &mut self.rng,
1437                ));
1438            }
1439
1440            entry.add_line(line);
1441        }
1442
1443        // Enrich line items with account descriptions, cost centers, etc.
1444        self.enrich_line_items(&mut entry);
1445
1446        // Apply persona-based errors if enabled and it's a human user
1447        if self.persona_errors_enabled && !is_automated {
1448            self.maybe_inject_persona_error(&mut entry);
1449        }
1450
1451        // Apply approval workflow if enabled and amount exceeds threshold
1452        if self.approval_enabled {
1453            self.maybe_apply_approval_workflow(&mut entry, posting_date);
1454        }
1455
1456        // Populate approved_by / approval_date from the approval workflow
1457        self.populate_approval_fields(&mut entry, posting_date);
1458
1459        // Maybe start a batch of similar entries for realism
1460        self.maybe_start_batch(&entry);
1461
1462        entry
1463    }
1464
1465    /// Enable or disable persona-based error injection.
1466    ///
1467    /// When enabled, entries created by human personas have a chance
1468    /// to contain realistic human errors based on their experience level.
1469    pub fn with_persona_errors(mut self, enabled: bool) -> Self {
1470        self.persona_errors_enabled = enabled;
1471        self
1472    }
1473
1474    /// Set fraud configuration for fraud injection.
1475    ///
1476    /// When fraud is enabled in the config, transactions have a chance
1477    /// to be marked as fraudulent based on the configured fraud rate.
1478    pub fn with_fraud_config(mut self, config: FraudConfig) -> Self {
1479        self.fraud_config = config;
1480        self
1481    }
1482
1483    /// Check if persona errors are enabled.
1484    pub fn persona_errors_enabled(&self) -> bool {
1485        self.persona_errors_enabled
1486    }
1487
1488    /// Enable or disable batch processing behavior.
1489    ///
1490    /// When enabled (default), the generator will occasionally produce batches
1491    /// of similar entries, simulating how humans batch similar work together.
1492    pub fn with_batching(mut self, enabled: bool) -> Self {
1493        if !enabled {
1494            self.batch_state = None;
1495        }
1496        self
1497    }
1498
1499    /// Check if batch processing is enabled.
1500    pub fn batching_enabled(&self) -> bool {
1501        // Batching is implicitly enabled when not explicitly disabled
1502        true
1503    }
1504
1505    /// Maybe start a batch based on the current entry.
1506    ///
1507    /// Humans often batch similar work: processing invoices from one vendor,
1508    /// entering expense reports for a trip, reconciling similar items.
1509    fn maybe_start_batch(&mut self, entry: &JournalEntry) {
1510        // Only start batch for non-automated, non-fraud entries
1511        if entry.header.source == TransactionSource::Automated || entry.header.is_fraud {
1512            return;
1513        }
1514
1515        // 15% chance to start a batch (most work is not batched)
1516        if self.rng.random::<f64>() > 0.15 {
1517            return;
1518        }
1519
1520        // Extract key attributes for batching
1521        let base_account = entry
1522            .lines
1523            .first()
1524            .map(|l| l.gl_account.clone())
1525            .unwrap_or_default();
1526
1527        let base_amount = entry.total_debit();
1528
1529        self.batch_state = Some(BatchState {
1530            base_account_number: base_account,
1531            base_amount,
1532            base_business_process: entry.header.business_process,
1533            base_posting_date: entry.header.posting_date,
1534            remaining: self.rng.random_range(2..7), // 2-6 more similar entries
1535        });
1536    }
1537
1538    /// Generate an entry that's part of the current batch.
1539    ///
1540    /// Batched entries have:
1541    /// - Same or very similar business process
1542    /// - Same posting date (batched work done together)
1543    /// - Similar amounts (within ±15%)
1544    /// - Same debit account (processing similar items)
1545    fn generate_batched_entry(&mut self) -> JournalEntry {
1546        use rust_decimal::Decimal;
1547
1548        // Decrement batch counter
1549        if let Some(ref mut state) = self.batch_state {
1550            state.remaining = state.remaining.saturating_sub(1);
1551        }
1552
1553        let Some(batch) = self.batch_state.clone() else {
1554            // This is a programming error - batch_state should be set before calling this method.
1555            // Clear state and fall back to generating a standard entry instead of panicking.
1556            tracing::warn!(
1557                "generate_batched_entry called without batch_state; generating standard entry"
1558            );
1559            self.batch_state = None;
1560            return self.generate();
1561        };
1562
1563        // Use the batch's posting date (work done on same day)
1564        let posting_date = batch.base_posting_date;
1565
1566        self.count += 1;
1567        let document_id = self.generate_deterministic_uuid();
1568
1569        // Select same company (batched work is usually same company)
1570        let company_code = self.company_selector.select(&mut self.rng).to_string();
1571
1572        // Use simplified line spec for batched entries (usually 2-line)
1573        let _line_spec = LineItemSpec {
1574            total_count: 2,
1575            debit_count: 1,
1576            credit_count: 1,
1577            split_type: DebitCreditSplit::Equal,
1578        };
1579
1580        // Batched entries are always manual
1581        let source = TransactionSource::Manual;
1582
1583        // Use the batch's business process
1584        let business_process = batch.base_business_process.unwrap_or(BusinessProcess::R2R);
1585
1586        // Sample time
1587        let time = self.temporal_sampler.sample_time(true);
1588        let created_at = posting_date.and_time(time).and_utc();
1589
1590        // Same user for batched work
1591        let (created_by, user_persona) = self.select_user(false);
1592
1593        // Create header
1594        let mut header =
1595            JournalEntryHeader::with_deterministic_id(company_code, posting_date, document_id);
1596        header.created_at = created_at;
1597        header.source = source;
1598        header.created_by = created_by;
1599        header.user_persona = user_persona;
1600        header.business_process = Some(business_process);
1601        header.document_type = Self::document_type_for_process(business_process).to_string();
1602
1603        // Batched manual entries have Manual source document
1604        header.source_document = Some(DocumentRef::Manual);
1605
1606        // ISA 240 audit flags for batched entries (always manual)
1607        header.is_manual = true;
1608        header.source_system = if self.rng.random::<f64>() < 0.70 {
1609            "manual".to_string()
1610        } else {
1611            "spreadsheet".to_string()
1612        };
1613        header.is_post_close = posting_date.month() == self.end_date.month()
1614            && posting_date.year() == self.end_date.year()
1615            && posting_date.day() > 25;
1616        header.created_date =
1617            posting_date.and_hms_opt(time.hour().min(23), time.minute(), time.second());
1618
1619        // Generate similar amount (within ±15% of base)
1620        let variation = self.rng.random_range(-0.15..0.15);
1621        let varied_amount =
1622            batch.base_amount * (Decimal::ONE + Decimal::try_from(variation).unwrap_or_default());
1623        let total_amount = varied_amount.round_dp(2).max(Decimal::from(1));
1624
1625        // Create the entry
1626        let mut entry = JournalEntry::new(header);
1627
1628        // Use same debit account as batch base
1629        let debit_line = JournalEntryLine::debit(
1630            entry.header.document_id,
1631            1,
1632            batch.base_account_number.clone(),
1633            total_amount,
1634        );
1635        entry.add_line(debit_line);
1636
1637        // Select a credit account
1638        let credit_account = self.select_credit_account().account_number.clone();
1639        let credit_line =
1640            JournalEntryLine::credit(entry.header.document_id, 2, credit_account, total_amount);
1641        entry.add_line(credit_line);
1642
1643        // Enrich line items with account descriptions, cost centers, etc.
1644        self.enrich_line_items(&mut entry);
1645
1646        // Apply persona-based errors if enabled
1647        if self.persona_errors_enabled {
1648            self.maybe_inject_persona_error(&mut entry);
1649        }
1650
1651        // Apply approval workflow if enabled
1652        if self.approval_enabled {
1653            self.maybe_apply_approval_workflow(&mut entry, posting_date);
1654        }
1655
1656        // Populate approved_by / approval_date from the approval workflow
1657        self.populate_approval_fields(&mut entry, posting_date);
1658
1659        // Clear batch state if no more entries remaining
1660        if batch.remaining <= 1 {
1661            self.batch_state = None;
1662        }
1663
1664        entry
1665    }
1666
1667    /// Maybe inject a persona-appropriate error based on the persona's error rate.
1668    fn maybe_inject_persona_error(&mut self, entry: &mut JournalEntry) {
1669        // Parse persona from the entry header
1670        let persona_str = &entry.header.user_persona;
1671        let persona = match persona_str.to_lowercase().as_str() {
1672            s if s.contains("junior") => UserPersona::JuniorAccountant,
1673            s if s.contains("senior") => UserPersona::SeniorAccountant,
1674            s if s.contains("controller") => UserPersona::Controller,
1675            s if s.contains("manager") => UserPersona::Manager,
1676            s if s.contains("executive") => UserPersona::Executive,
1677            _ => return, // Don't inject errors for unknown personas
1678        };
1679
1680        // Get base error rate from persona
1681        let base_error_rate = persona.error_rate();
1682
1683        // Apply stress factors based on posting date
1684        let adjusted_rate = self.apply_stress_factors(base_error_rate, entry.header.posting_date);
1685
1686        // Check if error should occur based on adjusted rate
1687        if self.rng.random::<f64>() >= adjusted_rate {
1688            return; // No error this time
1689        }
1690
1691        // Select and inject persona-appropriate error
1692        self.inject_human_error(entry, persona);
1693    }
1694
1695    /// Apply contextual stress factors to the base error rate.
1696    ///
1697    /// Stress factors increase error likelihood during:
1698    /// - Month-end (day >= 28): 1.5x more errors due to deadline pressure
1699    /// - Quarter-end (Mar, Jun, Sep, Dec): additional 25% boost
1700    /// - Year-end (December 28-31): 2.0x more errors due to audit pressure
1701    /// - Monday morning (catch-up work): 20% more errors
1702    /// - Friday afternoon (rushing to leave): 30% more errors
1703    fn apply_stress_factors(&self, base_rate: f64, posting_date: chrono::NaiveDate) -> f64 {
1704        use chrono::Datelike;
1705
1706        let mut rate = base_rate;
1707        let day = posting_date.day();
1708        let month = posting_date.month();
1709
1710        // Year-end stress (December 28-31): double the error rate
1711        if month == 12 && day >= 28 {
1712            rate *= 2.0;
1713            return rate.min(0.5); // Cap at 50% to keep it realistic
1714        }
1715
1716        // Quarter-end stress (last days of Mar, Jun, Sep, Dec)
1717        if matches!(month, 3 | 6 | 9 | 12) && day >= 28 {
1718            rate *= 1.75; // 75% more errors at quarter end
1719            return rate.min(0.4);
1720        }
1721
1722        // Month-end stress (last 3 days of month)
1723        if day >= 28 {
1724            rate *= 1.5; // 50% more errors at month end
1725        }
1726
1727        // Day-of-week stress effects
1728        let weekday = posting_date.weekday();
1729        match weekday {
1730            chrono::Weekday::Mon => {
1731                // Monday: catching up, often rushed
1732                rate *= 1.2;
1733            }
1734            chrono::Weekday::Fri => {
1735                // Friday: rushing to finish before weekend
1736                rate *= 1.3;
1737            }
1738            _ => {}
1739        }
1740
1741        // Cap at 40% to keep it realistic
1742        rate.min(0.4)
1743    }
1744
1745    /// Apply human-like variation to an amount.
1746    ///
1747    /// Humans don't enter perfectly calculated amounts - they:
1748    /// - Round amounts differently
1749    /// - Estimate instead of calculating exactly
1750    /// - Make small input variations
1751    ///
1752    /// This applies small variations (typically ±2%) to make amounts more realistic.
1753    fn apply_human_variation(&mut self, amount: rust_decimal::Decimal) -> rust_decimal::Decimal {
1754        use rust_decimal::Decimal;
1755
1756        // Automated transactions or very small amounts don't get variation
1757        if amount < Decimal::from(10) {
1758            return amount;
1759        }
1760
1761        // 70% chance of human variation being applied
1762        if self.rng.random::<f64>() > 0.70 {
1763            return amount;
1764        }
1765
1766        // Decide which type of human variation to apply
1767        let variation_type: u8 = self.rng.random_range(0..4);
1768
1769        match variation_type {
1770            0 => {
1771                // ±2% variation (common for estimated amounts)
1772                let variation_pct = self.rng.random_range(-0.02..0.02);
1773                let variation = amount * Decimal::try_from(variation_pct).unwrap_or_default();
1774                (amount + variation).round_dp(2)
1775            }
1776            1 => {
1777                // Round to nearest $10
1778                let ten = Decimal::from(10);
1779                (amount / ten).round() * ten
1780            }
1781            2 => {
1782                // Round to nearest $100 (for larger amounts)
1783                if amount >= Decimal::from(500) {
1784                    let hundred = Decimal::from(100);
1785                    (amount / hundred).round() * hundred
1786                } else {
1787                    amount
1788                }
1789            }
1790            3 => {
1791                // Slight under/over payment (±$0.01 to ±$1.00)
1792                let cents = Decimal::new(self.rng.random_range(-100..100), 2);
1793                (amount + cents).max(Decimal::ZERO).round_dp(2)
1794            }
1795            _ => amount,
1796        }
1797    }
1798
1799    /// Rebalance an entry after a one-sided amount modification.
1800    ///
1801    /// When an error modifies one line's amount, this finds a line on the opposite
1802    /// side (credit if modified was debit, or vice versa) and adjusts it by the
1803    /// same impact to maintain balance.
1804    fn rebalance_entry(entry: &mut JournalEntry, modified_was_debit: bool, impact: Decimal) {
1805        // Find a line on the opposite side to adjust
1806        let balancing_idx = entry.lines.iter().position(|l| {
1807            if modified_was_debit {
1808                l.credit_amount > Decimal::ZERO
1809            } else {
1810                l.debit_amount > Decimal::ZERO
1811            }
1812        });
1813
1814        if let Some(idx) = balancing_idx {
1815            if modified_was_debit {
1816                entry.lines[idx].credit_amount += impact;
1817            } else {
1818                entry.lines[idx].debit_amount += impact;
1819            }
1820        }
1821    }
1822
1823    /// Inject a human-like error based on the persona.
1824    ///
1825    /// All error types maintain balance - amount modifications are applied to both sides.
1826    /// Entries are marked with [HUMAN_ERROR:*] tags in header_text for ML detection.
1827    fn inject_human_error(&mut self, entry: &mut JournalEntry, persona: UserPersona) {
1828        use rust_decimal::Decimal;
1829
1830        // Different personas make different types of errors
1831        let error_type: u8 = match persona {
1832            UserPersona::JuniorAccountant => {
1833                // Junior accountants make more varied errors
1834                self.rng.random_range(0..5)
1835            }
1836            UserPersona::SeniorAccountant => {
1837                // Senior accountants mainly make transposition errors
1838                self.rng.random_range(0..3)
1839            }
1840            UserPersona::Controller | UserPersona::Manager => {
1841                // Controllers/managers mainly make rounding or cutoff errors
1842                self.rng.random_range(3..5)
1843            }
1844            _ => return,
1845        };
1846
1847        match error_type {
1848            0 => {
1849                // Transposed digits in an amount
1850                if let Some(line) = entry.lines.get_mut(0) {
1851                    let is_debit = line.debit_amount > Decimal::ZERO;
1852                    let original_amount = if is_debit {
1853                        line.debit_amount
1854                    } else {
1855                        line.credit_amount
1856                    };
1857
1858                    // Simple digit swap in the string representation
1859                    let s = original_amount.to_string();
1860                    if s.len() >= 2 {
1861                        let chars: Vec<char> = s.chars().collect();
1862                        let pos = self.rng.random_range(0..chars.len().saturating_sub(1));
1863                        if chars[pos].is_ascii_digit()
1864                            && chars.get(pos + 1).is_some_and(char::is_ascii_digit)
1865                        {
1866                            let mut new_chars = chars;
1867                            new_chars.swap(pos, pos + 1);
1868                            if let Ok(new_amount) =
1869                                new_chars.into_iter().collect::<String>().parse::<Decimal>()
1870                            {
1871                                let impact = new_amount - original_amount;
1872
1873                                // Apply to the modified line
1874                                if is_debit {
1875                                    entry.lines[0].debit_amount = new_amount;
1876                                } else {
1877                                    entry.lines[0].credit_amount = new_amount;
1878                                }
1879
1880                                // Rebalance the entry
1881                                Self::rebalance_entry(entry, is_debit, impact);
1882
1883                                entry.header.header_text = Some(
1884                                    entry.header.header_text.clone().unwrap_or_default()
1885                                        + " [HUMAN_ERROR:TRANSPOSITION]",
1886                                );
1887                            }
1888                        }
1889                    }
1890                }
1891            }
1892            1 => {
1893                // Wrong decimal place (off by factor of 10)
1894                if let Some(line) = entry.lines.get_mut(0) {
1895                    let is_debit = line.debit_amount > Decimal::ZERO;
1896                    let original_amount = if is_debit {
1897                        line.debit_amount
1898                    } else {
1899                        line.credit_amount
1900                    };
1901
1902                    let new_amount = original_amount * Decimal::new(10, 0);
1903                    let impact = new_amount - original_amount;
1904
1905                    // Apply to the modified line
1906                    if is_debit {
1907                        entry.lines[0].debit_amount = new_amount;
1908                    } else {
1909                        entry.lines[0].credit_amount = new_amount;
1910                    }
1911
1912                    // Rebalance the entry
1913                    Self::rebalance_entry(entry, is_debit, impact);
1914
1915                    entry.header.header_text = Some(
1916                        entry.header.header_text.clone().unwrap_or_default()
1917                            + " [HUMAN_ERROR:DECIMAL_SHIFT]",
1918                    );
1919                }
1920            }
1921            2 => {
1922                // Typo in description (doesn't affect balance)
1923                if let Some(ref mut text) = entry.header.header_text {
1924                    let typos = ["teh", "adn", "wiht", "taht", "recieve"];
1925                    let correct = ["the", "and", "with", "that", "receive"];
1926                    let idx = self.rng.random_range(0..typos.len());
1927                    if text.to_lowercase().contains(correct[idx]) {
1928                        *text = text.replace(correct[idx], typos[idx]);
1929                        *text = format!("{text} [HUMAN_ERROR:TYPO]");
1930                    }
1931                }
1932            }
1933            3 => {
1934                // Rounding to round number
1935                if let Some(line) = entry.lines.get_mut(0) {
1936                    let is_debit = line.debit_amount > Decimal::ZERO;
1937                    let original_amount = if is_debit {
1938                        line.debit_amount
1939                    } else {
1940                        line.credit_amount
1941                    };
1942
1943                    let new_amount =
1944                        (original_amount / Decimal::new(100, 0)).round() * Decimal::new(100, 0);
1945                    let impact = new_amount - original_amount;
1946
1947                    // Apply to the modified line
1948                    if is_debit {
1949                        entry.lines[0].debit_amount = new_amount;
1950                    } else {
1951                        entry.lines[0].credit_amount = new_amount;
1952                    }
1953
1954                    // Rebalance the entry
1955                    Self::rebalance_entry(entry, is_debit, impact);
1956
1957                    entry.header.header_text = Some(
1958                        entry.header.header_text.clone().unwrap_or_default()
1959                            + " [HUMAN_ERROR:ROUNDED]",
1960                    );
1961                }
1962            }
1963            // Late posting marker (document date much earlier than posting
1964            // date). Doesn't create an imbalance.
1965            4 if entry.header.document_date == entry.header.posting_date => {
1966                let days_late = self.rng.random_range(5..15);
1967                entry.header.document_date =
1968                    entry.header.posting_date - chrono::Duration::days(days_late);
1969                entry.header.header_text = Some(
1970                    entry.header.header_text.clone().unwrap_or_default()
1971                        + " [HUMAN_ERROR:LATE_POSTING]",
1972                );
1973            }
1974            _ => {}
1975        }
1976    }
1977
1978    /// Apply approval workflow for high-value transactions.
1979    ///
1980    /// If the entry amount exceeds the approval threshold, simulate an
1981    /// approval workflow with appropriate approvers based on amount.
1982    fn maybe_apply_approval_workflow(
1983        &mut self,
1984        entry: &mut JournalEntry,
1985        _posting_date: NaiveDate,
1986    ) {
1987        use rust_decimal::Decimal;
1988
1989        let amount = entry.total_debit();
1990
1991        // Skip if amount is below threshold
1992        if amount <= self.approval_threshold {
1993            // Auto-approved below threshold
1994            let workflow = ApprovalWorkflow::auto_approved(
1995                entry.header.created_by.clone(),
1996                entry.header.user_persona.clone(),
1997                amount,
1998                entry.header.created_at,
1999            );
2000            entry.header.approval_workflow = Some(workflow);
2001            return;
2002        }
2003
2004        // Mark as SOX relevant for high-value transactions
2005        entry.header.sox_relevant = true;
2006
2007        // Determine required approval levels based on amount
2008        let required_levels = if amount > Decimal::new(100000, 0) {
2009            3 // Executive approval required
2010        } else if amount > Decimal::new(50000, 0) {
2011            2 // Senior management approval
2012        } else {
2013            1 // Manager approval
2014        };
2015
2016        // Create the approval workflow
2017        let mut workflow = ApprovalWorkflow::new(
2018            entry.header.created_by.clone(),
2019            entry.header.user_persona.clone(),
2020            amount,
2021        );
2022        workflow.required_levels = required_levels;
2023
2024        // Simulate submission
2025        let submit_time = entry.header.created_at;
2026        let submit_action = ApprovalAction::new(
2027            entry.header.created_by.clone(),
2028            entry.header.user_persona.clone(),
2029            self.parse_persona(&entry.header.user_persona),
2030            ApprovalActionType::Submit,
2031            0,
2032        )
2033        .with_timestamp(submit_time);
2034
2035        workflow.actions.push(submit_action);
2036        workflow.status = ApprovalStatus::Pending;
2037        workflow.submitted_at = Some(submit_time);
2038
2039        // Simulate approvals with realistic delays
2040        let mut current_time = submit_time;
2041        for level in 1..=required_levels {
2042            // Add delay for approval (1-3 business hours per level)
2043            let delay_hours = self.rng.random_range(1..4);
2044            current_time += chrono::Duration::hours(delay_hours);
2045
2046            // Skip weekends
2047            while current_time.weekday() == chrono::Weekday::Sat
2048                || current_time.weekday() == chrono::Weekday::Sun
2049            {
2050                current_time += chrono::Duration::days(1);
2051            }
2052
2053            // Generate approver based on level
2054            let (approver_id, approver_role) = self.select_approver(level);
2055
2056            let approve_action = ApprovalAction::new(
2057                approver_id.clone(),
2058                approver_role.to_string(),
2059                approver_role,
2060                ApprovalActionType::Approve,
2061                level,
2062            )
2063            .with_timestamp(current_time);
2064
2065            workflow.actions.push(approve_action);
2066            workflow.current_level = level;
2067        }
2068
2069        // Mark as approved
2070        workflow.status = ApprovalStatus::Approved;
2071        workflow.approved_at = Some(current_time);
2072
2073        entry.header.approval_workflow = Some(workflow);
2074    }
2075
2076    /// Select an approver based on the required level.
2077    fn select_approver(&mut self, level: u8) -> (String, UserPersona) {
2078        let persona = match level {
2079            1 => UserPersona::Manager,
2080            2 => UserPersona::Controller,
2081            _ => UserPersona::Executive,
2082        };
2083
2084        // Try to get from user pool first
2085        if let Some(ref pool) = self.user_pool {
2086            if let Some(user) = pool.get_random_user(persona, &mut self.rng) {
2087                return (user.user_id.clone(), persona);
2088            }
2089        }
2090
2091        // Fallback to generated approver
2092        let approver_id = match persona {
2093            UserPersona::Manager => format!("MGR{:04}", self.rng.random_range(1..100)),
2094            UserPersona::Controller => format!("CTRL{:04}", self.rng.random_range(1..20)),
2095            UserPersona::Executive => format!("EXEC{:04}", self.rng.random_range(1..10)),
2096            _ => format!("USR{:04}", self.rng.random_range(1..1000)),
2097        };
2098
2099        (approver_id, persona)
2100    }
2101
2102    /// Parse user persona from string.
2103    fn parse_persona(&self, persona_str: &str) -> UserPersona {
2104        match persona_str.to_lowercase().as_str() {
2105            s if s.contains("junior") => UserPersona::JuniorAccountant,
2106            s if s.contains("senior") => UserPersona::SeniorAccountant,
2107            s if s.contains("controller") => UserPersona::Controller,
2108            s if s.contains("manager") => UserPersona::Manager,
2109            s if s.contains("executive") => UserPersona::Executive,
2110            s if s.contains("automated") || s.contains("system") => UserPersona::AutomatedSystem,
2111            _ => UserPersona::JuniorAccountant, // Default
2112        }
2113    }
2114
2115    /// Enable or disable approval workflow.
2116    pub fn with_approval(mut self, enabled: bool) -> Self {
2117        self.approval_enabled = enabled;
2118        self
2119    }
2120
2121    /// Set the approval threshold amount.
2122    pub fn with_approval_threshold(mut self, threshold: rust_decimal::Decimal) -> Self {
2123        self.approval_threshold = threshold;
2124        self
2125    }
2126
2127    /// Set the SOD violation rate for approval tracking.
2128    ///
2129    /// When a transaction is approved, there is a `rate` probability (0.0 to 1.0)
2130    /// that the approver is the same as the creator, which constitutes a SOD violation.
2131    /// Default is 0.10 (10%).
2132    pub fn with_sod_violation_rate(mut self, rate: f64) -> Self {
2133        self.sod_violation_rate = rate;
2134        self
2135    }
2136
2137    /// Populate `approved_by` and `approval_date` from the approval workflow,
2138    /// and flag SOD violations when the approver matches the creator.
2139    fn populate_approval_fields(&mut self, entry: &mut JournalEntry, posting_date: NaiveDate) {
2140        if let Some(ref workflow) = entry.header.approval_workflow {
2141            // Extract the last approver from the workflow actions
2142            let last_approver = workflow
2143                .actions
2144                .iter()
2145                .rev()
2146                .find(|a| matches!(a.action, ApprovalActionType::Approve));
2147
2148            if let Some(approver_action) = last_approver {
2149                entry.header.approved_by = Some(approver_action.actor_id.clone());
2150                entry.header.approval_date = Some(approver_action.action_timestamp.date_naive());
2151            } else {
2152                // No explicit approver (auto-approved); use the preparer
2153                entry.header.approved_by = Some(workflow.preparer_id.clone());
2154                entry.header.approval_date = Some(posting_date);
2155            }
2156
2157            // Inject SOD violation: with configured probability, set approver = creator
2158            if self.rng.random::<f64>() < self.sod_violation_rate {
2159                let creator = entry.header.created_by.clone();
2160                entry.header.approved_by = Some(creator);
2161                entry.header.sod_violation = true;
2162                entry.header.sod_conflict_type = Some(SodConflictType::PreparerApprover);
2163            }
2164        }
2165    }
2166
2167    /// Set the temporal drift controller for simulating distribution changes over time.
2168    ///
2169    /// When drift is enabled, amounts and other distributions will shift based on
2170    /// the period (month) to simulate realistic temporal evolution like inflation
2171    /// or increasing fraud rates.
2172    pub fn with_drift_controller(mut self, controller: DriftController) -> Self {
2173        self.drift_controller = Some(controller);
2174        self
2175    }
2176
2177    /// Set drift configuration directly.
2178    ///
2179    /// Creates a drift controller from the config. Total periods is calculated
2180    /// from the date range.
2181    pub fn with_drift_config(mut self, config: DriftConfig, seed: u64) -> Self {
2182        if config.enabled {
2183            let total_periods = self.calculate_total_periods();
2184            self.drift_controller = Some(DriftController::new(config, seed, total_periods));
2185        }
2186        self
2187    }
2188
2189    /// Calculate total periods (months) in the date range.
2190    fn calculate_total_periods(&self) -> u32 {
2191        let start_year = self.start_date.year();
2192        let start_month = self.start_date.month();
2193        let end_year = self.end_date.year();
2194        let end_month = self.end_date.month();
2195
2196        ((end_year - start_year) * 12 + (end_month as i32 - start_month as i32) + 1).max(1) as u32
2197    }
2198
2199    /// Calculate the period number (0-indexed) for a given date.
2200    fn date_to_period(&self, date: NaiveDate) -> u32 {
2201        let start_year = self.start_date.year();
2202        let start_month = self.start_date.month() as i32;
2203        let date_year = date.year();
2204        let date_month = date.month() as i32;
2205
2206        ((date_year - start_year) * 12 + (date_month - start_month)).max(0) as u32
2207    }
2208
2209    /// Get drift adjustments for a given date.
2210    fn get_drift_adjustments(&self, date: NaiveDate) -> DriftAdjustments {
2211        if let Some(ref controller) = self.drift_controller {
2212            let period = self.date_to_period(date);
2213            controller.compute_adjustments(period)
2214        } else {
2215            DriftAdjustments::none()
2216        }
2217    }
2218
2219    /// Select a user from the pool or generate a generic user ID.
2220    #[inline]
2221    fn select_user(&mut self, is_automated: bool) -> (String, String) {
2222        if let Some(ref pool) = self.user_pool {
2223            let persona = if is_automated {
2224                UserPersona::AutomatedSystem
2225            } else {
2226                // Random distribution among human personas
2227                let roll: f64 = self.rng.random();
2228                if roll < 0.4 {
2229                    UserPersona::JuniorAccountant
2230                } else if roll < 0.7 {
2231                    UserPersona::SeniorAccountant
2232                } else if roll < 0.85 {
2233                    UserPersona::Controller
2234                } else {
2235                    UserPersona::Manager
2236                }
2237            };
2238
2239            if let Some(user) = pool.get_random_user(persona, &mut self.rng) {
2240                return (user.user_id.clone(), user.persona.to_string());
2241            }
2242        }
2243
2244        // Fallback to generic format
2245        if is_automated {
2246            (
2247                format!("BATCH{:04}", self.rng.random_range(1..=20)),
2248                "automated_system".to_string(),
2249            )
2250        } else {
2251            (
2252                format!("USER{:04}", self.rng.random_range(1..=40)),
2253                "senior_accountant".to_string(),
2254            )
2255        }
2256    }
2257
2258    /// Select transaction source based on configuration weights.
2259    #[inline]
2260    fn select_source(&mut self) -> TransactionSource {
2261        let roll: f64 = self.rng.random();
2262        let dist = &self.config.source_distribution;
2263
2264        if roll < dist.manual {
2265            TransactionSource::Manual
2266        } else if roll < dist.manual + dist.automated {
2267            TransactionSource::Automated
2268        } else if roll < dist.manual + dist.automated + dist.recurring {
2269            TransactionSource::Recurring
2270        } else {
2271            TransactionSource::Adjustment
2272        }
2273    }
2274
2275    /// Select a business process based on configuration weights.
2276    #[inline]
2277    /// Map a business process to a SAP-style document type code.
2278    ///
2279    /// - P2P → "KR" (vendor invoice)
2280    /// - O2C → "DR" (customer invoice)
2281    /// - R2R → "SA" (general journal)
2282    /// - H2R → "HR" (HR posting)
2283    /// - A2R → "AA" (asset posting)
2284    /// - others → "SA"
2285    fn document_type_for_process(process: BusinessProcess) -> &'static str {
2286        match process {
2287            BusinessProcess::P2P => "KR",
2288            BusinessProcess::O2C => "DR",
2289            BusinessProcess::R2R => "SA",
2290            BusinessProcess::H2R => "HR",
2291            BusinessProcess::A2R => "AA",
2292            _ => "SA",
2293        }
2294    }
2295
2296    fn select_business_process(&mut self) -> BusinessProcess {
2297        *datasynth_core::utils::weighted_select(&mut self.rng, &self.business_process_weights)
2298    }
2299
2300    #[inline]
2301    fn select_debit_account(&mut self) -> &GLAccount {
2302        let accounts = self.coa.get_accounts_by_type(AccountType::Asset);
2303        let expense_accounts = self.coa.get_accounts_by_type(AccountType::Expense);
2304
2305        // 60% asset, 40% expense for debits
2306        let all: Vec<_> = if self.rng.random::<f64>() < 0.6 {
2307            accounts
2308        } else {
2309            expense_accounts
2310        };
2311
2312        all.choose(&mut self.rng).copied().unwrap_or_else(|| {
2313            tracing::warn!(
2314                "Account selection returned empty list, falling back to first COA account"
2315            );
2316            &self.coa.accounts[0]
2317        })
2318    }
2319
2320    #[inline]
2321    fn select_credit_account(&mut self) -> &GLAccount {
2322        let liability_accounts = self.coa.get_accounts_by_type(AccountType::Liability);
2323        let revenue_accounts = self.coa.get_accounts_by_type(AccountType::Revenue);
2324
2325        // 60% liability, 40% revenue for credits
2326        let all: Vec<_> = if self.rng.random::<f64>() < 0.6 {
2327            liability_accounts
2328        } else {
2329            revenue_accounts
2330        };
2331
2332        all.choose(&mut self.rng).copied().unwrap_or_else(|| {
2333            tracing::warn!(
2334                "Account selection returned empty list, falling back to first COA account"
2335            );
2336            &self.coa.accounts[0]
2337        })
2338    }
2339}
2340
2341impl Generator for JournalEntryGenerator {
2342    type Item = JournalEntry;
2343    type Config = (
2344        TransactionConfig,
2345        Arc<ChartOfAccounts>,
2346        Vec<String>,
2347        NaiveDate,
2348        NaiveDate,
2349    );
2350
2351    fn new(config: Self::Config, seed: u64) -> Self {
2352        Self::new_with_params(config.0, config.1, config.2, config.3, config.4, seed)
2353    }
2354
2355    fn generate_one(&mut self) -> Self::Item {
2356        self.generate()
2357    }
2358
2359    fn reset(&mut self) {
2360        self.rng = seeded_rng(self.seed, 0);
2361        self.line_sampler.reset(self.seed + 1);
2362        self.amount_sampler.reset(self.seed + 2);
2363        self.temporal_sampler.reset(self.seed + 3);
2364        if let Some(ref mut adv) = self.advanced_amount_sampler {
2365            adv.reset(self.seed + 2);
2366        }
2367        self.count = 0;
2368        self.uuid_factory.reset();
2369
2370        // Reset reference generator by recreating it
2371        let mut ref_gen = ReferenceGenerator::new(
2372            self.start_date.year(),
2373            self.companies
2374                .first()
2375                .map(std::string::String::as_str)
2376                .unwrap_or("1000"),
2377        );
2378        ref_gen.set_prefix(
2379            ReferenceType::Invoice,
2380            &self.template_config.references.invoice_prefix,
2381        );
2382        ref_gen.set_prefix(
2383            ReferenceType::PurchaseOrder,
2384            &self.template_config.references.po_prefix,
2385        );
2386        ref_gen.set_prefix(
2387            ReferenceType::SalesOrder,
2388            &self.template_config.references.so_prefix,
2389        );
2390        self.reference_generator = ref_gen;
2391    }
2392
2393    fn count(&self) -> u64 {
2394        self.count
2395    }
2396
2397    fn seed(&self) -> u64 {
2398        self.seed
2399    }
2400}
2401
2402use datasynth_core::traits::ParallelGenerator;
2403
2404impl ParallelGenerator for JournalEntryGenerator {
2405    /// Split this generator into `parts` independent sub-generators.
2406    ///
2407    /// Each sub-generator gets a deterministic seed derived from the parent seed
2408    /// and its partition index, plus a partitioned UUID factory to avoid contention.
2409    /// The results are deterministic for a given partition count.
2410    fn split(self, parts: usize) -> Vec<Self> {
2411        let parts = parts.max(1);
2412        (0..parts)
2413            .map(|i| {
2414                // Derive a unique seed per partition using a golden-ratio constant
2415                let sub_seed = self
2416                    .seed
2417                    .wrapping_add((i as u64).wrapping_mul(0x9E3779B97F4A7C15));
2418
2419                let mut gen = JournalEntryGenerator::new_with_full_config(
2420                    self.config.clone(),
2421                    Arc::clone(&self.coa),
2422                    self.companies.clone(),
2423                    self.start_date,
2424                    self.end_date,
2425                    sub_seed,
2426                    self.template_config.clone(),
2427                    self.user_pool.clone(),
2428                );
2429
2430                // Copy over configuration state
2431                gen.company_selector = self.company_selector.clone();
2432                gen.vendor_pool = self.vendor_pool.clone();
2433                gen.customer_pool = self.customer_pool.clone();
2434                gen.material_pool = self.material_pool.clone();
2435                gen.using_real_master_data = self.using_real_master_data;
2436                gen.fraud_config = self.fraud_config.clone();
2437                gen.persona_errors_enabled = self.persona_errors_enabled;
2438                gen.approval_enabled = self.approval_enabled;
2439                gen.approval_threshold = self.approval_threshold;
2440                gen.sod_violation_rate = self.sod_violation_rate;
2441                // v3.4.0+: advanced amount sampler (mixture / Pareto /
2442                // Gaussian). Clone and reset the internal RNG with the
2443                // partition's sub_seed so each worker explores a unique
2444                // subsequence without repeating the parent stream.
2445                if let Some(mut adv) = self.advanced_amount_sampler.clone() {
2446                    adv.reset(sub_seed.wrapping_add(2));
2447                    gen.advanced_amount_sampler = Some(adv);
2448                }
2449                // v3.5.3+: conditional amount override — clone + reset
2450                // so each partition gets a fresh deterministic stream.
2451                if let Some(mut cond) = self.conditional_amount_override.clone() {
2452                    cond.reset(sub_seed.wrapping_add(17));
2453                    gen.conditional_amount_override = Some(cond);
2454                }
2455                // v3.5.4+: copula sampler — clone + reset per partition.
2456                if let Some(mut cop) = self.correlation_copula.clone() {
2457                    cop.reset(sub_seed.wrapping_add(31));
2458                    gen.correlation_copula = Some(cop);
2459                }
2460
2461                // Use partitioned UUID factory to eliminate atomic contention
2462                gen.uuid_factory = DeterministicUuidFactory::for_partition(
2463                    sub_seed,
2464                    GeneratorType::JournalEntry,
2465                    i as u8,
2466                );
2467
2468                // Copy temporal patterns if configured
2469                if let Some(ref config) = self.temporal_patterns_config {
2470                    gen.temporal_patterns_config = Some(config.clone());
2471                    // Rebuild business day calculator from the stored config
2472                    if config.business_days.enabled {
2473                        if let Some(ref bdc) = self.business_day_calculator {
2474                            gen.business_day_calculator = Some(bdc.clone());
2475                        }
2476                    }
2477                    // Rebuild processing lag calculator with partition seed
2478                    if config.processing_lags.enabled {
2479                        let lag_config =
2480                            Self::convert_processing_lag_config(&config.processing_lags);
2481                        gen.processing_lag_calculator =
2482                            Some(ProcessingLagCalculator::with_config(sub_seed, lag_config));
2483                    }
2484                }
2485
2486                // Copy drift controller if present
2487                if let Some(ref dc) = self.drift_controller {
2488                    gen.drift_controller = Some(dc.clone());
2489                }
2490
2491                gen
2492            })
2493            .collect()
2494    }
2495}
2496
2497#[cfg(test)]
2498#[allow(clippy::unwrap_used)]
2499mod tests {
2500    use super::*;
2501    use crate::ChartOfAccountsGenerator;
2502
2503    #[test]
2504    fn test_generate_balanced_entries() {
2505        let mut coa_gen =
2506            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2507        let coa = Arc::new(coa_gen.generate());
2508
2509        let mut je_gen = JournalEntryGenerator::new_with_params(
2510            TransactionConfig::default(),
2511            coa,
2512            vec!["1000".to_string()],
2513            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2514            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2515            42,
2516        );
2517
2518        let mut balanced_count = 0;
2519        for _ in 0..100 {
2520            let entry = je_gen.generate();
2521
2522            // Skip entries with human errors as they may be intentionally unbalanced
2523            let has_human_error = entry
2524                .header
2525                .header_text
2526                .as_ref()
2527                .map(|t| t.contains("[HUMAN_ERROR:"))
2528                .unwrap_or(false);
2529
2530            if !has_human_error {
2531                assert!(
2532                    entry.is_balanced(),
2533                    "Entry {:?} is not balanced",
2534                    entry.header.document_id
2535                );
2536                balanced_count += 1;
2537            }
2538            assert!(entry.line_count() >= 2, "Entry has fewer than 2 lines");
2539        }
2540
2541        // Ensure most entries are balanced (human errors are rare)
2542        assert!(
2543            balanced_count >= 80,
2544            "Expected at least 80 balanced entries, got {}",
2545            balanced_count
2546        );
2547    }
2548
2549    #[test]
2550    fn test_deterministic_generation() {
2551        let mut coa_gen =
2552            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2553        let coa = Arc::new(coa_gen.generate());
2554
2555        let mut gen1 = JournalEntryGenerator::new_with_params(
2556            TransactionConfig::default(),
2557            Arc::clone(&coa),
2558            vec!["1000".to_string()],
2559            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2560            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2561            42,
2562        );
2563
2564        let mut gen2 = JournalEntryGenerator::new_with_params(
2565            TransactionConfig::default(),
2566            coa,
2567            vec!["1000".to_string()],
2568            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2569            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2570            42,
2571        );
2572
2573        for _ in 0..50 {
2574            let e1 = gen1.generate();
2575            let e2 = gen2.generate();
2576            assert_eq!(e1.header.document_id, e2.header.document_id);
2577            assert_eq!(e1.total_debit(), e2.total_debit());
2578        }
2579    }
2580
2581    #[test]
2582    fn test_templates_generate_descriptions() {
2583        let mut coa_gen =
2584            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2585        let coa = Arc::new(coa_gen.generate());
2586
2587        // Enable all template features
2588        let template_config = TemplateConfig {
2589            names: datasynth_config::schema::NameTemplateConfig {
2590                generate_realistic_names: true,
2591                email_domain: "test.com".to_string(),
2592                culture_distribution: datasynth_config::schema::CultureDistribution::default(),
2593            },
2594            descriptions: datasynth_config::schema::DescriptionTemplateConfig {
2595                generate_header_text: true,
2596                generate_line_text: true,
2597            },
2598            references: datasynth_config::schema::ReferenceTemplateConfig {
2599                generate_references: true,
2600                invoice_prefix: "TEST-INV".to_string(),
2601                po_prefix: "TEST-PO".to_string(),
2602                so_prefix: "TEST-SO".to_string(),
2603            },
2604            path: None,
2605            merge_strategy: datasynth_config::TemplateMergeStrategy::default(),
2606        };
2607
2608        let mut je_gen = JournalEntryGenerator::new_with_full_config(
2609            TransactionConfig::default(),
2610            coa,
2611            vec!["1000".to_string()],
2612            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2613            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2614            42,
2615            template_config,
2616            None,
2617        )
2618        .with_persona_errors(false); // Disable for template testing
2619
2620        for _ in 0..10 {
2621            let entry = je_gen.generate();
2622
2623            // Verify header text is populated
2624            assert!(
2625                entry.header.header_text.is_some(),
2626                "Header text should be populated"
2627            );
2628
2629            // Verify reference is populated
2630            assert!(
2631                entry.header.reference.is_some(),
2632                "Reference should be populated"
2633            );
2634
2635            // Verify business process is set
2636            assert!(
2637                entry.header.business_process.is_some(),
2638                "Business process should be set"
2639            );
2640
2641            // Verify line text is populated
2642            for line in &entry.lines {
2643                assert!(line.line_text.is_some(), "Line text should be populated");
2644            }
2645
2646            // Entry should still be balanced
2647            assert!(entry.is_balanced());
2648        }
2649    }
2650
2651    #[test]
2652    fn test_user_pool_integration() {
2653        let mut coa_gen =
2654            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2655        let coa = Arc::new(coa_gen.generate());
2656
2657        let companies = vec!["1000".to_string()];
2658
2659        // Generate user pool
2660        let mut user_gen = crate::UserGenerator::new(42);
2661        let user_pool = user_gen.generate_standard(&companies);
2662
2663        let mut je_gen = JournalEntryGenerator::new_with_full_config(
2664            TransactionConfig::default(),
2665            coa,
2666            companies,
2667            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2668            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2669            42,
2670            TemplateConfig::default(),
2671            Some(user_pool),
2672        );
2673
2674        // Generate entries and verify user IDs are from pool
2675        for _ in 0..20 {
2676            let entry = je_gen.generate();
2677
2678            // User ID should not be generic BATCH/USER format when pool is used
2679            // (though it may still fall back if random selection misses)
2680            assert!(!entry.header.created_by.is_empty());
2681        }
2682    }
2683
2684    #[test]
2685    fn test_master_data_connection() {
2686        let mut coa_gen =
2687            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2688        let coa = Arc::new(coa_gen.generate());
2689
2690        // Create test vendors
2691        let vendors = vec![
2692            Vendor::new("V-TEST-001", "Test Vendor Alpha", VendorType::Supplier),
2693            Vendor::new("V-TEST-002", "Test Vendor Beta", VendorType::Technology),
2694        ];
2695
2696        // Create test customers
2697        let customers = vec![
2698            Customer::new("C-TEST-001", "Test Customer One", CustomerType::Corporate),
2699            Customer::new(
2700                "C-TEST-002",
2701                "Test Customer Two",
2702                CustomerType::SmallBusiness,
2703            ),
2704        ];
2705
2706        // Create test materials
2707        let materials = vec![Material::new(
2708            "MAT-TEST-001",
2709            "Test Material A",
2710            MaterialType::RawMaterial,
2711        )];
2712
2713        // Create generator with master data
2714        let generator = JournalEntryGenerator::new_with_params(
2715            TransactionConfig::default(),
2716            coa,
2717            vec!["1000".to_string()],
2718            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2719            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2720            42,
2721        );
2722
2723        // Without master data
2724        assert!(!generator.is_using_real_master_data());
2725
2726        // Connect master data
2727        let generator_with_data = generator
2728            .with_vendors(&vendors)
2729            .with_customers(&customers)
2730            .with_materials(&materials);
2731
2732        // Should now be using real master data
2733        assert!(generator_with_data.is_using_real_master_data());
2734    }
2735
2736    #[test]
2737    fn test_with_master_data_convenience_method() {
2738        let mut coa_gen =
2739            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2740        let coa = Arc::new(coa_gen.generate());
2741
2742        let vendors = vec![Vendor::new("V-001", "Vendor One", VendorType::Supplier)];
2743        let customers = vec![Customer::new(
2744            "C-001",
2745            "Customer One",
2746            CustomerType::Corporate,
2747        )];
2748        let materials = vec![Material::new(
2749            "MAT-001",
2750            "Material One",
2751            MaterialType::RawMaterial,
2752        )];
2753
2754        let generator = JournalEntryGenerator::new_with_params(
2755            TransactionConfig::default(),
2756            coa,
2757            vec!["1000".to_string()],
2758            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2759            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2760            42,
2761        )
2762        .with_master_data(&vendors, &customers, &materials);
2763
2764        assert!(generator.is_using_real_master_data());
2765    }
2766
2767    #[test]
2768    fn test_stress_factors_increase_error_rate() {
2769        let mut coa_gen =
2770            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2771        let coa = Arc::new(coa_gen.generate());
2772
2773        let generator = JournalEntryGenerator::new_with_params(
2774            TransactionConfig::default(),
2775            coa,
2776            vec!["1000".to_string()],
2777            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2778            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2779            42,
2780        );
2781
2782        let base_rate = 0.1;
2783
2784        // Regular day - no stress factors
2785        let regular_day = NaiveDate::from_ymd_opt(2024, 6, 15).unwrap(); // Mid-June Wednesday
2786        let regular_rate = generator.apply_stress_factors(base_rate, regular_day);
2787        assert!(
2788            (regular_rate - base_rate).abs() < 0.01,
2789            "Regular day should have minimal stress factor adjustment"
2790        );
2791
2792        // Month end - 50% more errors
2793        let month_end = NaiveDate::from_ymd_opt(2024, 6, 29).unwrap(); // June 29 (Saturday)
2794        let month_end_rate = generator.apply_stress_factors(base_rate, month_end);
2795        assert!(
2796            month_end_rate > regular_rate,
2797            "Month end should have higher error rate than regular day"
2798        );
2799
2800        // Year end - double the error rate
2801        let year_end = NaiveDate::from_ymd_opt(2024, 12, 30).unwrap(); // December 30
2802        let year_end_rate = generator.apply_stress_factors(base_rate, year_end);
2803        assert!(
2804            year_end_rate > month_end_rate,
2805            "Year end should have highest error rate"
2806        );
2807
2808        // Friday stress
2809        let friday = NaiveDate::from_ymd_opt(2024, 6, 14).unwrap(); // Friday
2810        let friday_rate = generator.apply_stress_factors(base_rate, friday);
2811        assert!(
2812            friday_rate > regular_rate,
2813            "Friday should have higher error rate than mid-week"
2814        );
2815
2816        // Monday stress
2817        let monday = NaiveDate::from_ymd_opt(2024, 6, 17).unwrap(); // Monday
2818        let monday_rate = generator.apply_stress_factors(base_rate, monday);
2819        assert!(
2820            monday_rate > regular_rate,
2821            "Monday should have higher error rate than mid-week"
2822        );
2823    }
2824
2825    #[test]
2826    fn test_batching_produces_similar_entries() {
2827        let mut coa_gen =
2828            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2829        let coa = Arc::new(coa_gen.generate());
2830
2831        // Use seed 123 which is more likely to trigger batching
2832        let mut je_gen = JournalEntryGenerator::new_with_params(
2833            TransactionConfig::default(),
2834            coa,
2835            vec!["1000".to_string()],
2836            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2837            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2838            123,
2839        )
2840        .with_persona_errors(false); // Disable to ensure balanced entries
2841
2842        // Generate many entries - at 15% batch rate, should see some batches
2843        let entries: Vec<JournalEntry> = (0..200).map(|_| je_gen.generate()).collect();
2844
2845        // Check that all entries are balanced (batched or not)
2846        for entry in &entries {
2847            assert!(
2848                entry.is_balanced(),
2849                "All entries including batched should be balanced"
2850            );
2851        }
2852
2853        // Count entries with same-day posting dates (batch indicator)
2854        let mut date_counts: std::collections::HashMap<NaiveDate, usize> =
2855            std::collections::HashMap::new();
2856        for entry in &entries {
2857            *date_counts.entry(entry.header.posting_date).or_insert(0) += 1;
2858        }
2859
2860        // With batching, some dates should have multiple entries
2861        let dates_with_multiple = date_counts.values().filter(|&&c| c > 1).count();
2862        assert!(
2863            dates_with_multiple > 0,
2864            "With batching, should see some dates with multiple entries"
2865        );
2866    }
2867
2868    #[test]
2869    fn test_temporal_patterns_business_days() {
2870        use datasynth_config::schema::{
2871            BusinessDaySchemaConfig, CalendarSchemaConfig, TemporalPatternsConfig,
2872        };
2873
2874        let mut coa_gen =
2875            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2876        let coa = Arc::new(coa_gen.generate());
2877
2878        // Create temporal patterns config with business days enabled
2879        let temporal_config = TemporalPatternsConfig {
2880            enabled: true,
2881            business_days: BusinessDaySchemaConfig {
2882                enabled: true,
2883                ..Default::default()
2884            },
2885            calendars: CalendarSchemaConfig {
2886                regions: vec!["US".to_string()],
2887                custom_holidays: vec![],
2888            },
2889            ..Default::default()
2890        };
2891
2892        let mut je_gen = JournalEntryGenerator::new_with_params(
2893            TransactionConfig::default(),
2894            coa,
2895            vec!["1000".to_string()],
2896            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2897            NaiveDate::from_ymd_opt(2024, 3, 31).unwrap(), // Q1 2024
2898            42,
2899        )
2900        .with_temporal_patterns(temporal_config, 42)
2901        .with_persona_errors(false);
2902
2903        // Generate entries and verify none fall on weekends
2904        let entries: Vec<JournalEntry> = (0..100).map(|_| je_gen.generate()).collect();
2905
2906        for entry in &entries {
2907            let weekday = entry.header.posting_date.weekday();
2908            assert!(
2909                weekday != chrono::Weekday::Sat && weekday != chrono::Weekday::Sun,
2910                "Posting date {:?} should not be a weekend",
2911                entry.header.posting_date
2912            );
2913        }
2914    }
2915
2916    #[test]
2917    fn test_default_generation_filters_weekends() {
2918        // Verify that weekend entries are <5% even when temporal_patterns is NOT enabled.
2919        // This tests the fix where new_with_full_config always creates a default
2920        // BusinessDayCalculator with US holidays as a fallback.
2921        let mut coa_gen =
2922            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2923        let coa = Arc::new(coa_gen.generate());
2924
2925        let mut je_gen = JournalEntryGenerator::new_with_params(
2926            TransactionConfig::default(),
2927            coa,
2928            vec!["1000".to_string()],
2929            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2930            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2931            42,
2932        )
2933        .with_persona_errors(false);
2934
2935        let total = 500;
2936        let entries: Vec<JournalEntry> = (0..total).map(|_| je_gen.generate()).collect();
2937
2938        let weekend_count = entries
2939            .iter()
2940            .filter(|e| {
2941                let wd = e.header.posting_date.weekday();
2942                wd == chrono::Weekday::Sat || wd == chrono::Weekday::Sun
2943            })
2944            .count();
2945
2946        let weekend_pct = weekend_count as f64 / total as f64;
2947        assert!(
2948            weekend_pct < 0.05,
2949            "Expected weekend entries <5% of total without temporal_patterns enabled, \
2950             but got {:.1}% ({}/{})",
2951            weekend_pct * 100.0,
2952            weekend_count,
2953            total
2954        );
2955    }
2956
2957    #[test]
2958    fn test_document_type_derived_from_business_process() {
2959        let mut coa_gen =
2960            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2961        let coa = Arc::new(coa_gen.generate());
2962
2963        let mut je_gen = JournalEntryGenerator::new_with_params(
2964            TransactionConfig::default(),
2965            coa,
2966            vec!["1000".to_string()],
2967            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2968            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2969            99,
2970        )
2971        .with_persona_errors(false)
2972        .with_batching(false);
2973
2974        let total = 200;
2975        let mut doc_types = std::collections::HashSet::new();
2976        let mut sa_count = 0_usize;
2977
2978        for _ in 0..total {
2979            let entry = je_gen.generate();
2980            let dt = &entry.header.document_type;
2981            doc_types.insert(dt.clone());
2982            if dt == "SA" {
2983                sa_count += 1;
2984            }
2985        }
2986
2987        // Should have more than 3 distinct document types
2988        assert!(
2989            doc_types.len() > 3,
2990            "Expected >3 distinct document types, got {} ({:?})",
2991            doc_types.len(),
2992            doc_types,
2993        );
2994
2995        // "SA" should be less than 50% (R2R is 20% of the weight)
2996        let sa_pct = sa_count as f64 / total as f64;
2997        assert!(
2998            sa_pct < 0.50,
2999            "Expected SA <50%, got {:.1}% ({}/{})",
3000            sa_pct * 100.0,
3001            sa_count,
3002            total,
3003        );
3004    }
3005
3006    #[test]
3007    fn test_enrich_line_items_account_description() {
3008        let mut coa_gen =
3009            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
3010        let coa = Arc::new(coa_gen.generate());
3011
3012        let mut je_gen = JournalEntryGenerator::new_with_params(
3013            TransactionConfig::default(),
3014            coa,
3015            vec!["1000".to_string()],
3016            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
3017            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
3018            42,
3019        )
3020        .with_persona_errors(false);
3021
3022        let total = 200;
3023        let entries: Vec<JournalEntry> = (0..total).map(|_| je_gen.generate()).collect();
3024
3025        // Count lines with account_description populated
3026        let total_lines: usize = entries.iter().map(|e| e.lines.len()).sum();
3027        let lines_with_desc: usize = entries
3028            .iter()
3029            .flat_map(|e| &e.lines)
3030            .filter(|l| l.account_description.is_some())
3031            .count();
3032
3033        let desc_pct = lines_with_desc as f64 / total_lines as f64;
3034        assert!(
3035            desc_pct > 0.95,
3036            "Expected >95% of lines to have account_description, got {:.1}% ({}/{})",
3037            desc_pct * 100.0,
3038            lines_with_desc,
3039            total_lines,
3040        );
3041    }
3042
3043    #[test]
3044    fn test_enrich_line_items_cost_center_for_expense_accounts() {
3045        let mut coa_gen =
3046            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
3047        let coa = Arc::new(coa_gen.generate());
3048
3049        let mut je_gen = JournalEntryGenerator::new_with_params(
3050            TransactionConfig::default(),
3051            coa,
3052            vec!["1000".to_string()],
3053            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
3054            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
3055            42,
3056        )
3057        .with_persona_errors(false);
3058
3059        let total = 300;
3060        let entries: Vec<JournalEntry> = (0..total).map(|_| je_gen.generate()).collect();
3061
3062        // Count expense account lines (5xxx/6xxx) with cost_center populated
3063        let expense_lines: Vec<&JournalEntryLine> = entries
3064            .iter()
3065            .flat_map(|e| &e.lines)
3066            .filter(|l| {
3067                let first = l.gl_account.chars().next().unwrap_or('0');
3068                first == '5' || first == '6'
3069            })
3070            .collect();
3071
3072        if !expense_lines.is_empty() {
3073            let with_cc = expense_lines
3074                .iter()
3075                .filter(|l| l.cost_center.is_some())
3076                .count();
3077            let cc_pct = with_cc as f64 / expense_lines.len() as f64;
3078            assert!(
3079                cc_pct > 0.80,
3080                "Expected >80% of expense lines to have cost_center, got {:.1}% ({}/{})",
3081                cc_pct * 100.0,
3082                with_cc,
3083                expense_lines.len(),
3084            );
3085        }
3086    }
3087
3088    #[test]
3089    fn test_enrich_line_items_profit_center_and_line_text() {
3090        let mut coa_gen =
3091            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
3092        let coa = Arc::new(coa_gen.generate());
3093
3094        let mut je_gen = JournalEntryGenerator::new_with_params(
3095            TransactionConfig::default(),
3096            coa,
3097            vec!["1000".to_string()],
3098            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
3099            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
3100            42,
3101        )
3102        .with_persona_errors(false);
3103
3104        let total = 100;
3105        let entries: Vec<JournalEntry> = (0..total).map(|_| je_gen.generate()).collect();
3106
3107        let total_lines: usize = entries.iter().map(|e| e.lines.len()).sum();
3108
3109        // All lines should have profit_center
3110        let with_pc = entries
3111            .iter()
3112            .flat_map(|e| &e.lines)
3113            .filter(|l| l.profit_center.is_some())
3114            .count();
3115        let pc_pct = with_pc as f64 / total_lines as f64;
3116        assert!(
3117            pc_pct > 0.95,
3118            "Expected >95% of lines to have profit_center, got {:.1}% ({}/{})",
3119            pc_pct * 100.0,
3120            with_pc,
3121            total_lines,
3122        );
3123
3124        // All lines should have line_text (either from template or header fallback)
3125        let with_text = entries
3126            .iter()
3127            .flat_map(|e| &e.lines)
3128            .filter(|l| l.line_text.is_some())
3129            .count();
3130        let text_pct = with_text as f64 / total_lines as f64;
3131        assert!(
3132            text_pct > 0.95,
3133            "Expected >95% of lines to have line_text, got {:.1}% ({}/{})",
3134            text_pct * 100.0,
3135            with_text,
3136            total_lines,
3137        );
3138    }
3139
3140    // --- ISA 240 audit flag tests ---
3141
3142    #[test]
3143    fn test_je_has_audit_flags() {
3144        let mut coa_gen =
3145            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
3146        let coa = Arc::new(coa_gen.generate());
3147
3148        let mut je_gen = JournalEntryGenerator::new_with_params(
3149            TransactionConfig::default(),
3150            coa,
3151            vec!["1000".to_string()],
3152            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
3153            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
3154            42,
3155        )
3156        .with_persona_errors(false);
3157
3158        for _ in 0..100 {
3159            let entry = je_gen.generate();
3160
3161            // source_system should always be non-empty
3162            assert!(
3163                !entry.header.source_system.is_empty(),
3164                "source_system should be populated, got empty string"
3165            );
3166
3167            // created_by should always be non-empty (already tested elsewhere, but confirm)
3168            assert!(
3169                !entry.header.created_by.is_empty(),
3170                "created_by should be populated"
3171            );
3172
3173            // created_date should always be populated
3174            assert!(
3175                entry.header.created_date.is_some(),
3176                "created_date should be populated"
3177            );
3178        }
3179    }
3180
3181    #[test]
3182    fn test_manual_entry_rate() {
3183        let mut coa_gen =
3184            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
3185        let coa = Arc::new(coa_gen.generate());
3186
3187        let mut je_gen = JournalEntryGenerator::new_with_params(
3188            TransactionConfig::default(),
3189            coa,
3190            vec!["1000".to_string()],
3191            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
3192            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
3193            42,
3194        )
3195        .with_persona_errors(false)
3196        .with_batching(false);
3197
3198        let total = 1000;
3199        let entries: Vec<JournalEntry> = (0..total).map(|_| je_gen.generate()).collect();
3200
3201        let manual_count = entries.iter().filter(|e| e.header.is_manual).count();
3202        let manual_rate = manual_count as f64 / total as f64;
3203
3204        // Default source_distribution.manual is typically around 0.05-0.15
3205        // Allow a wide tolerance for statistical variation
3206        assert!(
3207            manual_rate > 0.01 && manual_rate < 0.50,
3208            "Manual entry rate should be reasonable (1%-50%), got {:.1}% ({}/{})",
3209            manual_rate * 100.0,
3210            manual_count,
3211            total,
3212        );
3213
3214        // is_manual should match TransactionSource::Manual
3215        for entry in &entries {
3216            let source_is_manual = entry.header.source == TransactionSource::Manual;
3217            assert_eq!(
3218                entry.header.is_manual, source_is_manual,
3219                "is_manual should match source == Manual"
3220            );
3221        }
3222    }
3223
3224    #[test]
3225    fn test_manual_source_consistency() {
3226        let mut coa_gen =
3227            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
3228        let coa = Arc::new(coa_gen.generate());
3229
3230        let mut je_gen = JournalEntryGenerator::new_with_params(
3231            TransactionConfig::default(),
3232            coa,
3233            vec!["1000".to_string()],
3234            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
3235            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
3236            42,
3237        )
3238        .with_persona_errors(false)
3239        .with_batching(false);
3240
3241        for _ in 0..500 {
3242            let entry = je_gen.generate();
3243
3244            if entry.header.is_manual {
3245                // Manual entries must have source_system "manual" or "spreadsheet"
3246                assert!(
3247                    entry.header.source_system == "manual"
3248                        || entry.header.source_system == "spreadsheet",
3249                    "Manual entry should have source_system 'manual' or 'spreadsheet', got '{}'",
3250                    entry.header.source_system,
3251                );
3252            } else {
3253                // Non-manual entries must NOT have source_system "manual" or "spreadsheet"
3254                assert!(
3255                    entry.header.source_system != "manual"
3256                        && entry.header.source_system != "spreadsheet",
3257                    "Non-manual entry should not have source_system 'manual' or 'spreadsheet', got '{}'",
3258                    entry.header.source_system,
3259                );
3260            }
3261        }
3262    }
3263
3264    #[test]
3265    fn test_created_date_before_posting() {
3266        let mut coa_gen =
3267            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
3268        let coa = Arc::new(coa_gen.generate());
3269
3270        let mut je_gen = JournalEntryGenerator::new_with_params(
3271            TransactionConfig::default(),
3272            coa,
3273            vec!["1000".to_string()],
3274            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
3275            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
3276            42,
3277        )
3278        .with_persona_errors(false);
3279
3280        for _ in 0..500 {
3281            let entry = je_gen.generate();
3282
3283            if let Some(created_date) = entry.header.created_date {
3284                let created_naive_date = created_date.date();
3285                assert!(
3286                    created_naive_date <= entry.header.posting_date,
3287                    "created_date ({}) should be <= posting_date ({})",
3288                    created_naive_date,
3289                    entry.header.posting_date,
3290                );
3291            }
3292        }
3293    }
3294}