Skip to main content

datasynth_generators/
je_generator.rs

1//! Journal Entry generator with statistical distributions.
2
3use chrono::{Datelike, NaiveDate, Timelike};
4use datasynth_core::utils::seeded_rng;
5use rand::prelude::*;
6use rand_chacha::ChaCha8Rng;
7use rust_decimal::prelude::*;
8use rust_decimal::Decimal;
9use std::sync::Arc;
10
11use tracing::debug;
12
13use datasynth_config::schema::{
14    AdvancedDistributionConfig, FraudConfig, GeneratorConfig, MixtureDistributionType,
15    TemplateConfig, TemporalPatternsConfig, TransactionConfig,
16};
17use datasynth_core::distributions::{
18    AdvancedAmountSampler, BusinessDayCalculator, CrossDayConfig, DriftAdjustments, DriftConfig,
19    DriftController, EventType, IndustryAmountProfile, IndustryType, LagDistribution,
20    PeriodEndConfig, PeriodEndDynamics, PeriodEndModel, ProcessingLagCalculator,
21    ProcessingLagConfig, *,
22};
23use datasynth_core::models::*;
24use datasynth_core::templates::{
25    descriptions::DescriptionContext, DescriptionGenerator, ReferenceGenerator, ReferenceType,
26};
27use datasynth_core::traits::Generator;
28use datasynth_core::uuid_factory::{DeterministicUuidFactory, GeneratorType};
29use datasynth_core::CountryPack;
30
31use crate::company_selector::WeightedCompanySelector;
32use crate::user_generator::{UserGenerator, UserGeneratorConfig};
33
34/// Generator for realistic journal entries.
35pub struct JournalEntryGenerator {
36    rng: ChaCha8Rng,
37    seed: u64,
38    config: TransactionConfig,
39    coa: Arc<ChartOfAccounts>,
40    companies: Vec<String>,
41    company_selector: WeightedCompanySelector,
42    line_sampler: LineItemSampler,
43    amount_sampler: AmountSampler,
44    temporal_sampler: TemporalSampler,
45    start_date: NaiveDate,
46    end_date: NaiveDate,
47    count: u64,
48    uuid_factory: DeterministicUuidFactory,
49    // Enhanced features
50    user_pool: Option<UserPool>,
51    description_generator: DescriptionGenerator,
52    reference_generator: ReferenceGenerator,
53    template_config: TemplateConfig,
54    vendor_pool: VendorPool,
55    customer_pool: CustomerPool,
56    // Material pool for realistic material references
57    material_pool: Option<MaterialPool>,
58    // Flag indicating whether we're using real master data vs defaults
59    using_real_master_data: bool,
60    // Fraud generation
61    fraud_config: FraudConfig,
62    // Persona-based error injection
63    persona_errors_enabled: bool,
64    // Approval threshold enforcement
65    approval_enabled: bool,
66    approval_threshold: rust_decimal::Decimal,
67    // SOD violation rate for approval tracking (0.0 to 1.0)
68    sod_violation_rate: f64,
69    // Batching behavior - humans often process similar items together
70    batch_state: Option<BatchState>,
71    // Temporal drift controller for simulating distribution changes over time
72    drift_controller: Option<DriftController>,
73    // Temporal patterns components
74    business_day_calculator: Option<BusinessDayCalculator>,
75    processing_lag_calculator: Option<ProcessingLagCalculator>,
76    temporal_patterns_config: Option<TemporalPatternsConfig>,
77    // Business-process weights for the O2C/P2P/R2R/H2R/A2R volume mix. Must
78    // sum to 1.0 (validated by config schema). Default matches the legacy
79    // hard-coded 0.35/0.30/0.20/0.10/0.05 distribution.
80    business_process_weights: [(BusinessProcess, f64); 5],
81    // v3.4.0 advanced distributions (mixture models + industry profiles).
82    // None preserves v3.3.2 byte-for-byte behavior; populated only when the
83    // caller opts in via [`set_advanced_distributions`].
84    advanced_amount_sampler: Option<AdvancedAmountSampler>,
85    // v3.5.3+ conditional amount override. Populated when
86    // `config.distributions.conditional` contains an entry where
87    // `output_field == "amount"` and `input_field ∈ {"month",
88    // "quarter", "constant"}`. Applied *after* the fraud-pattern /
89    // advanced-sampler / legacy-sampler cascade on non-fraud entries
90    // so it can steer amounts by calendar context without disturbing
91    // fraud semantics.
92    conditional_amount_override: Option<datasynth_core::distributions::ConditionalSampler>,
93    // v3.5.4+ Gaussian copula for amount↔line_count correlation. When
94    // populated, each non-fraud JE draws a (u, v) pair; u nudges amount
95    // via a `(0.75 + 0.5*u)` multiplier and v biases line_count toward
96    // the upper/lower end of its range. Produces observable Spearman
97    // correlation without rewiring existing samplers for inverse-CDF.
98    correlation_copula: Option<datasynth_core::distributions::BivariateCopulaSampler>,
99}
100
/// Default business-process volume mix as `(process, weight)` pairs.
///
/// The five weights add up to 1.0 (0.35 + 0.30 + 0.20 + 0.10 + 0.05). This is
/// the value `new_with_full_config` stores in `business_process_weights`
/// until a caller overrides it.
const DEFAULT_BUSINESS_PROCESS_WEIGHTS: [(BusinessProcess, f64); 5] = [
    (BusinessProcess::O2C, 0.35),
    (BusinessProcess::P2P, 0.30),
    (BusinessProcess::R2R, 0.20),
    (BusinessProcess::H2R, 0.10),
    (BusinessProcess::A2R, 0.05),
];
108
109/// Map the schema-level [`datasynth_config::schema::IndustryProfileType`]
110/// onto the distributions-layer [`IndustryType`], then return that industry's
111/// pre-configured `sales_amounts` mixture. Used as a fallback when the
112/// caller enables `distributions.amounts` but supplies no components.
113/// Per-entry context channels for conditional-distribution overrides.
114///
115/// v4.1.0+ supported `input_field` values:
116///
117///   - `"month"` — posting-date month (1..=12)
118///   - `"quarter"` — posting-date quarter (1..=4)
119///   - `"year"` — posting-date year (e.g. 2026.0)
120///   - `"day_of_week"` — 1 (Mon) .. 7 (Sun)
121///   - `"day_of_month"` — 1..=31
122///   - `"day_of_year"` — 1..=366
123///   - `"week_of_year"` — 1..=53
124///   - `"is_period_end"` — 1.0 when posting_date is the last business
125///     day of the month, else 0.0
126///   - `"is_quarter_end"` — 1.0 when posting_date is in a quarter-end
127///     month AND is the last business day, else 0.0
128///   - `"is_year_end"` — 1.0 when posting_date is in December AND is
129///     the last business day, else 0.0
130///   - `"constant"` / empty — always 0.0 (treats as unconditional)
131///
132/// Unsupported values cause the conditional rule to be silently ignored
133/// to keep runtime robust against user typos.
134impl JournalEntryGenerator {
135    fn supported_conditional_input(field: &str) -> bool {
136        matches!(
137            field,
138            "month"
139                | "quarter"
140                | "year"
141                | "day_of_week"
142                | "day_of_month"
143                | "day_of_year"
144                | "week_of_year"
145                | "is_period_end"
146                | "is_quarter_end"
147                | "is_year_end"
148                | "constant"
149                | ""
150        )
151    }
152
153    fn conditional_input_value(&self, posting_date: chrono::NaiveDate) -> f64 {
154        let input_field = match self
155            .conditional_amount_override
156            .as_ref()
157            .map(|s| s.config().input_field.as_str())
158        {
159            Some(f) => f,
160            None => return 0.0,
161        };
162
163        let is_last_business_day = |d: chrono::NaiveDate| -> bool {
164            // Last day-of-month → is_period_end. Handles Feb/leap-year
165            // via chrono's num_days_from_ce roundabout; simpler path:
166            // if adding 1 day moves to a different month, this is EOM.
167            let next = d.succ_opt();
168            match next {
169                Some(n) => n.month() != d.month(),
170                None => true,
171            }
172        };
173
174        match input_field {
175            "month" => posting_date.month() as f64,
176            "quarter" => ((posting_date.month() - 1) / 3 + 1) as f64,
177            "year" => posting_date.year() as f64,
178            "day_of_week" => posting_date.weekday().number_from_monday() as f64,
179            "day_of_month" => posting_date.day() as f64,
180            "day_of_year" => posting_date.ordinal() as f64,
181            "week_of_year" => posting_date.iso_week().week() as f64,
182            "is_period_end" => f64::from(u8::from(is_last_business_day(posting_date))),
183            "is_quarter_end" => {
184                let m = posting_date.month();
185                let is_q_month = matches!(m, 3 | 6 | 9 | 12);
186                f64::from(u8::from(is_q_month && is_last_business_day(posting_date)))
187            }
188            "is_year_end" => f64::from(u8::from(
189                posting_date.month() == 12 && is_last_business_day(posting_date),
190            )),
191            _ => 0.0,
192        }
193    }
194}
195
196fn industry_profile_to_log_normal(
197    p: datasynth_config::schema::IndustryProfileType,
198) -> datasynth_core::distributions::LogNormalMixtureConfig {
199    use datasynth_config::schema::IndustryProfileType as P;
200    let industry = match p {
201        P::Retail => IndustryType::Retail,
202        P::Manufacturing => IndustryType::Manufacturing,
203        P::FinancialServices => IndustryType::FinancialServices,
204        P::Healthcare => IndustryType::Healthcare,
205        P::Technology => IndustryType::Technology,
206    };
207    IndustryAmountProfile::for_industry(industry).sales_amounts
208}
209
210/// State for tracking batch processing behavior.
211///
212/// When humans process transactions, they often batch similar items together
213/// (e.g., processing all invoices from one vendor, entering similar expenses).
214#[derive(Clone)]
215struct BatchState {
216    /// The base entry template to vary
217    base_account_number: String,
218    base_amount: rust_decimal::Decimal,
219    base_business_process: Option<BusinessProcess>,
220    base_posting_date: NaiveDate,
221    /// Remaining entries in this batch
222    remaining: u8,
223}
224
225impl JournalEntryGenerator {
226    /// Create a new journal entry generator.
227    pub fn new_with_params(
228        config: TransactionConfig,
229        coa: Arc<ChartOfAccounts>,
230        companies: Vec<String>,
231        start_date: NaiveDate,
232        end_date: NaiveDate,
233        seed: u64,
234    ) -> Self {
235        Self::new_with_full_config(
236            config,
237            coa,
238            companies,
239            start_date,
240            end_date,
241            seed,
242            TemplateConfig::default(),
243            None,
244        )
245    }
246
247    /// Create a new journal entry generator with full configuration.
248    #[allow(clippy::too_many_arguments)]
249    pub fn new_with_full_config(
250        config: TransactionConfig,
251        coa: Arc<ChartOfAccounts>,
252        companies: Vec<String>,
253        start_date: NaiveDate,
254        end_date: NaiveDate,
255        seed: u64,
256        template_config: TemplateConfig,
257        user_pool: Option<UserPool>,
258    ) -> Self {
259        // Initialize user pool if not provided
260        let user_pool = user_pool.or_else(|| {
261            if template_config.names.generate_realistic_names {
262                let user_gen_config = UserGeneratorConfig {
263                    culture_distribution: vec![
264                        (
265                            datasynth_core::templates::NameCulture::WesternUs,
266                            template_config.names.culture_distribution.western_us,
267                        ),
268                        (
269                            datasynth_core::templates::NameCulture::Hispanic,
270                            template_config.names.culture_distribution.hispanic,
271                        ),
272                        (
273                            datasynth_core::templates::NameCulture::German,
274                            template_config.names.culture_distribution.german,
275                        ),
276                        (
277                            datasynth_core::templates::NameCulture::French,
278                            template_config.names.culture_distribution.french,
279                        ),
280                        (
281                            datasynth_core::templates::NameCulture::Chinese,
282                            template_config.names.culture_distribution.chinese,
283                        ),
284                        (
285                            datasynth_core::templates::NameCulture::Japanese,
286                            template_config.names.culture_distribution.japanese,
287                        ),
288                        (
289                            datasynth_core::templates::NameCulture::Indian,
290                            template_config.names.culture_distribution.indian,
291                        ),
292                    ],
293                    email_domain: template_config.names.email_domain.clone(),
294                    generate_realistic_names: true,
295                };
296                let mut user_gen = UserGenerator::with_config(seed + 100, user_gen_config);
297                Some(user_gen.generate_standard(&companies))
298            } else {
299                None
300            }
301        });
302
303        // Initialize reference generator
304        let mut ref_gen = ReferenceGenerator::new(
305            start_date.year(),
306            companies
307                .first()
308                .map(std::string::String::as_str)
309                .unwrap_or("1000"),
310        );
311        ref_gen.set_prefix(
312            ReferenceType::Invoice,
313            &template_config.references.invoice_prefix,
314        );
315        ref_gen.set_prefix(
316            ReferenceType::PurchaseOrder,
317            &template_config.references.po_prefix,
318        );
319        ref_gen.set_prefix(
320            ReferenceType::SalesOrder,
321            &template_config.references.so_prefix,
322        );
323
324        // Create weighted company selector (uniform weights for this constructor)
325        let company_selector = WeightedCompanySelector::uniform(companies.clone());
326
327        Self {
328            rng: seeded_rng(seed, 0),
329            seed,
330            config: config.clone(),
331            coa,
332            companies,
333            company_selector,
334            line_sampler: LineItemSampler::with_config(
335                seed + 1,
336                config.line_item_distribution.clone(),
337                config.even_odd_distribution.clone(),
338                config.debit_credit_distribution.clone(),
339            ),
340            amount_sampler: AmountSampler::with_config(seed + 2, config.amounts.clone()),
341            temporal_sampler: TemporalSampler::with_config(
342                seed + 3,
343                config.seasonality.clone(),
344                WorkingHoursConfig::default(),
345                Vec::new(),
346            ),
347            start_date,
348            end_date,
349            count: 0,
350            uuid_factory: DeterministicUuidFactory::new(seed, GeneratorType::JournalEntry),
351            user_pool,
352            description_generator: DescriptionGenerator::new(),
353            reference_generator: ref_gen,
354            template_config,
355            vendor_pool: VendorPool::standard(),
356            customer_pool: CustomerPool::standard(),
357            material_pool: None,
358            using_real_master_data: false,
359            fraud_config: FraudConfig::default(),
360            persona_errors_enabled: true, // Enable by default for realism
361            approval_enabled: true,       // Enable by default for realism
362            approval_threshold: rust_decimal::Decimal::new(10000, 0), // $10,000 default threshold
363            sod_violation_rate: 0.10,     // 10% default SOD violation rate
364            batch_state: None,
365            drift_controller: None,
366            // Always provide a basic BusinessDayCalculator so that weekend/holiday
367            // filtering is active even when temporal_patterns is not explicitly enabled.
368            business_day_calculator: Some(BusinessDayCalculator::new(HolidayCalendar::new(
369                Region::US,
370                start_date.year(),
371            ))),
372            processing_lag_calculator: None,
373            temporal_patterns_config: None,
374            business_process_weights: DEFAULT_BUSINESS_PROCESS_WEIGHTS,
375            advanced_amount_sampler: None,
376            conditional_amount_override: None,
377            correlation_copula: None,
378        }
379    }
380
381    /// Wire v3.4.0 advanced distributions. When the caller's config has
382    /// `distributions.enabled = true` AND `distributions.amounts.enabled =
383    /// true`, the journal-entry generator routes non-fraud amount sampling
384    /// through an [`AdvancedAmountSampler`] (log-normal or Gaussian mixture).
385    ///
386    /// When `distributions.industry_profile` is `Some`, the caller's
387    /// explicitly configured components override nothing — if the component
388    /// list is empty, the industry profile's `sales_amounts` mixture is used
389    /// instead. Explicit components always win.
390    ///
391    /// Returning `Ok(())` with no side effect is intentional for the
392    /// following no-op cases, so callers can unconditionally invoke this:
393    ///   - `config.enabled = false`
394    ///   - `config.amounts.enabled = false`
395    ///   - empty component list with no industry profile
396    ///
397    /// Errors propagate from mixture validation (e.g. weights not summing
398    /// to 1.0, non-positive sigma).
399    pub fn set_advanced_distributions(
400        &mut self,
401        config: &AdvancedDistributionConfig,
402        seed: u64,
403    ) -> Result<(), String> {
404        if !config.enabled {
405            return Ok(());
406        }
407
408        // v3.5.3+: build a conditional-amount override when the config
409        // declares a rule with `output_field == "amount"` and a supported
410        // input field. The override is applied *after* the standard
411        // cascade so it doesn't disturb fraud-path sampling. Unsupported
412        // input fields are ignored with a trace log.
413        self.conditional_amount_override = config
414            .conditional
415            .iter()
416            .find(|c| {
417                c.output_field == "amount" && Self::supported_conditional_input(&c.input_field)
418            })
419            .and_then(|c| {
420                datasynth_core::distributions::ConditionalSampler::new(
421                    seed.wrapping_add(17),
422                    c.to_core_config(),
423                )
424                .ok()
425            });
426
427        // v4.1.0+: all 5 copula types wired (Gaussian / Clayton /
428        // Gumbel / Frank / Student-t). The `BivariateCopulaSampler`
429        // already implements each; v3.5.4 had a filter limiting to
430        // Gaussian only — lifted here now that the smoke test matrix
431        // covers all types.
432        self.correlation_copula = config
433            .correlations
434            .to_core_config_for_pair("amount", "line_count")
435            .and_then(|copula_cfg| {
436                datasynth_core::distributions::BivariateCopulaSampler::new(
437                    seed.wrapping_add(31),
438                    copula_cfg,
439                )
440                .ok()
441            });
442
443        // v3.4.4+: Pareto takes precedence over mixture models when set.
444        // This supports heavy-tailed amount distributions (capex, strategic
445        // contracts, fraud) that log-normal/Gaussian mixtures can't model
446        // as sharply.
447        if let Some(pareto) = &config.pareto {
448            if pareto.enabled {
449                let core_cfg = pareto.to_core_config();
450                self.advanced_amount_sampler =
451                    Some(AdvancedAmountSampler::new_pareto(seed, core_cfg)?);
452                return Ok(());
453            }
454        }
455
456        if !config.amounts.enabled {
457            return Ok(());
458        }
459
460        match config.amounts.distribution_type {
461            MixtureDistributionType::LogNormal => {
462                let lognormal_cfg = config
463                    .amounts
464                    .to_log_normal_config()
465                    .or_else(|| config.industry_profile.map(industry_profile_to_log_normal));
466                if let Some(cfg) = lognormal_cfg {
467                    self.advanced_amount_sampler =
468                        Some(AdvancedAmountSampler::new_log_normal(seed, cfg)?);
469                }
470            }
471            MixtureDistributionType::Gaussian => {
472                if let Some(cfg) = config.amounts.to_gaussian_config() {
473                    self.advanced_amount_sampler =
474                        Some(AdvancedAmountSampler::new_gaussian(seed, cfg)?);
475                }
476            }
477        }
478
479        Ok(())
480    }
481
482    /// Override the business-process volume mix. Weights map directly to the
483    /// `business_processes.*_weight` YAML config; they do not have to sum to
484    /// exactly 1.0 (they're normalized via `weighted_select`).
485    pub fn set_business_process_weights(
486        &mut self,
487        o2c: f64,
488        p2p: f64,
489        r2r: f64,
490        h2r: f64,
491        a2r: f64,
492    ) {
493        self.business_process_weights = [
494            (BusinessProcess::O2C, o2c),
495            (BusinessProcess::P2P, p2p),
496            (BusinessProcess::R2R, r2r),
497            (BusinessProcess::H2R, h2r),
498            (BusinessProcess::A2R, a2r),
499        ];
500    }
501
502    /// Create from a full GeneratorConfig.
503    ///
504    /// This constructor uses the volume_weight from company configs
505    /// for weighted company selection, and fraud config from GeneratorConfig.
506    pub fn from_generator_config(
507        full_config: &GeneratorConfig,
508        coa: Arc<ChartOfAccounts>,
509        start_date: NaiveDate,
510        end_date: NaiveDate,
511        seed: u64,
512    ) -> Self {
513        let companies: Vec<String> = full_config
514            .companies
515            .iter()
516            .map(|c| c.code.clone())
517            .collect();
518
519        // Create weighted selector using volume_weight from company configs
520        let company_selector = WeightedCompanySelector::from_configs(&full_config.companies);
521
522        let mut generator = Self::new_with_full_config(
523            full_config.transactions.clone(),
524            coa,
525            companies,
526            start_date,
527            end_date,
528            seed,
529            full_config.templates.clone(),
530            None,
531        );
532
533        // Override the uniform selector with weighted selector
534        generator.company_selector = company_selector;
535
536        // Set fraud config
537        generator.fraud_config = full_config.fraud.clone();
538
539        // Configure temporal patterns if enabled
540        let temporal_config = &full_config.temporal_patterns;
541        if temporal_config.enabled {
542            generator = generator.with_temporal_patterns(temporal_config.clone(), seed);
543        }
544
545        generator
546    }
547
548    /// Configure temporal patterns including business day calculations and processing lags.
549    ///
550    /// This enables realistic temporal behavior including:
551    /// - Business day awareness (no postings on weekends/holidays)
552    /// - Processing lag modeling (event-to-posting delays)
553    /// - Period-end dynamics (volume spikes at month/quarter/year end)
554    pub fn with_temporal_patterns(mut self, config: TemporalPatternsConfig, seed: u64) -> Self {
555        // Create business day calculator if enabled
556        if config.business_days.enabled {
557            let region = config
558                .calendars
559                .regions
560                .first()
561                .map(|r| Self::parse_region(r))
562                .unwrap_or(Region::US);
563
564            let calendar = HolidayCalendar::new(region, self.start_date.year());
565            self.business_day_calculator = Some(BusinessDayCalculator::new(calendar));
566        }
567
568        // Create processing lag calculator if enabled
569        if config.processing_lags.enabled {
570            let lag_config = Self::convert_processing_lag_config(&config.processing_lags);
571            self.processing_lag_calculator =
572                Some(ProcessingLagCalculator::with_config(seed, lag_config));
573        }
574
575        // Create period-end dynamics if configured
576        let model = config.period_end.model.as_deref().unwrap_or("flat");
577        if model != "flat"
578            || config
579                .period_end
580                .month_end
581                .as_ref()
582                .is_some_and(|m| m.peak_multiplier.unwrap_or(1.0) != 1.0)
583        {
584            let dynamics = Self::convert_period_end_config(&config.period_end);
585            self.temporal_sampler.set_period_end_dynamics(dynamics);
586        }
587
588        self.temporal_patterns_config = Some(config);
589        self
590    }
591
592    /// Configure temporal patterns using a [`CountryPack`] for the holiday calendar.
593    ///
594    /// This is an alternative to [`with_temporal_patterns`] that derives the
595    /// holiday calendar from a country-pack definition rather than the built-in
596    /// region-based calendars.  All other temporal behaviour (business-day
597    /// adjustment, processing lags, period-end dynamics) is configured
598    /// identically.
599    pub fn with_country_pack_temporal(
600        mut self,
601        config: TemporalPatternsConfig,
602        seed: u64,
603        pack: &CountryPack,
604    ) -> Self {
605        // Create business day calculator using the country pack calendar
606        if config.business_days.enabled {
607            let calendar = HolidayCalendar::from_country_pack(pack, self.start_date.year());
608            self.business_day_calculator = Some(BusinessDayCalculator::new(calendar));
609        }
610
611        // Create processing lag calculator if enabled
612        if config.processing_lags.enabled {
613            let lag_config = Self::convert_processing_lag_config(&config.processing_lags);
614            self.processing_lag_calculator =
615                Some(ProcessingLagCalculator::with_config(seed, lag_config));
616        }
617
618        // Create period-end dynamics if configured
619        let model = config.period_end.model.as_deref().unwrap_or("flat");
620        if model != "flat"
621            || config
622                .period_end
623                .month_end
624                .as_ref()
625                .is_some_and(|m| m.peak_multiplier.unwrap_or(1.0) != 1.0)
626        {
627            let dynamics = Self::convert_period_end_config(&config.period_end);
628            self.temporal_sampler.set_period_end_dynamics(dynamics);
629        }
630
631        self.temporal_patterns_config = Some(config);
632        self
633    }
634
635    /// Convert schema processing lag config to core config.
636    fn convert_processing_lag_config(
637        schema: &datasynth_config::schema::ProcessingLagSchemaConfig,
638    ) -> ProcessingLagConfig {
639        let mut config = ProcessingLagConfig {
640            enabled: schema.enabled,
641            ..Default::default()
642        };
643
644        // Helper to convert lag schema to distribution
645        let convert_lag = |lag: &datasynth_config::schema::LagDistributionSchemaConfig| {
646            let mut dist = LagDistribution::log_normal(lag.mu, lag.sigma);
647            if let Some(min) = lag.min_hours {
648                dist.min_lag_hours = min;
649            }
650            if let Some(max) = lag.max_hours {
651                dist.max_lag_hours = max;
652            }
653            dist
654        };
655
656        // Apply event-specific lags
657        if let Some(ref lag) = schema.sales_order_lag {
658            config
659                .event_lags
660                .insert(EventType::SalesOrder, convert_lag(lag));
661        }
662        if let Some(ref lag) = schema.purchase_order_lag {
663            config
664                .event_lags
665                .insert(EventType::PurchaseOrder, convert_lag(lag));
666        }
667        if let Some(ref lag) = schema.goods_receipt_lag {
668            config
669                .event_lags
670                .insert(EventType::GoodsReceipt, convert_lag(lag));
671        }
672        if let Some(ref lag) = schema.invoice_receipt_lag {
673            config
674                .event_lags
675                .insert(EventType::InvoiceReceipt, convert_lag(lag));
676        }
677        if let Some(ref lag) = schema.invoice_issue_lag {
678            config
679                .event_lags
680                .insert(EventType::InvoiceIssue, convert_lag(lag));
681        }
682        if let Some(ref lag) = schema.payment_lag {
683            config
684                .event_lags
685                .insert(EventType::Payment, convert_lag(lag));
686        }
687        if let Some(ref lag) = schema.journal_entry_lag {
688            config
689                .event_lags
690                .insert(EventType::JournalEntry, convert_lag(lag));
691        }
692
693        // Apply cross-day posting config
694        if let Some(ref cross_day) = schema.cross_day_posting {
695            config.cross_day = CrossDayConfig {
696                enabled: cross_day.enabled,
697                probability_by_hour: cross_day.probability_by_hour.clone(),
698                ..Default::default()
699            };
700        }
701
702        config
703    }
704
705    /// Convert schema period-end config to core PeriodEndDynamics.
706    fn convert_period_end_config(
707        schema: &datasynth_config::schema::PeriodEndSchemaConfig,
708    ) -> PeriodEndDynamics {
709        let model_type = schema.model.as_deref().unwrap_or("exponential");
710
711        // Helper to convert period config
712        let convert_period =
713            |period: Option<&datasynth_config::schema::PeriodEndModelSchemaConfig>,
714             default_peak: f64|
715             -> PeriodEndConfig {
716                if let Some(p) = period {
717                    let model = match model_type {
718                        "flat" => PeriodEndModel::FlatMultiplier {
719                            multiplier: p.peak_multiplier.unwrap_or(default_peak),
720                        },
721                        "extended_crunch" => PeriodEndModel::ExtendedCrunch {
722                            start_day: p.start_day.unwrap_or(-10),
723                            sustained_high_days: p.sustained_high_days.unwrap_or(3),
724                            peak_multiplier: p.peak_multiplier.unwrap_or(default_peak),
725                            ramp_up_days: 3, // Default ramp-up period
726                        },
727                        _ => PeriodEndModel::ExponentialAcceleration {
728                            start_day: p.start_day.unwrap_or(-10),
729                            base_multiplier: p.base_multiplier.unwrap_or(1.0),
730                            peak_multiplier: p.peak_multiplier.unwrap_or(default_peak),
731                            decay_rate: p.decay_rate.unwrap_or(0.3),
732                        },
733                    };
734                    PeriodEndConfig {
735                        enabled: true,
736                        model,
737                        additional_multiplier: p.additional_multiplier.unwrap_or(1.0),
738                    }
739                } else {
740                    PeriodEndConfig {
741                        enabled: true,
742                        model: PeriodEndModel::ExponentialAcceleration {
743                            start_day: -10,
744                            base_multiplier: 1.0,
745                            peak_multiplier: default_peak,
746                            decay_rate: 0.3,
747                        },
748                        additional_multiplier: 1.0,
749                    }
750                }
751            };
752
753        PeriodEndDynamics::new(
754            convert_period(schema.month_end.as_ref(), 2.0),
755            convert_period(schema.quarter_end.as_ref(), 3.5),
756            convert_period(schema.year_end.as_ref(), 5.0),
757        )
758    }
759
760    /// Parse a region string into a Region enum.
761    fn parse_region(region_str: &str) -> Region {
762        match region_str.to_uppercase().as_str() {
763            "US" => Region::US,
764            "DE" => Region::DE,
765            "GB" => Region::GB,
766            "CN" => Region::CN,
767            "JP" => Region::JP,
768            "IN" => Region::IN,
769            "BR" => Region::BR,
770            "MX" => Region::MX,
771            "AU" => Region::AU,
772            "SG" => Region::SG,
773            "KR" => Region::KR,
774            "FR" => Region::FR,
775            "IT" => Region::IT,
776            "ES" => Region::ES,
777            "CA" => Region::CA,
778            _ => Region::US,
779        }
780    }
781
782    /// Set a custom company selector.
783    pub fn set_company_selector(&mut self, selector: WeightedCompanySelector) {
784        self.company_selector = selector;
785    }
786
787    /// Get the current company selector.
788    pub fn company_selector(&self) -> &WeightedCompanySelector {
789        &self.company_selector
790    }
791
792    /// Set fraud configuration.
793    pub fn set_fraud_config(&mut self, config: FraudConfig) {
794        self.fraud_config = config;
795    }
796
797    /// Set vendors from generated master data.
798    ///
799    /// This replaces the default vendor pool with actual generated vendors,
800    /// ensuring JEs reference real master data entities.
801    pub fn with_vendors(mut self, vendors: &[Vendor]) -> Self {
802        if !vendors.is_empty() {
803            self.vendor_pool = VendorPool::from_vendors(vendors.to_vec());
804            self.using_real_master_data = true;
805        }
806        self
807    }
808
809    /// Set customers from generated master data.
810    ///
811    /// This replaces the default customer pool with actual generated customers,
812    /// ensuring JEs reference real master data entities.
813    pub fn with_customers(mut self, customers: &[Customer]) -> Self {
814        if !customers.is_empty() {
815            self.customer_pool = CustomerPool::from_customers(customers.to_vec());
816            self.using_real_master_data = true;
817        }
818        self
819    }
820
821    /// Set materials from generated master data.
822    ///
823    /// This provides material references for JEs that involve inventory movements.
824    pub fn with_materials(mut self, materials: &[Material]) -> Self {
825        if !materials.is_empty() {
826            self.material_pool = Some(MaterialPool::from_materials(materials.to_vec()));
827            self.using_real_master_data = true;
828        }
829        self
830    }
831
832    /// Set all master data at once for convenience.
833    ///
834    /// This is the recommended way to configure the JE generator with
835    /// generated master data to ensure data coherence.
836    pub fn with_master_data(
837        self,
838        vendors: &[Vendor],
839        customers: &[Customer],
840        materials: &[Material],
841    ) -> Self {
842        self.with_vendors(vendors)
843            .with_customers(customers)
844            .with_materials(materials)
845    }
846
847    /// Replace the user pool with one generated from a [`CountryPack`].
848    ///
849    /// This is an alternative to the default name-culture distribution that
850    /// derives name pools and weights from the country-pack's `names` section.
851    /// The existing user pool (if any) is discarded and regenerated using
852    /// [`MultiCultureNameGenerator::from_country_pack`].
853    pub fn with_country_pack_names(mut self, pack: &CountryPack) -> Self {
854        let name_gen =
855            datasynth_core::templates::MultiCultureNameGenerator::from_country_pack(pack);
856        let config = UserGeneratorConfig {
857            // The culture distribution is embedded in the name generator
858            // itself, so we use an empty list here.
859            culture_distribution: Vec::new(),
860            email_domain: name_gen.email_domain().to_string(),
861            generate_realistic_names: true,
862        };
863        let mut user_gen = UserGenerator::with_name_generator(self.seed + 100, config, name_gen);
864        self.user_pool = Some(user_gen.generate_standard(&self.companies));
865        self
866    }
867
868    /// Check if the generator is using real master data.
869    pub fn is_using_real_master_data(&self) -> bool {
870        self.using_real_master_data
871    }
872
873    /// Determine if this transaction should be fraudulent.
874    /// Pick a realistic ERP `source_system` provenance code.
875    ///
876    /// Returns a string like `"SAP-FI/AP"`, `"manual/adjustment"`,
877    /// `"Interface/EDI"`. Uses the business process to bias toward
878    /// process-appropriate sub-modules (e.g. P2P → SAP-MM/IV, O2C →
879    /// SAP-SD/IV, H2R → SAP-HR/PR). The legacy 7-code shape
880    /// (`SAP-FI`, `SAP-MM`, etc.) is preserved as a prefix so existing
881    /// `starts_with` filters keep working.
882    ///
883    /// **Manual contract**: when `is_manual` is true the returned value
884    /// always starts with `"manual"` or `"spreadsheet"`. This is asserted
885    /// in `test_isa240_audit_flags_populated`.
886    fn pick_source_system(rng: &mut ChaCha8Rng, is_manual: bool, bp: BusinessProcess) -> String {
887        if is_manual {
888            // 8 manual provenance codes — all share a `manual/` or
889            // `spreadsheet/` prefix.
890            const MANUAL: &[&str] = &[
891                "manual/standard",
892                "manual/adjustment",
893                "manual/reclassification",
894                "manual/accrual",
895                "manual/reversal",
896                "manual/correction",
897                "spreadsheet/upload",
898                "spreadsheet/journal",
899            ];
900            let idx = (rng.random::<u32>() as usize) % MANUAL.len();
901            return MANUAL[idx].to_string();
902        }
903
904        // Process-aware automated provenance. Each process has a small
905        // primary set; we also mix in cross-process codes ~20% of the
906        // time so the taxonomy stays diverse without losing coherence.
907        let primary: &[&str] = match bp {
908            BusinessProcess::P2P => &[
909                "SAP-MM/PO",
910                "SAP-MM/IV",
911                "SAP-MM/IM",
912                "SAP-FI/AP",
913                "Interface/EDI",
914            ],
915            BusinessProcess::O2C => &[
916                "SAP-SD/ORD",
917                "SAP-SD/DEL",
918                "SAP-SD/IV",
919                "SAP-FI/AR",
920                "Interface/Lockbox",
921            ],
922            BusinessProcess::H2R => &["SAP-HR/PR", "SAP-HR/TIME", "Interface/PayRun"],
923            BusinessProcess::A2R => &["SAP-FI/AA", "SAP-FI/GL"],
924            BusinessProcess::Treasury => &["Treasury/CM", "Treasury/HD", "Interface/Bank"],
925            BusinessProcess::Tax => &["Tax/RPT", "SAP-FI/GL"],
926            BusinessProcess::Mfg => &["SAP-MM/IM", "SAP-FI/GL"],
927            // R2R, S2C, Bank, Audit, Intercompany, ProjectAccounting, Esg
928            // → fall through to a generic mix.
929            _ => &[
930                "SAP-FI/GL",
931                "SAP-FI/AP",
932                "SAP-FI/AR",
933                "SAP-FI/AA",
934                "External/SubL",
935            ],
936        };
937
938        // 80% process-appropriate, 20% cross-process (pulled from a
939        // generic pool) so the categorical distribution has long tails.
940        const CROSS: &[&str] = &[
941            "SAP-FI/GL",
942            "SAP-FI/AP",
943            "SAP-FI/AR",
944            "Interface/EDI",
945            "Interface/Bank",
946            "External/SubL",
947        ];
948        let pool = if rng.random::<f64>() < 0.80 {
949            primary
950        } else {
951            CROSS
952        };
953        let idx = (rng.random::<u32>() as usize) % pool.len();
954        pool[idx].to_string()
955    }
956
957    fn determine_fraud(&mut self) -> Option<FraudType> {
958        if !self.fraud_config.enabled {
959            return None;
960        }
961
962        // Roll for fraud based on fraud rate
963        if self.rng.random::<f64>() >= self.fraud_config.fraud_rate {
964            return None;
965        }
966
967        // Select fraud type based on distribution
968        Some(self.select_fraud_type())
969    }
970
971    /// Select a fraud type based on the configured distribution.
972    fn select_fraud_type(&mut self) -> FraudType {
973        let dist = &self.fraud_config.fraud_type_distribution;
974        let roll: f64 = self.rng.random();
975
976        let mut cumulative = 0.0;
977
978        cumulative += dist.suspense_account_abuse;
979        if roll < cumulative {
980            return FraudType::SuspenseAccountAbuse;
981        }
982
983        cumulative += dist.fictitious_transaction;
984        if roll < cumulative {
985            return FraudType::FictitiousTransaction;
986        }
987
988        cumulative += dist.revenue_manipulation;
989        if roll < cumulative {
990            return FraudType::RevenueManipulation;
991        }
992
993        cumulative += dist.expense_capitalization;
994        if roll < cumulative {
995            return FraudType::ExpenseCapitalization;
996        }
997
998        cumulative += dist.split_transaction;
999        if roll < cumulative {
1000            return FraudType::SplitTransaction;
1001        }
1002
1003        cumulative += dist.timing_anomaly;
1004        if roll < cumulative {
1005            return FraudType::TimingAnomaly;
1006        }
1007
1008        cumulative += dist.unauthorized_access;
1009        if roll < cumulative {
1010            return FraudType::UnauthorizedAccess;
1011        }
1012
1013        // Default fallback
1014        FraudType::DuplicatePayment
1015    }
1016
1017    /// Map a fraud type to an amount pattern for suspicious amounts.
1018    fn fraud_type_to_amount_pattern(&self, fraud_type: FraudType) -> FraudAmountPattern {
1019        match fraud_type {
1020            FraudType::SplitTransaction | FraudType::JustBelowThreshold => {
1021                FraudAmountPattern::ThresholdAdjacent
1022            }
1023            FraudType::FictitiousTransaction
1024            | FraudType::FictitiousEntry
1025            | FraudType::SuspenseAccountAbuse
1026            | FraudType::RoundDollarManipulation => FraudAmountPattern::ObviousRoundNumbers,
1027            FraudType::RevenueManipulation
1028            | FraudType::ExpenseCapitalization
1029            | FraudType::ImproperCapitalization
1030            | FraudType::ReserveManipulation
1031            | FraudType::UnauthorizedAccess
1032            | FraudType::PrematureRevenue
1033            | FraudType::UnderstatedLiabilities
1034            | FraudType::OverstatedAssets
1035            | FraudType::ChannelStuffing => FraudAmountPattern::StatisticallyImprobable,
1036            FraudType::DuplicatePayment
1037            | FraudType::TimingAnomaly
1038            | FraudType::SelfApproval
1039            | FraudType::ExceededApprovalLimit
1040            | FraudType::SegregationOfDutiesViolation
1041            | FraudType::UnauthorizedApproval
1042            | FraudType::CollusiveApproval
1043            | FraudType::FictitiousVendor
1044            | FraudType::ShellCompanyPayment
1045            | FraudType::Kickback
1046            | FraudType::KickbackScheme
1047            | FraudType::InvoiceManipulation
1048            | FraudType::AssetMisappropriation
1049            | FraudType::InventoryTheft
1050            | FraudType::GhostEmployee => FraudAmountPattern::Normal,
1051            // Accounting Standards Fraud Types (ASC 606/IFRS 15 - Revenue)
1052            FraudType::ImproperRevenueRecognition
1053            | FraudType::ImproperPoAllocation
1054            | FraudType::VariableConsiderationManipulation
1055            | FraudType::ContractModificationMisstatement => {
1056                FraudAmountPattern::StatisticallyImprobable
1057            }
1058            // Accounting Standards Fraud Types (ASC 842/IFRS 16 - Leases)
1059            FraudType::LeaseClassificationManipulation
1060            | FraudType::OffBalanceSheetLease
1061            | FraudType::LeaseLiabilityUnderstatement
1062            | FraudType::RouAssetMisstatement => FraudAmountPattern::StatisticallyImprobable,
1063            // Accounting Standards Fraud Types (ASC 820/IFRS 13 - Fair Value)
1064            FraudType::FairValueHierarchyManipulation
1065            | FraudType::Level3InputManipulation
1066            | FraudType::ValuationTechniqueManipulation => {
1067                FraudAmountPattern::StatisticallyImprobable
1068            }
1069            // Accounting Standards Fraud Types (ASC 360/IAS 36 - Impairment)
1070            FraudType::DelayedImpairment
1071            | FraudType::ImpairmentTestAvoidance
1072            | FraudType::CashFlowProjectionManipulation
1073            | FraudType::ImproperImpairmentReversal => FraudAmountPattern::StatisticallyImprobable,
1074            // Sourcing/Procurement Fraud
1075            FraudType::BidRigging
1076            | FraudType::PhantomVendorContract
1077            | FraudType::ConflictOfInterestSourcing => FraudAmountPattern::Normal,
1078            FraudType::SplitContractThreshold => FraudAmountPattern::ThresholdAdjacent,
1079            // HR/Payroll Fraud
1080            FraudType::GhostEmployeePayroll
1081            | FraudType::PayrollInflation
1082            | FraudType::DuplicateExpenseReport
1083            | FraudType::FictitiousExpense => FraudAmountPattern::Normal,
1084            FraudType::SplitExpenseToAvoidApproval => FraudAmountPattern::ThresholdAdjacent,
1085            // O2C Fraud
1086            FraudType::RevenueTimingManipulation => FraudAmountPattern::StatisticallyImprobable,
1087            FraudType::QuotePriceOverride => FraudAmountPattern::Normal,
1088        }
1089    }
1090
1091    /// Generate a deterministic UUID using the factory.
1092    #[inline]
1093    fn generate_deterministic_uuid(&self) -> uuid::Uuid {
1094        self.uuid_factory.next()
1095    }
1096
/// Cost centers that line-item enrichment hands out to expense-account
/// lines (GL accounts starting with `5` or `6`).
const COST_CENTER_POOL: &'static [&'static str] =
    &["CC1000", "CC2000", "CC3000", "CC4000", "CC5000"];
1100
1101    /// Enrich journal entry line items with account descriptions, cost centers,
1102    /// profit centers, value dates, line text, and assignment fields.
1103    ///
1104    /// This populates the sparse optional fields that `JournalEntryLine::debit()`
1105    /// and `::credit()` leave as `None`.
1106    fn enrich_line_items(&self, entry: &mut JournalEntry) {
1107        let posting_date = entry.header.posting_date;
1108        let company_code = &entry.header.company_code;
1109        let header_text = entry.header.header_text.clone();
1110        let business_process = entry.header.business_process;
1111
1112        // Derive a deterministic index from the document_id for cost center selection
1113        let doc_id_bytes = entry.header.document_id.as_bytes();
1114        let mut cc_seed: usize = 0;
1115        for &b in doc_id_bytes {
1116            cc_seed = cc_seed.wrapping_add(b as usize);
1117        }
1118
1119        for (i, line) in entry.lines.iter_mut().enumerate() {
1120            // 1. account_description: look up from CoA
1121            if line.account_description.is_none() {
1122                line.account_description = self
1123                    .coa
1124                    .get_account(&line.gl_account)
1125                    .map(|a| a.short_description.clone());
1126            }
1127
1128            // 2. cost_center: assign to expense accounts (5xxx/6xxx)
1129            if line.cost_center.is_none() {
1130                let first_char = line.gl_account.chars().next().unwrap_or('0');
1131                if first_char == '5' || first_char == '6' {
1132                    let idx = cc_seed.wrapping_add(i) % Self::COST_CENTER_POOL.len();
1133                    line.cost_center = Some(Self::COST_CENTER_POOL[idx].to_string());
1134                }
1135            }
1136
1137            // 3. profit_center: derive from company code + business process
1138            if line.profit_center.is_none() {
1139                let suffix = match business_process {
1140                    Some(BusinessProcess::P2P) => "-P2P",
1141                    Some(BusinessProcess::O2C) => "-O2C",
1142                    Some(BusinessProcess::R2R) => "-R2R",
1143                    Some(BusinessProcess::H2R) => "-H2R",
1144                    _ => "",
1145                };
1146                line.profit_center = Some(format!("PC-{company_code}{suffix}"));
1147            }
1148
1149            // 4. line_text: fall back to header_text if not already set
1150            if line.line_text.is_none() {
1151                line.line_text = header_text.clone();
1152            }
1153
1154            // 5. value_date: set to posting_date for AR/AP accounts
1155            if line.value_date.is_none()
1156                && (line.gl_account.starts_with("1100") || line.gl_account.starts_with("2000"))
1157            {
1158                line.value_date = Some(posting_date);
1159            }
1160
1161            // 6. assignment: set to vendor/customer reference for AP/AR lines
1162            if line.assignment.is_none() {
1163                if line.gl_account.starts_with("2000") {
1164                    // AP line - use vendor reference from header
1165                    if let Some(ref ht) = header_text {
1166                        // Try to extract vendor ID from header text patterns like "... - V-001"
1167                        if let Some(vendor_part) = ht.rsplit(" - ").next() {
1168                            if vendor_part.starts_with("V-")
1169                                || vendor_part.starts_with("VENDOR")
1170                                || vendor_part.starts_with("Vendor")
1171                            {
1172                                line.assignment = Some(vendor_part.to_string());
1173                            }
1174                        }
1175                    }
1176                } else if line.gl_account.starts_with("1100") {
1177                    // AR line - use customer reference from header
1178                    if let Some(ref ht) = header_text {
1179                        if let Some(customer_part) = ht.rsplit(" - ").next() {
1180                            if customer_part.starts_with("C-")
1181                                || customer_part.starts_with("CUST")
1182                                || customer_part.starts_with("Customer")
1183                            {
1184                                line.assignment = Some(customer_part.to_string());
1185                            }
1186                        }
1187                    }
1188                }
1189            }
1190        }
1191    }
1192
1193    /// Generate a single journal entry.
1194    pub fn generate(&mut self) -> JournalEntry {
1195        debug!(
1196            count = self.count,
1197            companies = self.companies.len(),
1198            start_date = %self.start_date,
1199            end_date = %self.end_date,
1200            "Generating journal entry"
1201        );
1202
1203        // Check if we're in a batch - if so, generate a batched entry
1204        if let Some(ref state) = self.batch_state {
1205            if state.remaining > 0 {
1206                return self.generate_batched_entry();
1207            }
1208        }
1209
1210        self.count += 1;
1211
1212        // Generate deterministic document ID
1213        let document_id = self.generate_deterministic_uuid();
1214
1215        // Sample posting date
1216        let mut posting_date = self
1217            .temporal_sampler
1218            .sample_date(self.start_date, self.end_date);
1219
1220        // Adjust posting date to be a business day if business day calculator is configured
1221        if let Some(ref calc) = self.business_day_calculator {
1222            if !calc.is_business_day(posting_date) {
1223                // Move to next business day
1224                posting_date = calc.next_business_day(posting_date, false);
1225                // Ensure we don't exceed end_date
1226                if posting_date > self.end_date {
1227                    posting_date = calc.prev_business_day(self.end_date, true);
1228                }
1229            }
1230        }
1231
1232        // Select company using weighted selector
1233        let company_code = self.company_selector.select(&mut self.rng).to_string();
1234
1235        // v4.1.0+: draw a single (u, v) pair from the copula — cached for
1236        // both the amount adjustment (u) and the line-count shift (v).
1237        // None when no copula is configured.
1238        let copula_uv: Option<(f64, f64)> =
1239            self.correlation_copula.as_mut().map(|cop| cop.sample());
1240
1241        // Sample line item specification. When a copula is configured,
1242        // v drives line-count via a quantile-preserving map: integer
1243        // count `2 + floor(v * 10)` gives range [2, 11] evenly spaced
1244        // in v, so rank(v) == rank(line_count).
1245        //
1246        // v4.1.6+: upgraded from the v3.5.4 nudge (shift around
1247        // independently-drawn count) to true rank-preserving quantile
1248        // inversion, so empirical Kendall-τ now matches copula theory.
1249        let mut line_spec = self.line_sampler.sample();
1250        if let Some((_u, v)) = copula_uv {
1251            let new_total = 2 + ((v * 10.0).floor() as usize).min(9);
1252            let old_debit = line_spec.debit_count.max(1);
1253            let old_credit = line_spec.credit_count.max(1);
1254            let new_debit = (new_total as f64 * old_debit as f64 / (old_debit + old_credit) as f64)
1255                .round() as usize;
1256            let new_debit = new_debit.clamp(1, new_total - 1);
1257            let new_credit = new_total - new_debit;
1258            line_spec.total_count = new_total;
1259            line_spec.debit_count = new_debit;
1260            line_spec.credit_count = new_credit;
1261        }
1262
1263        // Determine source type using full 4-way distribution
1264        let source = self.select_source();
1265        let is_automated = matches!(
1266            source,
1267            TransactionSource::Automated | TransactionSource::Recurring
1268        );
1269
1270        // Select business process
1271        let business_process = self.select_business_process();
1272
1273        // Determine if this is a fraudulent transaction
1274        let fraud_type = self.determine_fraud();
1275        let is_fraud = fraud_type.is_some();
1276
1277        // Sample time based on source
1278        let time = self.temporal_sampler.sample_time(!is_automated);
1279        let created_at = posting_date.and_time(time).and_utc();
1280
1281        // Select user from pool or generate generic
1282        let (created_by, user_persona) = self.select_user(is_automated);
1283
1284        // Create header with deterministic UUID
1285        let mut header =
1286            JournalEntryHeader::with_deterministic_id(company_code, posting_date, document_id);
1287        header.created_at = created_at;
1288        header.source = source;
1289        header.created_by = created_by;
1290        header.user_persona = user_persona;
1291        header.business_process = Some(business_process);
1292        header.document_type = Self::document_type_for_process(business_process).to_string();
1293        header.is_fraud = is_fraud;
1294        header.fraud_type = fraud_type;
1295
1296        // --- ISA 240 audit flags ---
1297        let is_manual = matches!(source, TransactionSource::Manual);
1298        header.is_manual = is_manual;
1299
1300        // Determine source_system based on manual vs automated.
1301        //
1302        // Real ERPs typically expose 20+ distinct provenance codes per
1303        // company (one per module + sub-module + interface). The taxonomy
1304        // below is a strict superset of the legacy {manual, spreadsheet,
1305        // SAP-FI, SAP-MM, SAP-SD, interface, SAP-HR} codes so downstream
1306        // consumers that filter by prefix (e.g. `starts_with("SAP-")`)
1307        // continue to work.
1308        //
1309        // Contract preserved by the generator-level audit assertion in
1310        // `test_isa240_audit_flags_populated`:
1311        //   - manual entries → starts_with("manual") || starts_with("spreadsheet")
1312        //   - automated entries → does NOT start with "manual"/"spreadsheet"
1313        header.source_system = Self::pick_source_system(&mut self.rng, is_manual, business_process);
1314
1315        // is_post_close: entry is in the last month of the configured period
1316        // and the posting date falls after the 25th (simulating close cutoff)
1317        let is_post_close = posting_date.month() == self.end_date.month()
1318            && posting_date.year() == self.end_date.year()
1319            && posting_date.day() > 25;
1320        header.is_post_close = is_post_close;
1321
1322        // created_date: for manual entries, same day as posting; for automated,
1323        // 0-3 days before posting_date
1324        let created_date = if is_manual {
1325            posting_date.and_hms_opt(time.hour().min(23), time.minute(), time.second())
1326        } else {
1327            let lag_days = self.rng.random_range(0i64..=3);
1328            let created_naive_date = posting_date
1329                .checked_sub_signed(chrono::Duration::days(lag_days))
1330                .unwrap_or(posting_date);
1331            created_naive_date.and_hms_opt(
1332                self.rng.random_range(8u32..=17),
1333                self.rng.random_range(0u32..=59),
1334                self.rng.random_range(0u32..=59),
1335            )
1336        };
1337        header.created_date = created_date;
1338
1339        // Generate description context
1340        let mut context =
1341            DescriptionContext::with_period(posting_date.month(), posting_date.year());
1342
1343        // Add vendor/customer context based on business process
1344        match business_process {
1345            BusinessProcess::P2P => {
1346                if let Some(vendor) = self.vendor_pool.random_vendor(&mut self.rng) {
1347                    context.vendor_name = Some(vendor.name.clone());
1348                }
1349            }
1350            BusinessProcess::O2C => {
1351                if let Some(customer) = self.customer_pool.random_customer(&mut self.rng) {
1352                    context.customer_name = Some(customer.name.clone());
1353                }
1354            }
1355            _ => {}
1356        }
1357
1358        // Generate header text if enabled
1359        if self.template_config.descriptions.generate_header_text {
1360            header.header_text = Some(self.description_generator.generate_header_text(
1361                business_process,
1362                &context,
1363                &mut self.rng,
1364            ));
1365        }
1366
1367        // Generate reference if enabled
1368        if self.template_config.references.generate_references {
1369            header.reference = Some(
1370                self.reference_generator
1371                    .generate_for_process_year(business_process, posting_date.year()),
1372            );
1373        }
1374
1375        // Derive typed source document from reference prefix
1376        header.source_document = header
1377            .reference
1378            .as_deref()
1379            .and_then(DocumentRef::parse)
1380            .or_else(|| {
1381                if header.source == TransactionSource::Manual {
1382                    Some(DocumentRef::Manual)
1383                } else {
1384                    None
1385                }
1386            });
1387
1388        // Generate line items
1389        let mut entry = JournalEntry::new(header);
1390
1391        // Generate amount - use fraud pattern if this is a fraudulent transaction.
1392        // Non-fraud path prefers the v3.4.0 advanced sampler when configured; fraud
1393        // patterns always use the legacy sampler because they target specific
1394        // thresholds (round numbers, just-under-approval amounts) that are
1395        // orthogonal to mixture models.
1396        let base_amount = if let Some(ft) = fraud_type {
1397            let pattern = self.fraud_type_to_amount_pattern(ft);
1398            self.amount_sampler.sample_fraud(pattern)
1399        } else if let Some(ref mut adv) = self.advanced_amount_sampler {
1400            adv.sample_decimal()
1401        } else {
1402            self.amount_sampler.sample()
1403        };
1404        // v3.5.3+: if a conditional-amount override is configured and
1405        // the JE is non-fraud, re-sample the amount from the conditional
1406        // distribution using the computed context. Fraud entries bypass
1407        // this path to preserve fraud-pattern semantics (as with the
1408        // advanced sampler cascade above).
1409        let base_amount = if fraud_type.is_none() {
1410            // Compute input context BEFORE taking &mut on the sampler
1411            // to avoid borrow-checker conflict with the immutable
1412            // `conditional_input_value` call.
1413            let input = self.conditional_input_value(posting_date);
1414            if let Some(ref mut cond) = self.conditional_amount_override {
1415                cond.sample_decimal(input)
1416            } else {
1417                base_amount
1418            }
1419        } else {
1420            base_amount
1421        };
1422
1423        // v4.1.6+: if a copula is configured AND an advanced amount
1424        // sampler with a ppf is available, use true rank-preserving
1425        // inverse-CDF sampling — amount is drawn DIRECTLY from the
1426        // sampler's quantile at `u`, replacing (not nudging) the
1427        // independently-drawn base_amount. This makes empirical
1428        // Kendall-τ match the copula's theoretical τ.
1429        //
1430        // Fallback for copula-without-advanced-sampler: keep the
1431        // v4.1.0 log-scale multiplier nudge (observable correlation,
1432        // diluted magnitude).
1433        let base_amount = if fraud_type.is_none() {
1434            if let Some((u, _v)) = copula_uv {
1435                if let Some(ref adv) = self.advanced_amount_sampler {
1436                    adv.ppf_decimal(u)
1437                } else {
1438                    let log_mult = 4.0 * (u - 0.5);
1439                    let adjusted = base_amount.to_f64().unwrap_or(1.0) * log_mult.exp();
1440                    Decimal::from_f64_retain(adjusted).unwrap_or(base_amount)
1441                }
1442            } else {
1443                base_amount
1444            }
1445        } else {
1446            base_amount
1447        };
1448
1449        // Apply temporal drift if configured
1450        let drift_adjusted_amount = {
1451            let drift = self.get_drift_adjustments(posting_date);
1452            if drift.amount_mean_multiplier != 1.0 {
1453                // Apply drift multiplier (includes seasonal factor if enabled)
1454                let multiplier = drift.amount_mean_multiplier * drift.seasonal_factor;
1455                let adjusted = base_amount.to_f64().unwrap_or(1.0) * multiplier;
1456                Decimal::from_f64_retain(adjusted).unwrap_or(base_amount)
1457            } else {
1458                base_amount
1459            }
1460        };
1461
1462        // Apply human variation to amounts for non-automated transactions
1463        let total_amount = if is_automated {
1464            drift_adjusted_amount // Automated systems use exact amounts
1465        } else {
1466            self.apply_human_variation(drift_adjusted_amount)
1467        };
1468
1469        // Generate debit lines
1470        let debit_amounts = self
1471            .amount_sampler
1472            .sample_summing_to(line_spec.debit_count, total_amount);
1473        for (i, amount) in debit_amounts.into_iter().enumerate() {
1474            let account_number = self.select_debit_account().account_number.clone();
1475            let mut line = JournalEntryLine::debit(
1476                entry.header.document_id,
1477                (i + 1) as u32,
1478                account_number.clone(),
1479                amount,
1480            );
1481
1482            // Generate line text if enabled
1483            if self.template_config.descriptions.generate_line_text {
1484                line.line_text = Some(self.description_generator.generate_line_text(
1485                    &account_number,
1486                    &context,
1487                    &mut self.rng,
1488                ));
1489            }
1490
1491            entry.add_line(line);
1492        }
1493
1494        // Generate credit lines - use the SAME amounts to ensure balance
1495        let credit_amounts = self
1496            .amount_sampler
1497            .sample_summing_to(line_spec.credit_count, total_amount);
1498        for (i, amount) in credit_amounts.into_iter().enumerate() {
1499            let account_number = self.select_credit_account().account_number.clone();
1500            let mut line = JournalEntryLine::credit(
1501                entry.header.document_id,
1502                (line_spec.debit_count + i + 1) as u32,
1503                account_number.clone(),
1504                amount,
1505            );
1506
1507            // Generate line text if enabled
1508            if self.template_config.descriptions.generate_line_text {
1509                line.line_text = Some(self.description_generator.generate_line_text(
1510                    &account_number,
1511                    &context,
1512                    &mut self.rng,
1513                ));
1514            }
1515
1516            entry.add_line(line);
1517        }
1518
1519        // Enrich line items with account descriptions, cost centers, etc.
1520        self.enrich_line_items(&mut entry);
1521
1522        // Apply persona-based errors if enabled and it's a human user
1523        if self.persona_errors_enabled && !is_automated {
1524            self.maybe_inject_persona_error(&mut entry);
1525        }
1526
1527        // Apply approval workflow if enabled and amount exceeds threshold
1528        if self.approval_enabled {
1529            self.maybe_apply_approval_workflow(&mut entry, posting_date);
1530        }
1531
1532        // Populate approved_by / approval_date from the approval workflow
1533        self.populate_approval_fields(&mut entry, posting_date);
1534
1535        // Maybe start a batch of similar entries for realism
1536        self.maybe_start_batch(&entry);
1537
1538        entry
1539    }
1540
1541    /// Enable or disable persona-based error injection.
1542    ///
1543    /// When enabled, entries created by human personas have a chance
1544    /// to contain realistic human errors based on their experience level.
1545    pub fn with_persona_errors(mut self, enabled: bool) -> Self {
1546        self.persona_errors_enabled = enabled;
1547        self
1548    }
1549
1550    /// Set fraud configuration for fraud injection.
1551    ///
1552    /// When fraud is enabled in the config, transactions have a chance
1553    /// to be marked as fraudulent based on the configured fraud rate.
1554    pub fn with_fraud_config(mut self, config: FraudConfig) -> Self {
1555        self.fraud_config = config;
1556        self
1557    }
1558
1559    /// Check if persona errors are enabled.
1560    pub fn persona_errors_enabled(&self) -> bool {
1561        self.persona_errors_enabled
1562    }
1563
1564    /// Enable or disable batch processing behavior.
1565    ///
1566    /// When enabled (default), the generator will occasionally produce batches
1567    /// of similar entries, simulating how humans batch similar work together.
1568    pub fn with_batching(mut self, enabled: bool) -> Self {
1569        if !enabled {
1570            self.batch_state = None;
1571        }
1572        self
1573    }
1574
1575    /// Check if batch processing is enabled.
1576    pub fn batching_enabled(&self) -> bool {
1577        // Batching is implicitly enabled when not explicitly disabled
1578        true
1579    }
1580
1581    /// Maybe start a batch based on the current entry.
1582    ///
1583    /// Humans often batch similar work: processing invoices from one vendor,
1584    /// entering expense reports for a trip, reconciling similar items.
1585    fn maybe_start_batch(&mut self, entry: &JournalEntry) {
1586        // Only start batch for non-automated, non-fraud entries
1587        if entry.header.source == TransactionSource::Automated || entry.header.is_fraud {
1588            return;
1589        }
1590
1591        // 15% chance to start a batch (most work is not batched)
1592        if self.rng.random::<f64>() > 0.15 {
1593            return;
1594        }
1595
1596        // Extract key attributes for batching
1597        let base_account = entry
1598            .lines
1599            .first()
1600            .map(|l| l.gl_account.clone())
1601            .unwrap_or_default();
1602
1603        let base_amount = entry.total_debit();
1604
1605        self.batch_state = Some(BatchState {
1606            base_account_number: base_account,
1607            base_amount,
1608            base_business_process: entry.header.business_process,
1609            base_posting_date: entry.header.posting_date,
1610            remaining: self.rng.random_range(2..7), // 2-6 more similar entries
1611        });
1612    }
1613
1614    /// Generate an entry that's part of the current batch.
1615    ///
1616    /// Batched entries have:
1617    /// - Same or very similar business process
1618    /// - Same posting date (batched work done together)
1619    /// - Similar amounts (within ±15%)
1620    /// - Same debit account (processing similar items)
1621    fn generate_batched_entry(&mut self) -> JournalEntry {
1622        use rust_decimal::Decimal;
1623
1624        // Decrement batch counter
1625        if let Some(ref mut state) = self.batch_state {
1626            state.remaining = state.remaining.saturating_sub(1);
1627        }
1628
1629        let Some(batch) = self.batch_state.clone() else {
1630            // This is a programming error - batch_state should be set before calling this method.
1631            // Clear state and fall back to generating a standard entry instead of panicking.
1632            tracing::warn!(
1633                "generate_batched_entry called without batch_state; generating standard entry"
1634            );
1635            self.batch_state = None;
1636            return self.generate();
1637        };
1638
1639        // Use the batch's posting date (work done on same day)
1640        let posting_date = batch.base_posting_date;
1641
1642        self.count += 1;
1643        let document_id = self.generate_deterministic_uuid();
1644
1645        // Select same company (batched work is usually same company)
1646        let company_code = self.company_selector.select(&mut self.rng).to_string();
1647
1648        // Use simplified line spec for batched entries (usually 2-line)
1649        let _line_spec = LineItemSpec {
1650            total_count: 2,
1651            debit_count: 1,
1652            credit_count: 1,
1653            split_type: DebitCreditSplit::Equal,
1654        };
1655
1656        // Batched entries are always manual
1657        let source = TransactionSource::Manual;
1658
1659        // Use the batch's business process
1660        let business_process = batch.base_business_process.unwrap_or(BusinessProcess::R2R);
1661
1662        // Sample time
1663        let time = self.temporal_sampler.sample_time(true);
1664        let created_at = posting_date.and_time(time).and_utc();
1665
1666        // Same user for batched work
1667        let (created_by, user_persona) = self.select_user(false);
1668
1669        // Create header
1670        let mut header =
1671            JournalEntryHeader::with_deterministic_id(company_code, posting_date, document_id);
1672        header.created_at = created_at;
1673        header.source = source;
1674        header.created_by = created_by;
1675        header.user_persona = user_persona;
1676        header.business_process = Some(business_process);
1677        header.document_type = Self::document_type_for_process(business_process).to_string();
1678
1679        // Batched manual entries have Manual source document
1680        header.source_document = Some(DocumentRef::Manual);
1681
1682        // ISA 240 audit flags for batched entries (always manual)
1683        header.is_manual = true;
1684        header.source_system = if self.rng.random::<f64>() < 0.70 {
1685            "manual".to_string()
1686        } else {
1687            "spreadsheet".to_string()
1688        };
1689        header.is_post_close = posting_date.month() == self.end_date.month()
1690            && posting_date.year() == self.end_date.year()
1691            && posting_date.day() > 25;
1692        header.created_date =
1693            posting_date.and_hms_opt(time.hour().min(23), time.minute(), time.second());
1694
1695        // Generate similar amount (within ±15% of base)
1696        let variation = self.rng.random_range(-0.15..0.15);
1697        let varied_amount =
1698            batch.base_amount * (Decimal::ONE + Decimal::try_from(variation).unwrap_or_default());
1699        let total_amount = varied_amount.round_dp(2).max(Decimal::from(1));
1700
1701        // Create the entry
1702        let mut entry = JournalEntry::new(header);
1703
1704        // Use same debit account as batch base
1705        let debit_line = JournalEntryLine::debit(
1706            entry.header.document_id,
1707            1,
1708            batch.base_account_number.clone(),
1709            total_amount,
1710        );
1711        entry.add_line(debit_line);
1712
1713        // Select a credit account
1714        let credit_account = self.select_credit_account().account_number.clone();
1715        let credit_line =
1716            JournalEntryLine::credit(entry.header.document_id, 2, credit_account, total_amount);
1717        entry.add_line(credit_line);
1718
1719        // Enrich line items with account descriptions, cost centers, etc.
1720        self.enrich_line_items(&mut entry);
1721
1722        // Apply persona-based errors if enabled
1723        if self.persona_errors_enabled {
1724            self.maybe_inject_persona_error(&mut entry);
1725        }
1726
1727        // Apply approval workflow if enabled
1728        if self.approval_enabled {
1729            self.maybe_apply_approval_workflow(&mut entry, posting_date);
1730        }
1731
1732        // Populate approved_by / approval_date from the approval workflow
1733        self.populate_approval_fields(&mut entry, posting_date);
1734
1735        // Clear batch state if no more entries remaining
1736        if batch.remaining <= 1 {
1737            self.batch_state = None;
1738        }
1739
1740        entry
1741    }
1742
1743    /// Maybe inject a persona-appropriate error based on the persona's error rate.
1744    fn maybe_inject_persona_error(&mut self, entry: &mut JournalEntry) {
1745        // Parse persona from the entry header
1746        let persona_str = &entry.header.user_persona;
1747        let persona = match persona_str.to_lowercase().as_str() {
1748            s if s.contains("junior") => UserPersona::JuniorAccountant,
1749            s if s.contains("senior") => UserPersona::SeniorAccountant,
1750            s if s.contains("controller") => UserPersona::Controller,
1751            s if s.contains("manager") => UserPersona::Manager,
1752            s if s.contains("executive") => UserPersona::Executive,
1753            _ => return, // Don't inject errors for unknown personas
1754        };
1755
1756        // Get base error rate from persona
1757        let base_error_rate = persona.error_rate();
1758
1759        // Apply stress factors based on posting date
1760        let adjusted_rate = self.apply_stress_factors(base_error_rate, entry.header.posting_date);
1761
1762        // Check if error should occur based on adjusted rate
1763        if self.rng.random::<f64>() >= adjusted_rate {
1764            return; // No error this time
1765        }
1766
1767        // Select and inject persona-appropriate error
1768        self.inject_human_error(entry, persona);
1769    }
1770
1771    /// Apply contextual stress factors to the base error rate.
1772    ///
1773    /// Stress factors increase error likelihood during:
1774    /// - Month-end (day >= 28): 1.5x more errors due to deadline pressure
1775    /// - Quarter-end (Mar, Jun, Sep, Dec): additional 25% boost
1776    /// - Year-end (December 28-31): 2.0x more errors due to audit pressure
1777    /// - Monday morning (catch-up work): 20% more errors
1778    /// - Friday afternoon (rushing to leave): 30% more errors
1779    fn apply_stress_factors(&self, base_rate: f64, posting_date: chrono::NaiveDate) -> f64 {
1780        use chrono::Datelike;
1781
1782        let mut rate = base_rate;
1783        let day = posting_date.day();
1784        let month = posting_date.month();
1785
1786        // Year-end stress (December 28-31): double the error rate
1787        if month == 12 && day >= 28 {
1788            rate *= 2.0;
1789            return rate.min(0.5); // Cap at 50% to keep it realistic
1790        }
1791
1792        // Quarter-end stress (last days of Mar, Jun, Sep, Dec)
1793        if matches!(month, 3 | 6 | 9 | 12) && day >= 28 {
1794            rate *= 1.75; // 75% more errors at quarter end
1795            return rate.min(0.4);
1796        }
1797
1798        // Month-end stress (last 3 days of month)
1799        if day >= 28 {
1800            rate *= 1.5; // 50% more errors at month end
1801        }
1802
1803        // Day-of-week stress effects
1804        let weekday = posting_date.weekday();
1805        match weekday {
1806            chrono::Weekday::Mon => {
1807                // Monday: catching up, often rushed
1808                rate *= 1.2;
1809            }
1810            chrono::Weekday::Fri => {
1811                // Friday: rushing to finish before weekend
1812                rate *= 1.3;
1813            }
1814            _ => {}
1815        }
1816
1817        // Cap at 40% to keep it realistic
1818        rate.min(0.4)
1819    }
1820
1821    /// Apply human-like variation to an amount.
1822    ///
1823    /// Humans don't enter perfectly calculated amounts - they:
1824    /// - Round amounts differently
1825    /// - Estimate instead of calculating exactly
1826    /// - Make small input variations
1827    ///
1828    /// This applies small variations (typically ±2%) to make amounts more realistic.
1829    fn apply_human_variation(&mut self, amount: rust_decimal::Decimal) -> rust_decimal::Decimal {
1830        use rust_decimal::Decimal;
1831
1832        // Automated transactions or very small amounts don't get variation
1833        if amount < Decimal::from(10) {
1834            return amount;
1835        }
1836
1837        // 70% chance of human variation being applied
1838        if self.rng.random::<f64>() > 0.70 {
1839            return amount;
1840        }
1841
1842        // Decide which type of human variation to apply
1843        let variation_type: u8 = self.rng.random_range(0..4);
1844
1845        match variation_type {
1846            0 => {
1847                // ±2% variation (common for estimated amounts)
1848                let variation_pct = self.rng.random_range(-0.02..0.02);
1849                let variation = amount * Decimal::try_from(variation_pct).unwrap_or_default();
1850                (amount + variation).round_dp(2)
1851            }
1852            1 => {
1853                // Round to nearest $10
1854                let ten = Decimal::from(10);
1855                (amount / ten).round() * ten
1856            }
1857            2 => {
1858                // Round to nearest $100 (for larger amounts)
1859                if amount >= Decimal::from(500) {
1860                    let hundred = Decimal::from(100);
1861                    (amount / hundred).round() * hundred
1862                } else {
1863                    amount
1864                }
1865            }
1866            3 => {
1867                // Slight under/over payment (±$0.01 to ±$1.00)
1868                let cents = Decimal::new(self.rng.random_range(-100..100), 2);
1869                (amount + cents).max(Decimal::ZERO).round_dp(2)
1870            }
1871            _ => amount,
1872        }
1873    }
1874
1875    /// Rebalance an entry after a one-sided amount modification.
1876    ///
1877    /// When an error modifies one line's amount, this finds a line on the opposite
1878    /// side (credit if modified was debit, or vice versa) and adjusts it by the
1879    /// same impact to maintain balance.
1880    fn rebalance_entry(entry: &mut JournalEntry, modified_was_debit: bool, impact: Decimal) {
1881        // Find a line on the opposite side to adjust
1882        let balancing_idx = entry.lines.iter().position(|l| {
1883            if modified_was_debit {
1884                l.credit_amount > Decimal::ZERO
1885            } else {
1886                l.debit_amount > Decimal::ZERO
1887            }
1888        });
1889
1890        if let Some(idx) = balancing_idx {
1891            if modified_was_debit {
1892                entry.lines[idx].credit_amount += impact;
1893            } else {
1894                entry.lines[idx].debit_amount += impact;
1895            }
1896        }
1897    }
1898
1899    /// Inject a human-like error based on the persona.
1900    ///
1901    /// All error types maintain balance - amount modifications are applied to both sides.
1902    /// Entries are marked with [HUMAN_ERROR:*] tags in header_text for ML detection.
1903    fn inject_human_error(&mut self, entry: &mut JournalEntry, persona: UserPersona) {
1904        use rust_decimal::Decimal;
1905
1906        // Different personas make different types of errors
1907        let error_type: u8 = match persona {
1908            UserPersona::JuniorAccountant => {
1909                // Junior accountants make more varied errors
1910                self.rng.random_range(0..5)
1911            }
1912            UserPersona::SeniorAccountant => {
1913                // Senior accountants mainly make transposition errors
1914                self.rng.random_range(0..3)
1915            }
1916            UserPersona::Controller | UserPersona::Manager => {
1917                // Controllers/managers mainly make rounding or cutoff errors
1918                self.rng.random_range(3..5)
1919            }
1920            _ => return,
1921        };
1922
1923        match error_type {
1924            0 => {
1925                // Transposed digits in an amount
1926                if let Some(line) = entry.lines.get_mut(0) {
1927                    let is_debit = line.debit_amount > Decimal::ZERO;
1928                    let original_amount = if is_debit {
1929                        line.debit_amount
1930                    } else {
1931                        line.credit_amount
1932                    };
1933
1934                    // Simple digit swap in the string representation
1935                    let s = original_amount.to_string();
1936                    if s.len() >= 2 {
1937                        let chars: Vec<char> = s.chars().collect();
1938                        let pos = self.rng.random_range(0..chars.len().saturating_sub(1));
1939                        if chars[pos].is_ascii_digit()
1940                            && chars.get(pos + 1).is_some_and(char::is_ascii_digit)
1941                        {
1942                            let mut new_chars = chars;
1943                            new_chars.swap(pos, pos + 1);
1944                            if let Ok(new_amount) =
1945                                new_chars.into_iter().collect::<String>().parse::<Decimal>()
1946                            {
1947                                let impact = new_amount - original_amount;
1948
1949                                // Apply to the modified line
1950                                if is_debit {
1951                                    entry.lines[0].debit_amount = new_amount;
1952                                } else {
1953                                    entry.lines[0].credit_amount = new_amount;
1954                                }
1955
1956                                // Rebalance the entry
1957                                Self::rebalance_entry(entry, is_debit, impact);
1958
1959                                entry.header.header_text = Some(
1960                                    entry.header.header_text.clone().unwrap_or_default()
1961                                        + " [HUMAN_ERROR:TRANSPOSITION]",
1962                                );
1963                            }
1964                        }
1965                    }
1966                }
1967            }
1968            1 => {
1969                // Wrong decimal place (off by factor of 10)
1970                if let Some(line) = entry.lines.get_mut(0) {
1971                    let is_debit = line.debit_amount > Decimal::ZERO;
1972                    let original_amount = if is_debit {
1973                        line.debit_amount
1974                    } else {
1975                        line.credit_amount
1976                    };
1977
1978                    let new_amount = original_amount * Decimal::new(10, 0);
1979                    let impact = new_amount - original_amount;
1980
1981                    // Apply to the modified line
1982                    if is_debit {
1983                        entry.lines[0].debit_amount = new_amount;
1984                    } else {
1985                        entry.lines[0].credit_amount = new_amount;
1986                    }
1987
1988                    // Rebalance the entry
1989                    Self::rebalance_entry(entry, is_debit, impact);
1990
1991                    entry.header.header_text = Some(
1992                        entry.header.header_text.clone().unwrap_or_default()
1993                            + " [HUMAN_ERROR:DECIMAL_SHIFT]",
1994                    );
1995                }
1996            }
1997            2 => {
1998                // Typo in description (doesn't affect balance)
1999                if let Some(ref mut text) = entry.header.header_text {
2000                    let typos = ["teh", "adn", "wiht", "taht", "recieve"];
2001                    let correct = ["the", "and", "with", "that", "receive"];
2002                    let idx = self.rng.random_range(0..typos.len());
2003                    if text.to_lowercase().contains(correct[idx]) {
2004                        *text = text.replace(correct[idx], typos[idx]);
2005                        *text = format!("{text} [HUMAN_ERROR:TYPO]");
2006                    }
2007                }
2008            }
2009            3 => {
2010                // Rounding to round number
2011                if let Some(line) = entry.lines.get_mut(0) {
2012                    let is_debit = line.debit_amount > Decimal::ZERO;
2013                    let original_amount = if is_debit {
2014                        line.debit_amount
2015                    } else {
2016                        line.credit_amount
2017                    };
2018
2019                    let new_amount =
2020                        (original_amount / Decimal::new(100, 0)).round() * Decimal::new(100, 0);
2021                    let impact = new_amount - original_amount;
2022
2023                    // Apply to the modified line
2024                    if is_debit {
2025                        entry.lines[0].debit_amount = new_amount;
2026                    } else {
2027                        entry.lines[0].credit_amount = new_amount;
2028                    }
2029
2030                    // Rebalance the entry
2031                    Self::rebalance_entry(entry, is_debit, impact);
2032
2033                    entry.header.header_text = Some(
2034                        entry.header.header_text.clone().unwrap_or_default()
2035                            + " [HUMAN_ERROR:ROUNDED]",
2036                    );
2037                }
2038            }
2039            // Late posting marker (document date much earlier than posting
2040            // date). Doesn't create an imbalance.
2041            4 if entry.header.document_date == entry.header.posting_date => {
2042                let days_late = self.rng.random_range(5..15);
2043                entry.header.document_date =
2044                    entry.header.posting_date - chrono::Duration::days(days_late);
2045                entry.header.header_text = Some(
2046                    entry.header.header_text.clone().unwrap_or_default()
2047                        + " [HUMAN_ERROR:LATE_POSTING]",
2048                );
2049            }
2050            _ => {}
2051        }
2052    }
2053
2054    /// Apply approval workflow for high-value transactions.
2055    ///
2056    /// If the entry amount exceeds the approval threshold, simulate an
2057    /// approval workflow with appropriate approvers based on amount.
2058    fn maybe_apply_approval_workflow(
2059        &mut self,
2060        entry: &mut JournalEntry,
2061        _posting_date: NaiveDate,
2062    ) {
2063        use rust_decimal::Decimal;
2064
2065        let amount = entry.total_debit();
2066
2067        // Skip if amount is below threshold
2068        if amount <= self.approval_threshold {
2069            // Auto-approved below threshold
2070            let workflow = ApprovalWorkflow::auto_approved(
2071                entry.header.created_by.clone(),
2072                entry.header.user_persona.clone(),
2073                amount,
2074                entry.header.created_at,
2075            );
2076            entry.header.approval_workflow = Some(workflow);
2077            return;
2078        }
2079
2080        // Mark as SOX relevant for high-value transactions
2081        entry.header.sox_relevant = true;
2082
2083        // Determine required approval levels based on amount
2084        let required_levels = if amount > Decimal::new(100000, 0) {
2085            3 // Executive approval required
2086        } else if amount > Decimal::new(50000, 0) {
2087            2 // Senior management approval
2088        } else {
2089            1 // Manager approval
2090        };
2091
2092        // Create the approval workflow
2093        let mut workflow = ApprovalWorkflow::new(
2094            entry.header.created_by.clone(),
2095            entry.header.user_persona.clone(),
2096            amount,
2097        );
2098        workflow.required_levels = required_levels;
2099
2100        // Simulate submission
2101        let submit_time = entry.header.created_at;
2102        let submit_action = ApprovalAction::new(
2103            entry.header.created_by.clone(),
2104            entry.header.user_persona.clone(),
2105            self.parse_persona(&entry.header.user_persona),
2106            ApprovalActionType::Submit,
2107            0,
2108        )
2109        .with_timestamp(submit_time);
2110
2111        workflow.actions.push(submit_action);
2112        workflow.status = ApprovalStatus::Pending;
2113        workflow.submitted_at = Some(submit_time);
2114
2115        // Simulate approvals with realistic delays
2116        let mut current_time = submit_time;
2117        for level in 1..=required_levels {
2118            // Add delay for approval (1-3 business hours per level)
2119            let delay_hours = self.rng.random_range(1..4);
2120            current_time += chrono::Duration::hours(delay_hours);
2121
2122            // Skip weekends
2123            while current_time.weekday() == chrono::Weekday::Sat
2124                || current_time.weekday() == chrono::Weekday::Sun
2125            {
2126                current_time += chrono::Duration::days(1);
2127            }
2128
2129            // Generate approver based on level
2130            let (approver_id, approver_role) = self.select_approver(level);
2131
2132            let approve_action = ApprovalAction::new(
2133                approver_id.clone(),
2134                approver_role.to_string(),
2135                approver_role,
2136                ApprovalActionType::Approve,
2137                level,
2138            )
2139            .with_timestamp(current_time);
2140
2141            workflow.actions.push(approve_action);
2142            workflow.current_level = level;
2143        }
2144
2145        // Mark as approved
2146        workflow.status = ApprovalStatus::Approved;
2147        workflow.approved_at = Some(current_time);
2148
2149        entry.header.approval_workflow = Some(workflow);
2150    }
2151
2152    /// Select an approver based on the required level.
2153    fn select_approver(&mut self, level: u8) -> (String, UserPersona) {
2154        let persona = match level {
2155            1 => UserPersona::Manager,
2156            2 => UserPersona::Controller,
2157            _ => UserPersona::Executive,
2158        };
2159
2160        // Try to get from user pool first
2161        if let Some(ref pool) = self.user_pool {
2162            if let Some(user) = pool.get_random_user(persona, &mut self.rng) {
2163                return (user.user_id.clone(), persona);
2164            }
2165        }
2166
2167        // Fallback to generated approver
2168        let approver_id = match persona {
2169            UserPersona::Manager => format!("MGR{:04}", self.rng.random_range(1..100)),
2170            UserPersona::Controller => format!("CTRL{:04}", self.rng.random_range(1..20)),
2171            UserPersona::Executive => format!("EXEC{:04}", self.rng.random_range(1..10)),
2172            _ => format!("USR{:04}", self.rng.random_range(1..1000)),
2173        };
2174
2175        (approver_id, persona)
2176    }
2177
2178    /// Parse user persona from string.
2179    fn parse_persona(&self, persona_str: &str) -> UserPersona {
2180        match persona_str.to_lowercase().as_str() {
2181            s if s.contains("junior") => UserPersona::JuniorAccountant,
2182            s if s.contains("senior") => UserPersona::SeniorAccountant,
2183            s if s.contains("controller") => UserPersona::Controller,
2184            s if s.contains("manager") => UserPersona::Manager,
2185            s if s.contains("executive") => UserPersona::Executive,
2186            s if s.contains("automated") || s.contains("system") => UserPersona::AutomatedSystem,
2187            _ => UserPersona::JuniorAccountant, // Default
2188        }
2189    }
2190
2191    /// Enable or disable approval workflow.
2192    pub fn with_approval(mut self, enabled: bool) -> Self {
2193        self.approval_enabled = enabled;
2194        self
2195    }
2196
2197    /// Set the approval threshold amount.
2198    pub fn with_approval_threshold(mut self, threshold: rust_decimal::Decimal) -> Self {
2199        self.approval_threshold = threshold;
2200        self
2201    }
2202
2203    /// Set the SOD violation rate for approval tracking.
2204    ///
2205    /// When a transaction is approved, there is a `rate` probability (0.0 to 1.0)
2206    /// that the approver is the same as the creator, which constitutes a SOD violation.
2207    /// Default is 0.10 (10%).
2208    pub fn with_sod_violation_rate(mut self, rate: f64) -> Self {
2209        self.sod_violation_rate = rate;
2210        self
2211    }
2212
2213    /// Populate `approved_by` and `approval_date` from the approval workflow,
2214    /// and flag SOD violations when the approver matches the creator.
2215    fn populate_approval_fields(&mut self, entry: &mut JournalEntry, posting_date: NaiveDate) {
2216        if let Some(ref workflow) = entry.header.approval_workflow {
2217            // Extract the last approver from the workflow actions
2218            let last_approver = workflow
2219                .actions
2220                .iter()
2221                .rev()
2222                .find(|a| matches!(a.action, ApprovalActionType::Approve));
2223
2224            if let Some(approver_action) = last_approver {
2225                entry.header.approved_by = Some(approver_action.actor_id.clone());
2226                entry.header.approval_date = Some(approver_action.action_timestamp.date_naive());
2227            } else {
2228                // No explicit approver (auto-approved); use the preparer
2229                entry.header.approved_by = Some(workflow.preparer_id.clone());
2230                entry.header.approval_date = Some(posting_date);
2231            }
2232
2233            // Inject SOD violation: with configured probability, set approver = creator
2234            if self.rng.random::<f64>() < self.sod_violation_rate {
2235                let creator = entry.header.created_by.clone();
2236                entry.header.approved_by = Some(creator);
2237                entry.header.sod_violation = true;
2238                entry.header.sod_conflict_type = Some(SodConflictType::PreparerApprover);
2239            }
2240        }
2241    }
2242
2243    /// Set the temporal drift controller for simulating distribution changes over time.
2244    ///
2245    /// When drift is enabled, amounts and other distributions will shift based on
2246    /// the period (month) to simulate realistic temporal evolution like inflation
2247    /// or increasing fraud rates.
2248    pub fn with_drift_controller(mut self, controller: DriftController) -> Self {
2249        self.drift_controller = Some(controller);
2250        self
2251    }
2252
2253    /// Set drift configuration directly.
2254    ///
2255    /// Creates a drift controller from the config. Total periods is calculated
2256    /// from the date range.
2257    pub fn with_drift_config(mut self, config: DriftConfig, seed: u64) -> Self {
2258        if config.enabled {
2259            let total_periods = self.calculate_total_periods();
2260            self.drift_controller = Some(DriftController::new(config, seed, total_periods));
2261        }
2262        self
2263    }
2264
2265    /// Calculate total periods (months) in the date range.
2266    fn calculate_total_periods(&self) -> u32 {
2267        let start_year = self.start_date.year();
2268        let start_month = self.start_date.month();
2269        let end_year = self.end_date.year();
2270        let end_month = self.end_date.month();
2271
2272        ((end_year - start_year) * 12 + (end_month as i32 - start_month as i32) + 1).max(1) as u32
2273    }
2274
2275    /// Calculate the period number (0-indexed) for a given date.
2276    fn date_to_period(&self, date: NaiveDate) -> u32 {
2277        let start_year = self.start_date.year();
2278        let start_month = self.start_date.month() as i32;
2279        let date_year = date.year();
2280        let date_month = date.month() as i32;
2281
2282        ((date_year - start_year) * 12 + (date_month - start_month)).max(0) as u32
2283    }
2284
2285    /// Get drift adjustments for a given date.
2286    fn get_drift_adjustments(&self, date: NaiveDate) -> DriftAdjustments {
2287        if let Some(ref controller) = self.drift_controller {
2288            let period = self.date_to_period(date);
2289            controller.compute_adjustments(period)
2290        } else {
2291            DriftAdjustments::none()
2292        }
2293    }
2294
2295    /// Select a user from the pool or generate a generic user ID.
2296    #[inline]
2297    fn select_user(&mut self, is_automated: bool) -> (String, String) {
2298        if let Some(ref pool) = self.user_pool {
2299            let persona = if is_automated {
2300                UserPersona::AutomatedSystem
2301            } else {
2302                // Random distribution among human personas
2303                let roll: f64 = self.rng.random();
2304                if roll < 0.4 {
2305                    UserPersona::JuniorAccountant
2306                } else if roll < 0.7 {
2307                    UserPersona::SeniorAccountant
2308                } else if roll < 0.85 {
2309                    UserPersona::Controller
2310                } else {
2311                    UserPersona::Manager
2312                }
2313            };
2314
2315            if let Some(user) = pool.get_random_user(persona, &mut self.rng) {
2316                return (user.user_id.clone(), user.persona.to_string());
2317            }
2318        }
2319
2320        // Fallback to generic format
2321        if is_automated {
2322            (
2323                format!("BATCH{:04}", self.rng.random_range(1..=20)),
2324                "automated_system".to_string(),
2325            )
2326        } else {
2327            (
2328                format!("USER{:04}", self.rng.random_range(1..=40)),
2329                "senior_accountant".to_string(),
2330            )
2331        }
2332    }
2333
2334    /// Select transaction source based on configuration weights.
2335    #[inline]
2336    fn select_source(&mut self) -> TransactionSource {
2337        let roll: f64 = self.rng.random();
2338        let dist = &self.config.source_distribution;
2339
2340        if roll < dist.manual {
2341            TransactionSource::Manual
2342        } else if roll < dist.manual + dist.automated {
2343            TransactionSource::Automated
2344        } else if roll < dist.manual + dist.automated + dist.recurring {
2345            TransactionSource::Recurring
2346        } else {
2347            TransactionSource::Adjustment
2348        }
2349    }
2350
2351    /// Select a business process based on configuration weights.
2352    #[inline]
2353    /// Map a business process to a SAP-style document type code.
2354    ///
2355    /// - P2P → "KR" (vendor invoice)
2356    /// - O2C → "DR" (customer invoice)
2357    /// - R2R → "SA" (general journal)
2358    /// - H2R → "HR" (HR posting)
2359    /// - A2R → "AA" (asset posting)
2360    /// - others → "SA"
2361    fn document_type_for_process(process: BusinessProcess) -> &'static str {
2362        match process {
2363            BusinessProcess::P2P => "KR",
2364            BusinessProcess::O2C => "DR",
2365            BusinessProcess::R2R => "SA",
2366            BusinessProcess::H2R => "HR",
2367            BusinessProcess::A2R => "AA",
2368            _ => "SA",
2369        }
2370    }
2371
2372    fn select_business_process(&mut self) -> BusinessProcess {
2373        *datasynth_core::utils::weighted_select(&mut self.rng, &self.business_process_weights)
2374    }
2375
2376    #[inline]
2377    fn select_debit_account(&mut self) -> &GLAccount {
2378        let accounts = self.coa.get_accounts_by_type(AccountType::Asset);
2379        let expense_accounts = self.coa.get_accounts_by_type(AccountType::Expense);
2380
2381        // 60% asset, 40% expense for debits
2382        let all: Vec<_> = if self.rng.random::<f64>() < 0.6 {
2383            accounts
2384        } else {
2385            expense_accounts
2386        };
2387
2388        all.choose(&mut self.rng).copied().unwrap_or_else(|| {
2389            tracing::warn!(
2390                "Account selection returned empty list, falling back to first COA account"
2391            );
2392            &self.coa.accounts[0]
2393        })
2394    }
2395
2396    #[inline]
2397    fn select_credit_account(&mut self) -> &GLAccount {
2398        let liability_accounts = self.coa.get_accounts_by_type(AccountType::Liability);
2399        let revenue_accounts = self.coa.get_accounts_by_type(AccountType::Revenue);
2400
2401        // 60% liability, 40% revenue for credits
2402        let all: Vec<_> = if self.rng.random::<f64>() < 0.6 {
2403            liability_accounts
2404        } else {
2405            revenue_accounts
2406        };
2407
2408        all.choose(&mut self.rng).copied().unwrap_or_else(|| {
2409            tracing::warn!(
2410                "Account selection returned empty list, falling back to first COA account"
2411            );
2412            &self.coa.accounts[0]
2413        })
2414    }
2415}
2416
2417impl Generator for JournalEntryGenerator {
2418    type Item = JournalEntry;
2419    type Config = (
2420        TransactionConfig,
2421        Arc<ChartOfAccounts>,
2422        Vec<String>,
2423        NaiveDate,
2424        NaiveDate,
2425    );
2426
2427    fn new(config: Self::Config, seed: u64) -> Self {
2428        Self::new_with_params(config.0, config.1, config.2, config.3, config.4, seed)
2429    }
2430
2431    fn generate_one(&mut self) -> Self::Item {
2432        self.generate()
2433    }
2434
2435    fn reset(&mut self) {
2436        self.rng = seeded_rng(self.seed, 0);
2437        self.line_sampler.reset(self.seed + 1);
2438        self.amount_sampler.reset(self.seed + 2);
2439        self.temporal_sampler.reset(self.seed + 3);
2440        if let Some(ref mut adv) = self.advanced_amount_sampler {
2441            adv.reset(self.seed + 2);
2442        }
2443        self.count = 0;
2444        self.uuid_factory.reset();
2445
2446        // Reset reference generator by recreating it
2447        let mut ref_gen = ReferenceGenerator::new(
2448            self.start_date.year(),
2449            self.companies
2450                .first()
2451                .map(std::string::String::as_str)
2452                .unwrap_or("1000"),
2453        );
2454        ref_gen.set_prefix(
2455            ReferenceType::Invoice,
2456            &self.template_config.references.invoice_prefix,
2457        );
2458        ref_gen.set_prefix(
2459            ReferenceType::PurchaseOrder,
2460            &self.template_config.references.po_prefix,
2461        );
2462        ref_gen.set_prefix(
2463            ReferenceType::SalesOrder,
2464            &self.template_config.references.so_prefix,
2465        );
2466        self.reference_generator = ref_gen;
2467    }
2468
2469    fn count(&self) -> u64 {
2470        self.count
2471    }
2472
2473    fn seed(&self) -> u64 {
2474        self.seed
2475    }
2476}
2477
2478use datasynth_core::traits::ParallelGenerator;
2479
2480impl ParallelGenerator for JournalEntryGenerator {
2481    /// Split this generator into `parts` independent sub-generators.
2482    ///
2483    /// Each sub-generator gets a deterministic seed derived from the parent seed
2484    /// and its partition index, plus a partitioned UUID factory to avoid contention.
2485    /// The results are deterministic for a given partition count.
2486    fn split(self, parts: usize) -> Vec<Self> {
2487        let parts = parts.max(1);
2488        (0..parts)
2489            .map(|i| {
2490                // Derive a unique seed per partition using a golden-ratio constant
2491                let sub_seed = self
2492                    .seed
2493                    .wrapping_add((i as u64).wrapping_mul(0x9E3779B97F4A7C15));
2494
2495                let mut gen = JournalEntryGenerator::new_with_full_config(
2496                    self.config.clone(),
2497                    Arc::clone(&self.coa),
2498                    self.companies.clone(),
2499                    self.start_date,
2500                    self.end_date,
2501                    sub_seed,
2502                    self.template_config.clone(),
2503                    self.user_pool.clone(),
2504                );
2505
2506                // Copy over configuration state
2507                gen.company_selector = self.company_selector.clone();
2508                gen.vendor_pool = self.vendor_pool.clone();
2509                gen.customer_pool = self.customer_pool.clone();
2510                gen.material_pool = self.material_pool.clone();
2511                gen.using_real_master_data = self.using_real_master_data;
2512                gen.fraud_config = self.fraud_config.clone();
2513                gen.persona_errors_enabled = self.persona_errors_enabled;
2514                gen.approval_enabled = self.approval_enabled;
2515                gen.approval_threshold = self.approval_threshold;
2516                gen.sod_violation_rate = self.sod_violation_rate;
2517                // v3.4.0+: advanced amount sampler (mixture / Pareto /
2518                // Gaussian). Clone and reset the internal RNG with the
2519                // partition's sub_seed so each worker explores a unique
2520                // subsequence without repeating the parent stream.
2521                if let Some(mut adv) = self.advanced_amount_sampler.clone() {
2522                    adv.reset(sub_seed.wrapping_add(2));
2523                    gen.advanced_amount_sampler = Some(adv);
2524                }
2525                // v3.5.3+: conditional amount override — clone + reset
2526                // so each partition gets a fresh deterministic stream.
2527                if let Some(mut cond) = self.conditional_amount_override.clone() {
2528                    cond.reset(sub_seed.wrapping_add(17));
2529                    gen.conditional_amount_override = Some(cond);
2530                }
2531                // v3.5.4+: copula sampler — clone + reset per partition.
2532                if let Some(mut cop) = self.correlation_copula.clone() {
2533                    cop.reset(sub_seed.wrapping_add(31));
2534                    gen.correlation_copula = Some(cop);
2535                }
2536
2537                // Use partitioned UUID factory to eliminate atomic contention
2538                gen.uuid_factory = DeterministicUuidFactory::for_partition(
2539                    sub_seed,
2540                    GeneratorType::JournalEntry,
2541                    i as u8,
2542                );
2543
2544                // Copy temporal patterns if configured
2545                if let Some(ref config) = self.temporal_patterns_config {
2546                    gen.temporal_patterns_config = Some(config.clone());
2547                    // Rebuild business day calculator from the stored config
2548                    if config.business_days.enabled {
2549                        if let Some(ref bdc) = self.business_day_calculator {
2550                            gen.business_day_calculator = Some(bdc.clone());
2551                        }
2552                    }
2553                    // Rebuild processing lag calculator with partition seed
2554                    if config.processing_lags.enabled {
2555                        let lag_config =
2556                            Self::convert_processing_lag_config(&config.processing_lags);
2557                        gen.processing_lag_calculator =
2558                            Some(ProcessingLagCalculator::with_config(sub_seed, lag_config));
2559                    }
2560                }
2561
2562                // Copy drift controller if present
2563                if let Some(ref dc) = self.drift_controller {
2564                    gen.drift_controller = Some(dc.clone());
2565                }
2566
2567                gen
2568            })
2569            .collect()
2570    }
2571}
2572
2573#[cfg(test)]
2574#[allow(clippy::unwrap_used)]
2575mod tests {
2576    use super::*;
2577    use crate::ChartOfAccountsGenerator;
2578
2579    #[test]
2580    fn test_generate_balanced_entries() {
2581        let mut coa_gen =
2582            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2583        let coa = Arc::new(coa_gen.generate());
2584
2585        let mut je_gen = JournalEntryGenerator::new_with_params(
2586            TransactionConfig::default(),
2587            coa,
2588            vec!["1000".to_string()],
2589            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2590            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2591            42,
2592        );
2593
2594        let mut balanced_count = 0;
2595        for _ in 0..100 {
2596            let entry = je_gen.generate();
2597
2598            // Skip entries with human errors as they may be intentionally unbalanced
2599            let has_human_error = entry
2600                .header
2601                .header_text
2602                .as_ref()
2603                .map(|t| t.contains("[HUMAN_ERROR:"))
2604                .unwrap_or(false);
2605
2606            if !has_human_error {
2607                assert!(
2608                    entry.is_balanced(),
2609                    "Entry {:?} is not balanced",
2610                    entry.header.document_id
2611                );
2612                balanced_count += 1;
2613            }
2614            assert!(entry.line_count() >= 2, "Entry has fewer than 2 lines");
2615        }
2616
2617        // Ensure most entries are balanced (human errors are rare)
2618        assert!(
2619            balanced_count >= 80,
2620            "Expected at least 80 balanced entries, got {}",
2621            balanced_count
2622        );
2623    }
2624
2625    #[test]
2626    fn test_deterministic_generation() {
2627        let mut coa_gen =
2628            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2629        let coa = Arc::new(coa_gen.generate());
2630
2631        let mut gen1 = JournalEntryGenerator::new_with_params(
2632            TransactionConfig::default(),
2633            Arc::clone(&coa),
2634            vec!["1000".to_string()],
2635            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2636            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2637            42,
2638        );
2639
2640        let mut gen2 = JournalEntryGenerator::new_with_params(
2641            TransactionConfig::default(),
2642            coa,
2643            vec!["1000".to_string()],
2644            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2645            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2646            42,
2647        );
2648
2649        for _ in 0..50 {
2650            let e1 = gen1.generate();
2651            let e2 = gen2.generate();
2652            assert_eq!(e1.header.document_id, e2.header.document_id);
2653            assert_eq!(e1.total_debit(), e2.total_debit());
2654        }
2655    }
2656
2657    #[test]
2658    fn test_templates_generate_descriptions() {
2659        let mut coa_gen =
2660            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2661        let coa = Arc::new(coa_gen.generate());
2662
2663        // Enable all template features
2664        let template_config = TemplateConfig {
2665            names: datasynth_config::schema::NameTemplateConfig {
2666                generate_realistic_names: true,
2667                email_domain: "test.com".to_string(),
2668                culture_distribution: datasynth_config::schema::CultureDistribution::default(),
2669            },
2670            descriptions: datasynth_config::schema::DescriptionTemplateConfig {
2671                generate_header_text: true,
2672                generate_line_text: true,
2673            },
2674            references: datasynth_config::schema::ReferenceTemplateConfig {
2675                generate_references: true,
2676                invoice_prefix: "TEST-INV".to_string(),
2677                po_prefix: "TEST-PO".to_string(),
2678                so_prefix: "TEST-SO".to_string(),
2679            },
2680            path: None,
2681            merge_strategy: datasynth_config::TemplateMergeStrategy::default(),
2682        };
2683
2684        let mut je_gen = JournalEntryGenerator::new_with_full_config(
2685            TransactionConfig::default(),
2686            coa,
2687            vec!["1000".to_string()],
2688            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2689            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2690            42,
2691            template_config,
2692            None,
2693        )
2694        .with_persona_errors(false); // Disable for template testing
2695
2696        for _ in 0..10 {
2697            let entry = je_gen.generate();
2698
2699            // Verify header text is populated
2700            assert!(
2701                entry.header.header_text.is_some(),
2702                "Header text should be populated"
2703            );
2704
2705            // Verify reference is populated
2706            assert!(
2707                entry.header.reference.is_some(),
2708                "Reference should be populated"
2709            );
2710
2711            // Verify business process is set
2712            assert!(
2713                entry.header.business_process.is_some(),
2714                "Business process should be set"
2715            );
2716
2717            // Verify line text is populated
2718            for line in &entry.lines {
2719                assert!(line.line_text.is_some(), "Line text should be populated");
2720            }
2721
2722            // Entry should still be balanced
2723            assert!(entry.is_balanced());
2724        }
2725    }
2726
2727    #[test]
2728    fn test_user_pool_integration() {
2729        let mut coa_gen =
2730            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2731        let coa = Arc::new(coa_gen.generate());
2732
2733        let companies = vec!["1000".to_string()];
2734
2735        // Generate user pool
2736        let mut user_gen = crate::UserGenerator::new(42);
2737        let user_pool = user_gen.generate_standard(&companies);
2738
2739        let mut je_gen = JournalEntryGenerator::new_with_full_config(
2740            TransactionConfig::default(),
2741            coa,
2742            companies,
2743            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2744            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2745            42,
2746            TemplateConfig::default(),
2747            Some(user_pool),
2748        );
2749
2750        // Generate entries and verify user IDs are from pool
2751        for _ in 0..20 {
2752            let entry = je_gen.generate();
2753
2754            // User ID should not be generic BATCH/USER format when pool is used
2755            // (though it may still fall back if random selection misses)
2756            assert!(!entry.header.created_by.is_empty());
2757        }
2758    }
2759
2760    #[test]
2761    fn test_master_data_connection() {
2762        let mut coa_gen =
2763            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2764        let coa = Arc::new(coa_gen.generate());
2765
2766        // Create test vendors
2767        let vendors = vec![
2768            Vendor::new("V-TEST-001", "Test Vendor Alpha", VendorType::Supplier),
2769            Vendor::new("V-TEST-002", "Test Vendor Beta", VendorType::Technology),
2770        ];
2771
2772        // Create test customers
2773        let customers = vec![
2774            Customer::new("C-TEST-001", "Test Customer One", CustomerType::Corporate),
2775            Customer::new(
2776                "C-TEST-002",
2777                "Test Customer Two",
2778                CustomerType::SmallBusiness,
2779            ),
2780        ];
2781
2782        // Create test materials
2783        let materials = vec![Material::new(
2784            "MAT-TEST-001",
2785            "Test Material A",
2786            MaterialType::RawMaterial,
2787        )];
2788
2789        // Create generator with master data
2790        let generator = JournalEntryGenerator::new_with_params(
2791            TransactionConfig::default(),
2792            coa,
2793            vec!["1000".to_string()],
2794            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2795            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2796            42,
2797        );
2798
2799        // Without master data
2800        assert!(!generator.is_using_real_master_data());
2801
2802        // Connect master data
2803        let generator_with_data = generator
2804            .with_vendors(&vendors)
2805            .with_customers(&customers)
2806            .with_materials(&materials);
2807
2808        // Should now be using real master data
2809        assert!(generator_with_data.is_using_real_master_data());
2810    }
2811
2812    #[test]
2813    fn test_with_master_data_convenience_method() {
2814        let mut coa_gen =
2815            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2816        let coa = Arc::new(coa_gen.generate());
2817
2818        let vendors = vec![Vendor::new("V-001", "Vendor One", VendorType::Supplier)];
2819        let customers = vec![Customer::new(
2820            "C-001",
2821            "Customer One",
2822            CustomerType::Corporate,
2823        )];
2824        let materials = vec![Material::new(
2825            "MAT-001",
2826            "Material One",
2827            MaterialType::RawMaterial,
2828        )];
2829
2830        let generator = JournalEntryGenerator::new_with_params(
2831            TransactionConfig::default(),
2832            coa,
2833            vec!["1000".to_string()],
2834            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2835            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2836            42,
2837        )
2838        .with_master_data(&vendors, &customers, &materials);
2839
2840        assert!(generator.is_using_real_master_data());
2841    }
2842
2843    #[test]
2844    fn test_stress_factors_increase_error_rate() {
2845        let mut coa_gen =
2846            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2847        let coa = Arc::new(coa_gen.generate());
2848
2849        let generator = JournalEntryGenerator::new_with_params(
2850            TransactionConfig::default(),
2851            coa,
2852            vec!["1000".to_string()],
2853            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2854            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2855            42,
2856        );
2857
2858        let base_rate = 0.1;
2859
2860        // Regular day - no stress factors
2861        let regular_day = NaiveDate::from_ymd_opt(2024, 6, 15).unwrap(); // Mid-June Wednesday
2862        let regular_rate = generator.apply_stress_factors(base_rate, regular_day);
2863        assert!(
2864            (regular_rate - base_rate).abs() < 0.01,
2865            "Regular day should have minimal stress factor adjustment"
2866        );
2867
2868        // Month end - 50% more errors
2869        let month_end = NaiveDate::from_ymd_opt(2024, 6, 29).unwrap(); // June 29 (Saturday)
2870        let month_end_rate = generator.apply_stress_factors(base_rate, month_end);
2871        assert!(
2872            month_end_rate > regular_rate,
2873            "Month end should have higher error rate than regular day"
2874        );
2875
2876        // Year end - double the error rate
2877        let year_end = NaiveDate::from_ymd_opt(2024, 12, 30).unwrap(); // December 30
2878        let year_end_rate = generator.apply_stress_factors(base_rate, year_end);
2879        assert!(
2880            year_end_rate > month_end_rate,
2881            "Year end should have highest error rate"
2882        );
2883
2884        // Friday stress
2885        let friday = NaiveDate::from_ymd_opt(2024, 6, 14).unwrap(); // Friday
2886        let friday_rate = generator.apply_stress_factors(base_rate, friday);
2887        assert!(
2888            friday_rate > regular_rate,
2889            "Friday should have higher error rate than mid-week"
2890        );
2891
2892        // Monday stress
2893        let monday = NaiveDate::from_ymd_opt(2024, 6, 17).unwrap(); // Monday
2894        let monday_rate = generator.apply_stress_factors(base_rate, monday);
2895        assert!(
2896            monday_rate > regular_rate,
2897            "Monday should have higher error rate than mid-week"
2898        );
2899    }
2900
2901    #[test]
2902    fn test_batching_produces_similar_entries() {
2903        let mut coa_gen =
2904            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2905        let coa = Arc::new(coa_gen.generate());
2906
2907        // Use seed 123 which is more likely to trigger batching
2908        let mut je_gen = JournalEntryGenerator::new_with_params(
2909            TransactionConfig::default(),
2910            coa,
2911            vec!["1000".to_string()],
2912            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2913            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2914            123,
2915        )
2916        .with_persona_errors(false); // Disable to ensure balanced entries
2917
2918        // Generate many entries - at 15% batch rate, should see some batches
2919        let entries: Vec<JournalEntry> = (0..200).map(|_| je_gen.generate()).collect();
2920
2921        // Check that all entries are balanced (batched or not)
2922        for entry in &entries {
2923            assert!(
2924                entry.is_balanced(),
2925                "All entries including batched should be balanced"
2926            );
2927        }
2928
2929        // Count entries with same-day posting dates (batch indicator)
2930        let mut date_counts: std::collections::HashMap<NaiveDate, usize> =
2931            std::collections::HashMap::new();
2932        for entry in &entries {
2933            *date_counts.entry(entry.header.posting_date).or_insert(0) += 1;
2934        }
2935
2936        // With batching, some dates should have multiple entries
2937        let dates_with_multiple = date_counts.values().filter(|&&c| c > 1).count();
2938        assert!(
2939            dates_with_multiple > 0,
2940            "With batching, should see some dates with multiple entries"
2941        );
2942    }
2943
/// Business-day handling: with `business_days.enabled` set and the "US"
/// calendar region configured (no custom holidays), no generated posting
/// date may fall on a Saturday or Sunday.
#[test]
fn test_temporal_patterns_business_days() {
    use datasynth_config::schema::{
        BusinessDaySchemaConfig, CalendarSchemaConfig, TemporalPatternsConfig,
    };

    let coa = Arc::new(
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42)
            .generate(),
    );

    // Assemble the temporal-patterns config from named parts; every field
    // not spelled out here keeps its default.
    let business_days = BusinessDaySchemaConfig {
        enabled: true,
        ..Default::default()
    };
    let calendars = CalendarSchemaConfig {
        regions: vec!["US".to_string()],
        custom_holidays: vec![],
    };
    let temporal_config = TemporalPatternsConfig {
        enabled: true,
        business_days,
        calendars,
        ..Default::default()
    };

    // Generation window: Q1 2024.
    let period_start = NaiveDate::from_ymd_opt(2024, 1, 1).unwrap();
    let period_end = NaiveDate::from_ymd_opt(2024, 3, 31).unwrap();

    let mut je_gen = JournalEntryGenerator::new_with_params(
        TransactionConfig::default(),
        coa,
        vec!["1000".to_string()],
        period_start,
        period_end,
        42,
    )
    .with_temporal_patterns(temporal_config, 42)
    .with_persona_errors(false);

    // Produce the whole sample first, then inspect each posting date.
    let entries: Vec<JournalEntry> = std::iter::repeat_with(|| je_gen.generate())
        .take(100)
        .collect();

    for entry in &entries {
        let posting_date = entry.header.posting_date;
        let on_weekend = matches!(
            posting_date.weekday(),
            chrono::Weekday::Sat | chrono::Weekday::Sun
        );
        assert!(
            !on_weekend,
            "Posting date {:?} should not be a weekend",
            posting_date
        );
    }
}
2991
/// Weekend postings must stay below 5% of the sample even though
/// temporal patterns are NOT enabled on this generator.
///
/// Per the original test note, this guards the fix where
/// `new_with_full_config` always creates a default `BusinessDayCalculator`
/// with US holidays as a fallback.
#[test]
fn test_default_generation_filters_weekends() {
    let coa = Arc::new(
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42)
            .generate(),
    );

    let year_start = NaiveDate::from_ymd_opt(2024, 1, 1).unwrap();
    let year_end = NaiveDate::from_ymd_opt(2024, 12, 31).unwrap();

    let mut je_gen = JournalEntryGenerator::new_with_params(
        TransactionConfig::default(),
        coa,
        vec!["1000".to_string()],
        year_start,
        year_end,
        42,
    )
    .with_persona_errors(false);

    let total: usize = 500;
    let entries: Vec<JournalEntry> = std::iter::repeat_with(|| je_gen.generate())
        .take(total)
        .collect();

    // Tally the entries whose posting date lands on Saturday or Sunday.
    let weekend_count = entries
        .iter()
        .map(|e| e.header.posting_date.weekday())
        .filter(|wd| matches!(wd, chrono::Weekday::Sat | chrono::Weekday::Sun))
        .count();

    let weekend_pct = weekend_count as f64 / total as f64;
    assert!(
        weekend_pct < 0.05,
        "Expected weekend entries <5% of total without temporal_patterns enabled, \
         but got {:.1}% ({}/{})",
        weekend_pct * 100.0,
        weekend_count,
        total
    );
}
3032
/// Document types should vary across generated entries: the sample must
/// contain more than three distinct `document_type` values, and "SA" must
/// account for less than half of them.
#[test]
fn test_document_type_derived_from_business_process() {
    let coa = Arc::new(
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42)
            .generate(),
    );

    let year_start = NaiveDate::from_ymd_opt(2024, 1, 1).unwrap();
    let year_end = NaiveDate::from_ymd_opt(2024, 12, 31).unwrap();

    let mut je_gen = JournalEntryGenerator::new_with_params(
        TransactionConfig::default(),
        coa,
        vec!["1000".to_string()],
        year_start,
        year_end,
        99,
    )
    .with_persona_errors(false)
    .with_batching(false);

    let total: usize = 200;
    let entries: Vec<JournalEntry> = std::iter::repeat_with(|| je_gen.generate())
        .take(total)
        .collect();

    // Distinct document types seen in the sample.
    let doc_types: std::collections::HashSet<_> = entries
        .iter()
        .map(|e| e.header.document_type.clone())
        .collect();

    // Number of entries carrying the "SA" document type.
    let sa_count = entries
        .iter()
        .filter(|e| {
            let dt = &e.header.document_type;
            dt == "SA"
        })
        .count();

    // Should have more than 3 distinct document types
    assert!(
        doc_types.len() > 3,
        "Expected >3 distinct document types, got {} ({:?})",
        doc_types.len(),
        doc_types,
    );

    // "SA" should be less than 50% (R2R is 20% of the weight, per the
    // original test note)
    let sa_pct = sa_count as f64 / total as f64;
    assert!(
        sa_pct < 0.50,
        "Expected SA <50%, got {:.1}% ({}/{})",
        sa_pct * 100.0,
        sa_count,
        total,
    );
}
3081
/// Line enrichment: more than 95% of all generated lines must carry an
/// `account_description`.
#[test]
fn test_enrich_line_items_account_description() {
    let coa = Arc::new(
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42)
            .generate(),
    );

    let year_start = NaiveDate::from_ymd_opt(2024, 1, 1).unwrap();
    let year_end = NaiveDate::from_ymd_opt(2024, 12, 31).unwrap();

    let mut je_gen = JournalEntryGenerator::new_with_params(
        TransactionConfig::default(),
        coa,
        vec!["1000".to_string()],
        year_start,
        year_end,
        42,
    )
    .with_persona_errors(false);

    let entries: Vec<JournalEntry> = std::iter::repeat_with(|| je_gen.generate())
        .take(200)
        .collect();

    // Single pass over every line: count all lines and, alongside, those
    // with a populated account_description.
    let (total_lines, lines_with_desc) = entries.iter().flat_map(|e| &e.lines).fold(
        (0_usize, 0_usize),
        |(seen, described), line| {
            let has_desc = line.account_description.is_some();
            (seen + 1, described + usize::from(has_desc))
        },
    );

    let desc_pct = lines_with_desc as f64 / total_lines as f64;
    assert!(
        desc_pct > 0.95,
        "Expected >95% of lines to have account_description, got {:.1}% ({}/{})",
        desc_pct * 100.0,
        lines_with_desc,
        total_lines,
    );
}
3118
/// Line enrichment: among lines posted to accounts whose number starts
/// with '5' or '6' (treated here as expense accounts), more than 80% must
/// carry a `cost_center`. If the sample contains no such lines the test
/// passes without asserting anything.
#[test]
fn test_enrich_line_items_cost_center_for_expense_accounts() {
    let coa = Arc::new(
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42)
            .generate(),
    );

    let year_start = NaiveDate::from_ymd_opt(2024, 1, 1).unwrap();
    let year_end = NaiveDate::from_ymd_opt(2024, 12, 31).unwrap();

    let mut je_gen = JournalEntryGenerator::new_with_params(
        TransactionConfig::default(),
        coa,
        vec!["1000".to_string()],
        year_start,
        year_end,
        42,
    )
    .with_persona_errors(false);

    let entries: Vec<JournalEntry> = std::iter::repeat_with(|| je_gen.generate())
        .take(300)
        .collect();

    // Keep only lines on 5xxx/6xxx accounts; an empty account string
    // never matches.
    let expense_lines: Vec<&JournalEntryLine> = entries
        .iter()
        .flat_map(|e| &e.lines)
        .filter(|l| matches!(l.gl_account.chars().next(), Some('5' | '6')))
        .collect();

    // Nothing to measure: avoid dividing by zero below.
    if expense_lines.is_empty() {
        return;
    }

    let with_cc = expense_lines
        .iter()
        .filter(|l| l.cost_center.is_some())
        .count();
    let cc_pct = with_cc as f64 / expense_lines.len() as f64;
    assert!(
        cc_pct > 0.80,
        "Expected >80% of expense lines to have cost_center, got {:.1}% ({}/{})",
        cc_pct * 100.0,
        with_cc,
        expense_lines.len(),
    );
}
3163
/// Line enrichment: more than 95% of all generated lines must carry a
/// `profit_center`, and more than 95% must carry a `line_text`.
#[test]
fn test_enrich_line_items_profit_center_and_line_text() {
    /// Counts the lines, across all `entries`, for which `is_populated` holds.
    fn count_lines(
        entries: &[JournalEntry],
        is_populated: impl Fn(&JournalEntryLine) -> bool,
    ) -> usize {
        entries
            .iter()
            .flat_map(|e| &e.lines)
            .filter(|line| is_populated(line))
            .count()
    }

    let coa = Arc::new(
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42)
            .generate(),
    );

    let year_start = NaiveDate::from_ymd_opt(2024, 1, 1).unwrap();
    let year_end = NaiveDate::from_ymd_opt(2024, 12, 31).unwrap();

    let mut je_gen = JournalEntryGenerator::new_with_params(
        TransactionConfig::default(),
        coa,
        vec!["1000".to_string()],
        year_start,
        year_end,
        42,
    )
    .with_persona_errors(false);

    let entries: Vec<JournalEntry> = std::iter::repeat_with(|| je_gen.generate())
        .take(100)
        .collect();

    let total_lines: usize = entries.iter().map(|e| e.lines.len()).sum();

    // Share of lines with a profit_center must exceed 95%.
    let with_pc = count_lines(&entries, |l| l.profit_center.is_some());
    let pc_pct = with_pc as f64 / total_lines as f64;
    assert!(
        pc_pct > 0.95,
        "Expected >95% of lines to have profit_center, got {:.1}% ({}/{})",
        pc_pct * 100.0,
        with_pc,
        total_lines,
    );

    // Share of lines with a line_text (either from template or header
    // fallback, per the original test note) must exceed 95%.
    let with_text = count_lines(&entries, |l| l.line_text.is_some());
    let text_pct = with_text as f64 / total_lines as f64;
    assert!(
        text_pct > 0.95,
        "Expected >95% of lines to have line_text, got {:.1}% ({}/{})",
        text_pct * 100.0,
        with_text,
        total_lines,
    );
}
3215
3216    // --- ISA 240 audit flag tests ---
3217
/// Every generated entry must have its audit-relevant header fields
/// populated: a non-empty `source_system`, a non-empty `created_by`, and
/// a `created_date`.
#[test]
fn test_je_has_audit_flags() {
    let coa = Arc::new(
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42)
            .generate(),
    );

    let year_start = NaiveDate::from_ymd_opt(2024, 1, 1).unwrap();
    let year_end = NaiveDate::from_ymd_opt(2024, 12, 31).unwrap();

    let mut je_gen = JournalEntryGenerator::new_with_params(
        TransactionConfig::default(),
        coa,
        vec!["1000".to_string()],
        year_start,
        year_end,
        42,
    )
    .with_persona_errors(false);

    // Entries are produced lazily, one per iteration, and checked as they come.
    for entry in std::iter::repeat_with(|| je_gen.generate()).take(100) {
        let header = &entry.header;

        // source_system should always be non-empty
        let has_source_system = !header.source_system.is_empty();
        assert!(
            has_source_system,
            "source_system should be populated, got empty string"
        );

        // created_by should always be non-empty
        let has_creator = !header.created_by.is_empty();
        assert!(has_creator, "created_by should be populated");

        // created_date should always be populated
        let has_created_date = header.created_date.is_some();
        assert!(has_created_date, "created_date should be populated");
    }
}
3256
/// The share of manual entries in a 1000-entry sample must lie strictly
/// between 1% and 50%, and each header's `is_manual` flag must agree with
/// `source == TransactionSource::Manual`.
#[test]
fn test_manual_entry_rate() {
    let coa = Arc::new(
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42)
            .generate(),
    );

    let year_start = NaiveDate::from_ymd_opt(2024, 1, 1).unwrap();
    let year_end = NaiveDate::from_ymd_opt(2024, 12, 31).unwrap();

    let mut je_gen = JournalEntryGenerator::new_with_params(
        TransactionConfig::default(),
        coa,
        vec!["1000".to_string()],
        year_start,
        year_end,
        42,
    )
    .with_persona_errors(false)
    .with_batching(false);

    let total: usize = 1000;
    let entries: Vec<JournalEntry> = std::iter::repeat_with(|| je_gen.generate())
        .take(total)
        .collect();

    let manual_count = entries
        .iter()
        .map(|e| e.header.is_manual)
        .filter(|&flag| flag)
        .count();
    let manual_rate = manual_count as f64 / total as f64;

    // The original test note puts the default source_distribution.manual
    // at roughly 0.05-0.15; the bounds are deliberately wide to absorb
    // statistical variation. Both bounds are exclusive.
    let above_floor = manual_rate > 0.01;
    let below_ceiling = manual_rate < 0.50;
    assert!(
        above_floor && below_ceiling,
        "Manual entry rate should be reasonable (1%-50%), got {:.1}% ({}/{})",
        manual_rate * 100.0,
        manual_count,
        total,
    );

    // The boolean flag and the source enum must tell the same story.
    for header in entries.iter().map(|e| &e.header) {
        let source_is_manual = header.source == TransactionSource::Manual;
        assert_eq!(
            header.is_manual, source_is_manual,
            "is_manual should match source == Manual"
        );
    }
}
3299
/// `is_manual` and `source_system` must agree: manual entries carry a
/// source system from the `manual` / `spreadsheet` family, and non-manual
/// entries never do.
#[test]
fn test_manual_source_consistency() {
    let coa = Arc::new(
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42)
            .generate(),
    );

    let year_start = NaiveDate::from_ymd_opt(2024, 1, 1).unwrap();
    let year_end = NaiveDate::from_ymd_opt(2024, 12, 31).unwrap();

    let mut je_gen = JournalEntryGenerator::new_with_params(
        TransactionConfig::default(),
        coa,
        vec!["1000".to_string()],
        year_start,
        year_end,
        42,
    )
    .with_persona_errors(false)
    .with_batching(false);

    for entry in std::iter::repeat_with(|| je_gen.generate()).take(500) {
        let source_system = entry.header.source_system.as_str();

        // Family membership: the `manual/...` or `spreadsheet/...`
        // prefixes, plus the bare legacy `manual` and `spreadsheet`
        // values (accepted to keep older fixtures working).
        let is_bare_legacy = matches!(source_system, "manual" | "spreadsheet");
        let has_family_prefix =
            source_system.starts_with("manual/") || source_system.starts_with("spreadsheet/");
        let in_manual_family = is_bare_legacy || has_family_prefix;

        if entry.header.is_manual {
            // Manual entries must be in the family.
            assert!(
                in_manual_family,
                "Manual entry should have source_system in `manual` / `spreadsheet` family, got '{s}'",
                s = source_system,
            );
        } else {
            // Non-manual entries must NOT be in the family.
            assert!(
                !in_manual_family,
                "Non-manual entry should not be in `manual` / `spreadsheet` family, got '{s}'",
                s = source_system,
            );
        }
    }
}
3346
/// Whenever an entry has a `created_date`, its calendar date must not be
/// later than the entry's `posting_date`. Entries without a
/// `created_date` are skipped.
#[test]
fn test_created_date_before_posting() {
    let coa = Arc::new(
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42)
            .generate(),
    );

    let year_start = NaiveDate::from_ymd_opt(2024, 1, 1).unwrap();
    let year_end = NaiveDate::from_ymd_opt(2024, 12, 31).unwrap();

    let mut je_gen = JournalEntryGenerator::new_with_params(
        TransactionConfig::default(),
        coa,
        vec!["1000".to_string()],
        year_start,
        year_end,
        42,
    )
    .with_persona_errors(false);

    for entry in std::iter::repeat_with(|| je_gen.generate()).take(500) {
        let posting_date = entry.header.posting_date;

        // Compare on the date component only; the time of day is dropped.
        if let Some(created_on) = entry.header.created_date.map(|stamp| stamp.date()) {
            assert!(
                created_on <= posting_date,
                "created_date ({}) should be <= posting_date ({})",
                created_on,
                posting_date,
            );
        }
    }
}
3377}