Skip to main content

datasynth_generators/
je_generator.rs

1//! Journal Entry generator with statistical distributions.
2
3use chrono::{Datelike, NaiveDate, Timelike};
4use datasynth_core::utils::seeded_rng;
5use rand::prelude::*;
6use rand_chacha::ChaCha8Rng;
7use rust_decimal::prelude::*;
8use rust_decimal::Decimal;
9use std::sync::Arc;
10
11use tracing::debug;
12
13use datasynth_config::schema::{
14    AdvancedDistributionConfig, FraudConfig, GeneratorConfig, MixtureDistributionType,
15    TemplateConfig, TemporalPatternsConfig, TransactionConfig,
16};
17use datasynth_core::distributions::{
18    AdvancedAmountSampler, BusinessDayCalculator, CrossDayConfig, DriftAdjustments, DriftConfig,
19    DriftController, EventType, IndustryAmountProfile, IndustryType, LagDistribution,
20    PeriodEndConfig, PeriodEndDynamics, PeriodEndModel, ProcessingLagCalculator,
21    ProcessingLagConfig, *,
22};
23use datasynth_core::models::*;
24use datasynth_core::templates::{
25    descriptions::DescriptionContext, DescriptionGenerator, ReferenceGenerator, ReferenceType,
26};
27use datasynth_core::traits::Generator;
28use datasynth_core::uuid_factory::{DeterministicUuidFactory, GeneratorType};
29use datasynth_core::CountryPack;
30
31use crate::company_selector::WeightedCompanySelector;
32use crate::user_generator::{UserGenerator, UserGeneratorConfig};
33
34/// Generator for realistic journal entries.
35pub struct JournalEntryGenerator {
36    rng: ChaCha8Rng,
37    seed: u64,
38    config: TransactionConfig,
39    coa: Arc<ChartOfAccounts>,
40    companies: Vec<String>,
41    company_selector: WeightedCompanySelector,
42    line_sampler: LineItemSampler,
43    amount_sampler: AmountSampler,
44    temporal_sampler: TemporalSampler,
45    start_date: NaiveDate,
46    end_date: NaiveDate,
47    count: u64,
48    uuid_factory: DeterministicUuidFactory,
49    // Enhanced features
50    user_pool: Option<UserPool>,
51    description_generator: DescriptionGenerator,
52    reference_generator: ReferenceGenerator,
53    template_config: TemplateConfig,
54    vendor_pool: VendorPool,
55    customer_pool: CustomerPool,
56    // Material pool for realistic material references
57    material_pool: Option<MaterialPool>,
58    // Flag indicating whether we're using real master data vs defaults
59    using_real_master_data: bool,
60    // Fraud generation
61    fraud_config: FraudConfig,
62    // Persona-based error injection
63    persona_errors_enabled: bool,
64    // Approval threshold enforcement
65    approval_enabled: bool,
66    approval_threshold: rust_decimal::Decimal,
67    // SOD violation rate for approval tracking (0.0 to 1.0)
68    sod_violation_rate: f64,
69    // Batching behavior - humans often process similar items together
70    batch_state: Option<BatchState>,
71    // Temporal drift controller for simulating distribution changes over time
72    drift_controller: Option<DriftController>,
73    // Temporal patterns components
74    business_day_calculator: Option<BusinessDayCalculator>,
75    processing_lag_calculator: Option<ProcessingLagCalculator>,
76    temporal_patterns_config: Option<TemporalPatternsConfig>,
77    // Business-process weights for the O2C/P2P/R2R/H2R/A2R volume mix. Must
78    // sum to 1.0 (validated by config schema). Default matches the legacy
79    // hard-coded 0.35/0.30/0.20/0.10/0.05 distribution.
80    business_process_weights: [(BusinessProcess, f64); 5],
81    // v3.4.0 advanced distributions (mixture models + industry profiles).
82    // None preserves v3.3.2 byte-for-byte behavior; populated only when the
83    // caller opts in via [`set_advanced_distributions`].
84    advanced_amount_sampler: Option<AdvancedAmountSampler>,
85    // v3.5.3+ conditional amount override. Populated when
86    // `config.distributions.conditional` contains an entry where
87    // `output_field == "amount"` and `input_field ∈ {"month",
88    // "quarter", "constant"}`. Applied *after* the fraud-pattern /
89    // advanced-sampler / legacy-sampler cascade on non-fraud entries
90    // so it can steer amounts by calendar context without disturbing
91    // fraud semantics.
92    conditional_amount_override: Option<datasynth_core::distributions::ConditionalSampler>,
93    // v3.5.4+ Gaussian copula for amount↔line_count correlation. When
94    // populated, each non-fraud JE draws a (u, v) pair; u nudges amount
95    // via a `(0.75 + 0.5*u)` multiplier and v biases line_count toward
96    // the upper/lower end of its range. Produces observable Spearman
97    // correlation without rewiring existing samplers for inverse-CDF.
98    correlation_copula: Option<datasynth_core::distributions::BivariateCopulaSampler>,
99}
100
/// Default business-process volume mix, used to initialise
/// `business_process_weights` in the constructor.
///
/// Each entry pairs a [`BusinessProcess`] with its share of generated
/// volume; the five shares (0.35 + 0.30 + 0.20 + 0.10 + 0.05) add up to 1.0.
const DEFAULT_BUSINESS_PROCESS_WEIGHTS: [(BusinessProcess, f64); 5] = [
    (BusinessProcess::O2C, 0.35),
    (BusinessProcess::P2P, 0.30),
    (BusinessProcess::R2R, 0.20),
    (BusinessProcess::H2R, 0.10),
    (BusinessProcess::A2R, 0.05),
];
108
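/// NOTE: the next paragraph describes the free function
/// `industry_profile_to_log_normal` (defined after this `impl` block),
/// not the `impl` block itself:
///
/// Map the schema-level [`datasynth_config::schema::IndustryProfileType`]
/// onto the distributions-layer [`IndustryType`], then return that industry's
/// pre-configured `sales_amounts` mixture. Used as a fallback when the
/// caller enables `distributions.amounts` but supplies no components.
///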
113/// Per-entry context channels for conditional-distribution overrides.
114///
115/// v4.1.0+ supported `input_field` values:
116///
117///   - `"month"` — posting-date month (1..=12)
118///   - `"quarter"` — posting-date quarter (1..=4)
119///   - `"year"` — posting-date year (e.g. 2026.0)
120///   - `"day_of_week"` — 1 (Mon) .. 7 (Sun)
121///   - `"day_of_month"` — 1..=31
122///   - `"day_of_year"` — 1..=366
123///   - `"week_of_year"` — 1..=53
124///   - `"is_period_end"` — 1.0 when posting_date is the last business
125///     day of the month, else 0.0
126///   - `"is_quarter_end"` — 1.0 when posting_date is in a quarter-end
127///     month AND is the last business day, else 0.0
128///   - `"is_year_end"` — 1.0 when posting_date is in December AND is
129///     the last business day, else 0.0
130///   - `"constant"` / empty — always 0.0 (treats as unconditional)
131///
132/// Unsupported values cause the conditional rule to be silently ignored
133/// to keep runtime robust against user typos.
134impl JournalEntryGenerator {
135    fn supported_conditional_input(field: &str) -> bool {
136        matches!(
137            field,
138            "month"
139                | "quarter"
140                | "year"
141                | "day_of_week"
142                | "day_of_month"
143                | "day_of_year"
144                | "week_of_year"
145                | "is_period_end"
146                | "is_quarter_end"
147                | "is_year_end"
148                | "constant"
149                | ""
150        )
151    }
152
153    fn conditional_input_value(&self, posting_date: chrono::NaiveDate) -> f64 {
154        let input_field = match self
155            .conditional_amount_override
156            .as_ref()
157            .map(|s| s.config().input_field.as_str())
158        {
159            Some(f) => f,
160            None => return 0.0,
161        };
162
163        let is_last_business_day = |d: chrono::NaiveDate| -> bool {
164            // Last day-of-month → is_period_end. Handles Feb/leap-year
165            // via chrono's num_days_from_ce roundabout; simpler path:
166            // if adding 1 day moves to a different month, this is EOM.
167            let next = d.succ_opt();
168            match next {
169                Some(n) => n.month() != d.month(),
170                None => true,
171            }
172        };
173
174        match input_field {
175            "month" => posting_date.month() as f64,
176            "quarter" => ((posting_date.month() - 1) / 3 + 1) as f64,
177            "year" => posting_date.year() as f64,
178            "day_of_week" => posting_date.weekday().number_from_monday() as f64,
179            "day_of_month" => posting_date.day() as f64,
180            "day_of_year" => posting_date.ordinal() as f64,
181            "week_of_year" => posting_date.iso_week().week() as f64,
182            "is_period_end" => f64::from(u8::from(is_last_business_day(posting_date))),
183            "is_quarter_end" => {
184                let m = posting_date.month();
185                let is_q_month = matches!(m, 3 | 6 | 9 | 12);
186                f64::from(u8::from(is_q_month && is_last_business_day(posting_date)))
187            }
188            "is_year_end" => f64::from(u8::from(
189                posting_date.month() == 12 && is_last_business_day(posting_date),
190            )),
191            _ => 0.0,
192        }
193    }
194}
195
196fn industry_profile_to_log_normal(
197    p: datasynth_config::schema::IndustryProfileType,
198) -> datasynth_core::distributions::LogNormalMixtureConfig {
199    use datasynth_config::schema::IndustryProfileType as P;
200    let industry = match p {
201        P::Retail => IndustryType::Retail,
202        P::Manufacturing => IndustryType::Manufacturing,
203        P::FinancialServices => IndustryType::FinancialServices,
204        P::Healthcare => IndustryType::Healthcare,
205        P::Technology => IndustryType::Technology,
206    };
207    IndustryAmountProfile::for_industry(industry).sales_amounts
208}
209
210/// State for tracking batch processing behavior.
211///
212/// When humans process transactions, they often batch similar items together
213/// (e.g., processing all invoices from one vendor, entering similar expenses).
214#[derive(Clone)]
215struct BatchState {
216    /// The base entry template to vary
217    base_account_number: String,
218    base_amount: rust_decimal::Decimal,
219    base_business_process: Option<BusinessProcess>,
220    base_posting_date: NaiveDate,
221    /// Remaining entries in this batch
222    remaining: u8,
223}
224
225impl JournalEntryGenerator {
226    /// Create a new journal entry generator.
227    pub fn new_with_params(
228        config: TransactionConfig,
229        coa: Arc<ChartOfAccounts>,
230        companies: Vec<String>,
231        start_date: NaiveDate,
232        end_date: NaiveDate,
233        seed: u64,
234    ) -> Self {
235        Self::new_with_full_config(
236            config,
237            coa,
238            companies,
239            start_date,
240            end_date,
241            seed,
242            TemplateConfig::default(),
243            None,
244        )
245    }
246
247    /// Create a new journal entry generator with full configuration.
248    #[allow(clippy::too_many_arguments)]
249    pub fn new_with_full_config(
250        config: TransactionConfig,
251        coa: Arc<ChartOfAccounts>,
252        companies: Vec<String>,
253        start_date: NaiveDate,
254        end_date: NaiveDate,
255        seed: u64,
256        template_config: TemplateConfig,
257        user_pool: Option<UserPool>,
258    ) -> Self {
259        // Initialize user pool if not provided
260        let user_pool = user_pool.or_else(|| {
261            if template_config.names.generate_realistic_names {
262                let user_gen_config = UserGeneratorConfig {
263                    culture_distribution: vec![
264                        (
265                            datasynth_core::templates::NameCulture::WesternUs,
266                            template_config.names.culture_distribution.western_us,
267                        ),
268                        (
269                            datasynth_core::templates::NameCulture::Hispanic,
270                            template_config.names.culture_distribution.hispanic,
271                        ),
272                        (
273                            datasynth_core::templates::NameCulture::German,
274                            template_config.names.culture_distribution.german,
275                        ),
276                        (
277                            datasynth_core::templates::NameCulture::French,
278                            template_config.names.culture_distribution.french,
279                        ),
280                        (
281                            datasynth_core::templates::NameCulture::Chinese,
282                            template_config.names.culture_distribution.chinese,
283                        ),
284                        (
285                            datasynth_core::templates::NameCulture::Japanese,
286                            template_config.names.culture_distribution.japanese,
287                        ),
288                        (
289                            datasynth_core::templates::NameCulture::Indian,
290                            template_config.names.culture_distribution.indian,
291                        ),
292                    ],
293                    email_domain: template_config.names.email_domain.clone(),
294                    generate_realistic_names: true,
295                };
296                let mut user_gen = UserGenerator::with_config(seed + 100, user_gen_config);
297                Some(user_gen.generate_standard(&companies))
298            } else {
299                None
300            }
301        });
302
303        // Initialize reference generator
304        let mut ref_gen = ReferenceGenerator::new(
305            start_date.year(),
306            companies
307                .first()
308                .map(std::string::String::as_str)
309                .unwrap_or("1000"),
310        );
311        ref_gen.set_prefix(
312            ReferenceType::Invoice,
313            &template_config.references.invoice_prefix,
314        );
315        ref_gen.set_prefix(
316            ReferenceType::PurchaseOrder,
317            &template_config.references.po_prefix,
318        );
319        ref_gen.set_prefix(
320            ReferenceType::SalesOrder,
321            &template_config.references.so_prefix,
322        );
323
324        // Create weighted company selector (uniform weights for this constructor)
325        let company_selector = WeightedCompanySelector::uniform(companies.clone());
326
327        Self {
328            rng: seeded_rng(seed, 0),
329            seed,
330            config: config.clone(),
331            coa,
332            companies,
333            company_selector,
334            line_sampler: LineItemSampler::with_config(
335                seed + 1,
336                config.line_item_distribution.clone(),
337                config.even_odd_distribution.clone(),
338                config.debit_credit_distribution.clone(),
339            ),
340            amount_sampler: AmountSampler::with_config(seed + 2, config.amounts.clone()),
341            temporal_sampler: TemporalSampler::with_config(
342                seed + 3,
343                config.seasonality.clone(),
344                WorkingHoursConfig::default(),
345                Vec::new(),
346            ),
347            start_date,
348            end_date,
349            count: 0,
350            uuid_factory: DeterministicUuidFactory::new(seed, GeneratorType::JournalEntry),
351            user_pool,
352            description_generator: DescriptionGenerator::new(),
353            reference_generator: ref_gen,
354            template_config,
355            vendor_pool: VendorPool::standard(),
356            customer_pool: CustomerPool::standard(),
357            material_pool: None,
358            using_real_master_data: false,
359            fraud_config: FraudConfig::default(),
360            persona_errors_enabled: true, // Enable by default for realism
361            approval_enabled: true,       // Enable by default for realism
362            approval_threshold: rust_decimal::Decimal::new(10000, 0), // $10,000 default threshold
363            sod_violation_rate: 0.10,     // 10% default SOD violation rate
364            batch_state: None,
365            drift_controller: None,
366            // Always provide a basic BusinessDayCalculator so that weekend/holiday
367            // filtering is active even when temporal_patterns is not explicitly enabled.
368            business_day_calculator: Some(BusinessDayCalculator::new(HolidayCalendar::new(
369                Region::US,
370                start_date.year(),
371            ))),
372            processing_lag_calculator: None,
373            temporal_patterns_config: None,
374            business_process_weights: DEFAULT_BUSINESS_PROCESS_WEIGHTS,
375            advanced_amount_sampler: None,
376            conditional_amount_override: None,
377            correlation_copula: None,
378        }
379    }
380
381    /// Wire v3.4.0 advanced distributions. When the caller's config has
382    /// `distributions.enabled = true` AND `distributions.amounts.enabled =
383    /// true`, the journal-entry generator routes non-fraud amount sampling
384    /// through an [`AdvancedAmountSampler`] (log-normal or Gaussian mixture).
385    ///
386    /// When `distributions.industry_profile` is `Some`, the caller's
387    /// explicitly configured components override nothing — if the component
388    /// list is empty, the industry profile's `sales_amounts` mixture is used
389    /// instead. Explicit components always win.
390    ///
391    /// Returning `Ok(())` with no side effect is intentional for the
392    /// following no-op cases, so callers can unconditionally invoke this:
393    ///   - `config.enabled = false`
394    ///   - `config.amounts.enabled = false`
395    ///   - empty component list with no industry profile
396    ///
397    /// Errors propagate from mixture validation (e.g. weights not summing
398    /// to 1.0, non-positive sigma).
399    pub fn set_advanced_distributions(
400        &mut self,
401        config: &AdvancedDistributionConfig,
402        seed: u64,
403    ) -> Result<(), String> {
404        if !config.enabled {
405            return Ok(());
406        }
407
408        // v3.5.3+: build a conditional-amount override when the config
409        // declares a rule with `output_field == "amount"` and a supported
410        // input field. The override is applied *after* the standard
411        // cascade so it doesn't disturb fraud-path sampling. Unsupported
412        // input fields are ignored with a trace log.
413        self.conditional_amount_override = config
414            .conditional
415            .iter()
416            .find(|c| {
417                c.output_field == "amount" && Self::supported_conditional_input(&c.input_field)
418            })
419            .and_then(|c| {
420                datasynth_core::distributions::ConditionalSampler::new(
421                    seed.wrapping_add(17),
422                    c.to_core_config(),
423                )
424                .ok()
425            });
426
427        // v4.1.0+: all 5 copula types wired (Gaussian / Clayton /
428        // Gumbel / Frank / Student-t). The `BivariateCopulaSampler`
429        // already implements each; v3.5.4 had a filter limiting to
430        // Gaussian only — lifted here now that the smoke test matrix
431        // covers all types.
432        self.correlation_copula = config
433            .correlations
434            .to_core_config_for_pair("amount", "line_count")
435            .and_then(|copula_cfg| {
436                datasynth_core::distributions::BivariateCopulaSampler::new(
437                    seed.wrapping_add(31),
438                    copula_cfg,
439                )
440                .ok()
441            });
442
443        // v3.4.4+: Pareto takes precedence over mixture models when set.
444        // This supports heavy-tailed amount distributions (capex, strategic
445        // contracts, fraud) that log-normal/Gaussian mixtures can't model
446        // as sharply.
447        if let Some(pareto) = &config.pareto {
448            if pareto.enabled {
449                let core_cfg = pareto.to_core_config();
450                self.advanced_amount_sampler =
451                    Some(AdvancedAmountSampler::new_pareto(seed, core_cfg)?);
452                return Ok(());
453            }
454        }
455
456        if !config.amounts.enabled {
457            return Ok(());
458        }
459
460        match config.amounts.distribution_type {
461            MixtureDistributionType::LogNormal => {
462                let lognormal_cfg = config
463                    .amounts
464                    .to_log_normal_config()
465                    .or_else(|| config.industry_profile.map(industry_profile_to_log_normal));
466                if let Some(cfg) = lognormal_cfg {
467                    self.advanced_amount_sampler =
468                        Some(AdvancedAmountSampler::new_log_normal(seed, cfg)?);
469                }
470            }
471            MixtureDistributionType::Gaussian => {
472                if let Some(cfg) = config.amounts.to_gaussian_config() {
473                    self.advanced_amount_sampler =
474                        Some(AdvancedAmountSampler::new_gaussian(seed, cfg)?);
475                }
476            }
477        }
478
479        Ok(())
480    }
481
482    /// Override the business-process volume mix. Weights map directly to the
483    /// `business_processes.*_weight` YAML config; they do not have to sum to
484    /// exactly 1.0 (they're normalized via `weighted_select`).
485    pub fn set_business_process_weights(
486        &mut self,
487        o2c: f64,
488        p2p: f64,
489        r2r: f64,
490        h2r: f64,
491        a2r: f64,
492    ) {
493        self.business_process_weights = [
494            (BusinessProcess::O2C, o2c),
495            (BusinessProcess::P2P, p2p),
496            (BusinessProcess::R2R, r2r),
497            (BusinessProcess::H2R, h2r),
498            (BusinessProcess::A2R, a2r),
499        ];
500    }
501
502    /// Create from a full GeneratorConfig.
503    ///
504    /// This constructor uses the volume_weight from company configs
505    /// for weighted company selection, and fraud config from GeneratorConfig.
506    pub fn from_generator_config(
507        full_config: &GeneratorConfig,
508        coa: Arc<ChartOfAccounts>,
509        start_date: NaiveDate,
510        end_date: NaiveDate,
511        seed: u64,
512    ) -> Self {
513        let companies: Vec<String> = full_config
514            .companies
515            .iter()
516            .map(|c| c.code.clone())
517            .collect();
518
519        // Create weighted selector using volume_weight from company configs
520        let company_selector = WeightedCompanySelector::from_configs(&full_config.companies);
521
522        let mut generator = Self::new_with_full_config(
523            full_config.transactions.clone(),
524            coa,
525            companies,
526            start_date,
527            end_date,
528            seed,
529            full_config.templates.clone(),
530            None,
531        );
532
533        // Override the uniform selector with weighted selector
534        generator.company_selector = company_selector;
535
536        // Set fraud config
537        generator.fraud_config = full_config.fraud.clone();
538
539        // Configure temporal patterns if enabled
540        let temporal_config = &full_config.temporal_patterns;
541        if temporal_config.enabled {
542            generator = generator.with_temporal_patterns(temporal_config.clone(), seed);
543        }
544
545        generator
546    }
547
548    /// Configure temporal patterns including business day calculations and processing lags.
549    ///
550    /// This enables realistic temporal behavior including:
551    /// - Business day awareness (no postings on weekends/holidays)
552    /// - Processing lag modeling (event-to-posting delays)
553    /// - Period-end dynamics (volume spikes at month/quarter/year end)
554    pub fn with_temporal_patterns(mut self, config: TemporalPatternsConfig, seed: u64) -> Self {
555        // Create business day calculator if enabled
556        if config.business_days.enabled {
557            let region = config
558                .calendars
559                .regions
560                .first()
561                .map(|r| Self::parse_region(r))
562                .unwrap_or(Region::US);
563
564            let calendar = HolidayCalendar::new(region, self.start_date.year());
565            self.business_day_calculator = Some(BusinessDayCalculator::new(calendar));
566        }
567
568        // Create processing lag calculator if enabled
569        if config.processing_lags.enabled {
570            let lag_config = Self::convert_processing_lag_config(&config.processing_lags);
571            self.processing_lag_calculator =
572                Some(ProcessingLagCalculator::with_config(seed, lag_config));
573        }
574
575        // Create period-end dynamics if configured
576        let model = config.period_end.model.as_deref().unwrap_or("flat");
577        if model != "flat"
578            || config
579                .period_end
580                .month_end
581                .as_ref()
582                .is_some_and(|m| m.peak_multiplier.unwrap_or(1.0) != 1.0)
583        {
584            let dynamics = Self::convert_period_end_config(&config.period_end);
585            self.temporal_sampler.set_period_end_dynamics(dynamics);
586        }
587
588        self.temporal_patterns_config = Some(config);
589        self
590    }
591
592    /// Configure temporal patterns using a [`CountryPack`] for the holiday calendar.
593    ///
594    /// This is an alternative to [`with_temporal_patterns`] that derives the
595    /// holiday calendar from a country-pack definition rather than the built-in
596    /// region-based calendars.  All other temporal behaviour (business-day
597    /// adjustment, processing lags, period-end dynamics) is configured
598    /// identically.
599    pub fn with_country_pack_temporal(
600        mut self,
601        config: TemporalPatternsConfig,
602        seed: u64,
603        pack: &CountryPack,
604    ) -> Self {
605        // Create business day calculator using the country pack calendar
606        if config.business_days.enabled {
607            let calendar = HolidayCalendar::from_country_pack(pack, self.start_date.year());
608            self.business_day_calculator = Some(BusinessDayCalculator::new(calendar));
609        }
610
611        // Create processing lag calculator if enabled
612        if config.processing_lags.enabled {
613            let lag_config = Self::convert_processing_lag_config(&config.processing_lags);
614            self.processing_lag_calculator =
615                Some(ProcessingLagCalculator::with_config(seed, lag_config));
616        }
617
618        // Create period-end dynamics if configured
619        let model = config.period_end.model.as_deref().unwrap_or("flat");
620        if model != "flat"
621            || config
622                .period_end
623                .month_end
624                .as_ref()
625                .is_some_and(|m| m.peak_multiplier.unwrap_or(1.0) != 1.0)
626        {
627            let dynamics = Self::convert_period_end_config(&config.period_end);
628            self.temporal_sampler.set_period_end_dynamics(dynamics);
629        }
630
631        self.temporal_patterns_config = Some(config);
632        self
633    }
634
635    /// Convert schema processing lag config to core config.
636    fn convert_processing_lag_config(
637        schema: &datasynth_config::schema::ProcessingLagSchemaConfig,
638    ) -> ProcessingLagConfig {
639        let mut config = ProcessingLagConfig {
640            enabled: schema.enabled,
641            ..Default::default()
642        };
643
644        // Helper to convert lag schema to distribution
645        let convert_lag = |lag: &datasynth_config::schema::LagDistributionSchemaConfig| {
646            let mut dist = LagDistribution::log_normal(lag.mu, lag.sigma);
647            if let Some(min) = lag.min_hours {
648                dist.min_lag_hours = min;
649            }
650            if let Some(max) = lag.max_hours {
651                dist.max_lag_hours = max;
652            }
653            dist
654        };
655
656        // Apply event-specific lags
657        if let Some(ref lag) = schema.sales_order_lag {
658            config
659                .event_lags
660                .insert(EventType::SalesOrder, convert_lag(lag));
661        }
662        if let Some(ref lag) = schema.purchase_order_lag {
663            config
664                .event_lags
665                .insert(EventType::PurchaseOrder, convert_lag(lag));
666        }
667        if let Some(ref lag) = schema.goods_receipt_lag {
668            config
669                .event_lags
670                .insert(EventType::GoodsReceipt, convert_lag(lag));
671        }
672        if let Some(ref lag) = schema.invoice_receipt_lag {
673            config
674                .event_lags
675                .insert(EventType::InvoiceReceipt, convert_lag(lag));
676        }
677        if let Some(ref lag) = schema.invoice_issue_lag {
678            config
679                .event_lags
680                .insert(EventType::InvoiceIssue, convert_lag(lag));
681        }
682        if let Some(ref lag) = schema.payment_lag {
683            config
684                .event_lags
685                .insert(EventType::Payment, convert_lag(lag));
686        }
687        if let Some(ref lag) = schema.journal_entry_lag {
688            config
689                .event_lags
690                .insert(EventType::JournalEntry, convert_lag(lag));
691        }
692
693        // Apply cross-day posting config
694        if let Some(ref cross_day) = schema.cross_day_posting {
695            config.cross_day = CrossDayConfig {
696                enabled: cross_day.enabled,
697                probability_by_hour: cross_day.probability_by_hour.clone(),
698                ..Default::default()
699            };
700        }
701
702        config
703    }
704
705    /// Convert schema period-end config to core PeriodEndDynamics.
706    fn convert_period_end_config(
707        schema: &datasynth_config::schema::PeriodEndSchemaConfig,
708    ) -> PeriodEndDynamics {
709        let model_type = schema.model.as_deref().unwrap_or("exponential");
710
711        // Helper to convert period config
712        let convert_period =
713            |period: Option<&datasynth_config::schema::PeriodEndModelSchemaConfig>,
714             default_peak: f64|
715             -> PeriodEndConfig {
716                if let Some(p) = period {
717                    let model = match model_type {
718                        "flat" => PeriodEndModel::FlatMultiplier {
719                            multiplier: p.peak_multiplier.unwrap_or(default_peak),
720                        },
721                        "extended_crunch" => PeriodEndModel::ExtendedCrunch {
722                            start_day: p.start_day.unwrap_or(-10),
723                            sustained_high_days: p.sustained_high_days.unwrap_or(3),
724                            peak_multiplier: p.peak_multiplier.unwrap_or(default_peak),
725                            ramp_up_days: 3, // Default ramp-up period
726                        },
727                        _ => PeriodEndModel::ExponentialAcceleration {
728                            start_day: p.start_day.unwrap_or(-10),
729                            base_multiplier: p.base_multiplier.unwrap_or(1.0),
730                            peak_multiplier: p.peak_multiplier.unwrap_or(default_peak),
731                            decay_rate: p.decay_rate.unwrap_or(0.3),
732                        },
733                    };
734                    PeriodEndConfig {
735                        enabled: true,
736                        model,
737                        additional_multiplier: p.additional_multiplier.unwrap_or(1.0),
738                    }
739                } else {
740                    PeriodEndConfig {
741                        enabled: true,
742                        model: PeriodEndModel::ExponentialAcceleration {
743                            start_day: -10,
744                            base_multiplier: 1.0,
745                            peak_multiplier: default_peak,
746                            decay_rate: 0.3,
747                        },
748                        additional_multiplier: 1.0,
749                    }
750                }
751            };
752
753        PeriodEndDynamics::new(
754            convert_period(schema.month_end.as_ref(), 2.0),
755            convert_period(schema.quarter_end.as_ref(), 3.5),
756            convert_period(schema.year_end.as_ref(), 5.0),
757        )
758    }
759
760    /// Parse a region string into a Region enum.
761    fn parse_region(region_str: &str) -> Region {
762        match region_str.to_uppercase().as_str() {
763            "US" => Region::US,
764            "DE" => Region::DE,
765            "GB" => Region::GB,
766            "CN" => Region::CN,
767            "JP" => Region::JP,
768            "IN" => Region::IN,
769            "BR" => Region::BR,
770            "MX" => Region::MX,
771            "AU" => Region::AU,
772            "SG" => Region::SG,
773            "KR" => Region::KR,
774            "FR" => Region::FR,
775            "IT" => Region::IT,
776            "ES" => Region::ES,
777            "CA" => Region::CA,
778            _ => Region::US,
779        }
780    }
781
782    /// Set a custom company selector.
783    pub fn set_company_selector(&mut self, selector: WeightedCompanySelector) {
784        self.company_selector = selector;
785    }
786
787    /// Get the current company selector.
788    pub fn company_selector(&self) -> &WeightedCompanySelector {
789        &self.company_selector
790    }
791
792    /// Set fraud configuration.
793    pub fn set_fraud_config(&mut self, config: FraudConfig) {
794        self.fraud_config = config;
795    }
796
797    /// Set vendors from generated master data.
798    ///
799    /// This replaces the default vendor pool with actual generated vendors,
800    /// ensuring JEs reference real master data entities.
801    pub fn with_vendors(mut self, vendors: &[Vendor]) -> Self {
802        if !vendors.is_empty() {
803            self.vendor_pool = VendorPool::from_vendors(vendors.to_vec());
804            self.using_real_master_data = true;
805        }
806        self
807    }
808
809    /// Set customers from generated master data.
810    ///
811    /// This replaces the default customer pool with actual generated customers,
812    /// ensuring JEs reference real master data entities.
813    pub fn with_customers(mut self, customers: &[Customer]) -> Self {
814        if !customers.is_empty() {
815            self.customer_pool = CustomerPool::from_customers(customers.to_vec());
816            self.using_real_master_data = true;
817        }
818        self
819    }
820
821    /// Set materials from generated master data.
822    ///
823    /// This provides material references for JEs that involve inventory movements.
824    pub fn with_materials(mut self, materials: &[Material]) -> Self {
825        if !materials.is_empty() {
826            self.material_pool = Some(MaterialPool::from_materials(materials.to_vec()));
827            self.using_real_master_data = true;
828        }
829        self
830    }
831
832    /// Set all master data at once for convenience.
833    ///
834    /// This is the recommended way to configure the JE generator with
835    /// generated master data to ensure data coherence.
836    pub fn with_master_data(
837        self,
838        vendors: &[Vendor],
839        customers: &[Customer],
840        materials: &[Material],
841    ) -> Self {
842        self.with_vendors(vendors)
843            .with_customers(customers)
844            .with_materials(materials)
845    }
846
847    /// Replace the user pool with one generated from a [`CountryPack`].
848    ///
849    /// This is an alternative to the default name-culture distribution that
850    /// derives name pools and weights from the country-pack's `names` section.
851    /// The existing user pool (if any) is discarded and regenerated using
852    /// [`MultiCultureNameGenerator::from_country_pack`].
853    pub fn with_country_pack_names(mut self, pack: &CountryPack) -> Self {
854        let name_gen =
855            datasynth_core::templates::MultiCultureNameGenerator::from_country_pack(pack);
856        let config = UserGeneratorConfig {
857            // The culture distribution is embedded in the name generator
858            // itself, so we use an empty list here.
859            culture_distribution: Vec::new(),
860            email_domain: name_gen.email_domain().to_string(),
861            generate_realistic_names: true,
862        };
863        let mut user_gen = UserGenerator::with_name_generator(self.seed + 100, config, name_gen);
864        self.user_pool = Some(user_gen.generate_standard(&self.companies));
865        self
866    }
867
868    /// Check if the generator is using real master data.
869    pub fn is_using_real_master_data(&self) -> bool {
870        self.using_real_master_data
871    }
872
873    /// Determine if this transaction should be fraudulent.
874    /// Pick a realistic ERP `source_system` provenance code.
875    ///
876    /// Returns a string like `"SAP-FI/AP"`, `"manual/adjustment"`,
877    /// `"Interface/EDI"`. Uses the business process to bias toward
878    /// process-appropriate sub-modules (e.g. P2P → SAP-MM/IV, O2C →
879    /// SAP-SD/IV, H2R → SAP-HR/PR). The legacy 7-code shape
880    /// (`SAP-FI`, `SAP-MM`, etc.) is preserved as a prefix so existing
881    /// `starts_with` filters keep working.
882    ///
883    /// **Manual contract**: when `is_manual` is true the returned value
884    /// always starts with `"manual"` or `"spreadsheet"`. This is asserted
885    /// in `test_isa240_audit_flags_populated`.
886    fn pick_source_system(rng: &mut ChaCha8Rng, is_manual: bool, bp: BusinessProcess) -> String {
887        if is_manual {
888            // 8 manual provenance codes — all share a `manual/` or
889            // `spreadsheet/` prefix.
890            const MANUAL: &[&str] = &[
891                "manual/standard",
892                "manual/adjustment",
893                "manual/reclassification",
894                "manual/accrual",
895                "manual/reversal",
896                "manual/correction",
897                "spreadsheet/upload",
898                "spreadsheet/journal",
899            ];
900            let idx = (rng.random::<u32>() as usize) % MANUAL.len();
901            return MANUAL[idx].to_string();
902        }
903
904        // Process-aware automated provenance. Each process has a small
905        // primary set; we also mix in cross-process codes ~20% of the
906        // time so the taxonomy stays diverse without losing coherence.
907        let primary: &[&str] = match bp {
908            BusinessProcess::P2P => &[
909                "SAP-MM/PO",
910                "SAP-MM/IV",
911                "SAP-MM/IM",
912                "SAP-FI/AP",
913                "Interface/EDI",
914            ],
915            BusinessProcess::O2C => &[
916                "SAP-SD/ORD",
917                "SAP-SD/DEL",
918                "SAP-SD/IV",
919                "SAP-FI/AR",
920                "Interface/Lockbox",
921            ],
922            BusinessProcess::H2R => &["SAP-HR/PR", "SAP-HR/TIME", "Interface/PayRun"],
923            BusinessProcess::A2R => &["SAP-FI/AA", "SAP-FI/GL"],
924            BusinessProcess::Treasury => &["Treasury/CM", "Treasury/HD", "Interface/Bank"],
925            BusinessProcess::Tax => &["Tax/RPT", "SAP-FI/GL"],
926            BusinessProcess::Mfg => &["SAP-MM/IM", "SAP-FI/GL"],
927            // R2R, S2C, Bank, Audit, Intercompany, ProjectAccounting, Esg
928            // → fall through to a generic mix.
929            _ => &[
930                "SAP-FI/GL",
931                "SAP-FI/AP",
932                "SAP-FI/AR",
933                "SAP-FI/AA",
934                "External/SubL",
935            ],
936        };
937
938        // 80% process-appropriate, 20% cross-process (pulled from a
939        // generic pool) so the categorical distribution has long tails.
940        const CROSS: &[&str] = &[
941            "SAP-FI/GL",
942            "SAP-FI/AP",
943            "SAP-FI/AR",
944            "Interface/EDI",
945            "Interface/Bank",
946            "External/SubL",
947        ];
948        let pool = if rng.random::<f64>() < 0.80 {
949            primary
950        } else {
951            CROSS
952        };
953        let idx = (rng.random::<u32>() as usize) % pool.len();
954        pool[idx].to_string()
955    }
956
957    fn determine_fraud(&mut self) -> Option<FraudType> {
958        if !self.fraud_config.enabled {
959            return None;
960        }
961
962        // Roll for fraud based on fraud rate
963        if self.rng.random::<f64>() >= self.fraud_config.fraud_rate {
964            return None;
965        }
966
967        // Select fraud type based on distribution
968        Some(self.select_fraud_type())
969    }
970
971    /// Select a fraud type based on the configured distribution.
972    fn select_fraud_type(&mut self) -> FraudType {
973        let dist = &self.fraud_config.fraud_type_distribution;
974        let roll: f64 = self.rng.random();
975
976        let mut cumulative = 0.0;
977
978        cumulative += dist.suspense_account_abuse;
979        if roll < cumulative {
980            return FraudType::SuspenseAccountAbuse;
981        }
982
983        cumulative += dist.fictitious_transaction;
984        if roll < cumulative {
985            return FraudType::FictitiousTransaction;
986        }
987
988        cumulative += dist.revenue_manipulation;
989        if roll < cumulative {
990            return FraudType::RevenueManipulation;
991        }
992
993        cumulative += dist.expense_capitalization;
994        if roll < cumulative {
995            return FraudType::ExpenseCapitalization;
996        }
997
998        cumulative += dist.split_transaction;
999        if roll < cumulative {
1000            return FraudType::SplitTransaction;
1001        }
1002
1003        cumulative += dist.timing_anomaly;
1004        if roll < cumulative {
1005            return FraudType::TimingAnomaly;
1006        }
1007
1008        cumulative += dist.unauthorized_access;
1009        if roll < cumulative {
1010            return FraudType::UnauthorizedAccess;
1011        }
1012
1013        cumulative += dist.duplicate_payment;
1014        if roll < cumulative {
1015            return FraudType::DuplicatePayment;
1016        }
1017
1018        cumulative += dist.kickback_scheme;
1019        if roll < cumulative {
1020            return FraudType::KickbackScheme;
1021        }
1022
1023        cumulative += dist.round_tripping;
1024        if roll < cumulative {
1025            return FraudType::RoundTripping;
1026        }
1027
1028        cumulative += dist.unauthorized_discount;
1029        if roll < cumulative {
1030            return FraudType::UnauthorizedDiscount;
1031        }
1032
1033        // Fallback when distribution is sub-1.0 (validator allows tolerance)
1034        FraudType::DuplicatePayment
1035    }
1036
1037    /// Map a fraud type to an amount pattern for suspicious amounts.
1038    fn fraud_type_to_amount_pattern(&self, fraud_type: FraudType) -> FraudAmountPattern {
1039        match fraud_type {
1040            FraudType::SplitTransaction | FraudType::JustBelowThreshold => {
1041                FraudAmountPattern::ThresholdAdjacent
1042            }
1043            FraudType::FictitiousTransaction
1044            | FraudType::FictitiousEntry
1045            | FraudType::SuspenseAccountAbuse
1046            | FraudType::RoundDollarManipulation => FraudAmountPattern::ObviousRoundNumbers,
1047            FraudType::RevenueManipulation
1048            | FraudType::ExpenseCapitalization
1049            | FraudType::ImproperCapitalization
1050            | FraudType::ReserveManipulation
1051            | FraudType::UnauthorizedAccess
1052            | FraudType::PrematureRevenue
1053            | FraudType::UnderstatedLiabilities
1054            | FraudType::OverstatedAssets
1055            | FraudType::ChannelStuffing => FraudAmountPattern::StatisticallyImprobable,
1056            FraudType::DuplicatePayment
1057            | FraudType::TimingAnomaly
1058            | FraudType::SelfApproval
1059            | FraudType::ExceededApprovalLimit
1060            | FraudType::SegregationOfDutiesViolation
1061            | FraudType::UnauthorizedApproval
1062            | FraudType::CollusiveApproval
1063            | FraudType::FictitiousVendor
1064            | FraudType::ShellCompanyPayment
1065            | FraudType::Kickback
1066            | FraudType::KickbackScheme
1067            | FraudType::UnauthorizedDiscount
1068            | FraudType::RoundTripping
1069            | FraudType::InvoiceManipulation
1070            | FraudType::AssetMisappropriation
1071            | FraudType::InventoryTheft
1072            | FraudType::GhostEmployee => FraudAmountPattern::Normal,
1073            // Accounting Standards Fraud Types (ASC 606/IFRS 15 - Revenue)
1074            FraudType::ImproperRevenueRecognition
1075            | FraudType::ImproperPoAllocation
1076            | FraudType::VariableConsiderationManipulation
1077            | FraudType::ContractModificationMisstatement => {
1078                FraudAmountPattern::StatisticallyImprobable
1079            }
1080            // Accounting Standards Fraud Types (ASC 842/IFRS 16 - Leases)
1081            FraudType::LeaseClassificationManipulation
1082            | FraudType::OffBalanceSheetLease
1083            | FraudType::LeaseLiabilityUnderstatement
1084            | FraudType::RouAssetMisstatement => FraudAmountPattern::StatisticallyImprobable,
1085            // Accounting Standards Fraud Types (ASC 820/IFRS 13 - Fair Value)
1086            FraudType::FairValueHierarchyManipulation
1087            | FraudType::Level3InputManipulation
1088            | FraudType::ValuationTechniqueManipulation => {
1089                FraudAmountPattern::StatisticallyImprobable
1090            }
1091            // Accounting Standards Fraud Types (ASC 360/IAS 36 - Impairment)
1092            FraudType::DelayedImpairment
1093            | FraudType::ImpairmentTestAvoidance
1094            | FraudType::CashFlowProjectionManipulation
1095            | FraudType::ImproperImpairmentReversal => FraudAmountPattern::StatisticallyImprobable,
1096            // Sourcing/Procurement Fraud
1097            FraudType::BidRigging
1098            | FraudType::PhantomVendorContract
1099            | FraudType::ConflictOfInterestSourcing => FraudAmountPattern::Normal,
1100            FraudType::SplitContractThreshold => FraudAmountPattern::ThresholdAdjacent,
1101            // HR/Payroll Fraud
1102            FraudType::GhostEmployeePayroll
1103            | FraudType::PayrollInflation
1104            | FraudType::DuplicateExpenseReport
1105            | FraudType::FictitiousExpense => FraudAmountPattern::Normal,
1106            FraudType::SplitExpenseToAvoidApproval => FraudAmountPattern::ThresholdAdjacent,
1107            // O2C Fraud
1108            FraudType::RevenueTimingManipulation => FraudAmountPattern::StatisticallyImprobable,
1109            FraudType::QuotePriceOverride => FraudAmountPattern::Normal,
1110        }
1111    }
1112
1113    /// Generate a deterministic UUID using the factory.
1114    #[inline]
1115    fn generate_deterministic_uuid(&self) -> uuid::Uuid {
1116        self.uuid_factory.next()
1117    }
1118
1119    /// Cost center pool used for expense account enrichment.
1120    const COST_CENTER_POOL: &'static [&'static str] =
1121        &["CC1000", "CC2000", "CC3000", "CC4000", "CC5000"];
1122
1123    /// Enrich journal entry line items with account descriptions, cost centers,
1124    /// profit centers, value dates, line text, and assignment fields.
1125    ///
1126    /// This populates the sparse optional fields that `JournalEntryLine::debit()`
1127    /// and `::credit()` leave as `None`.
1128    fn enrich_line_items(&self, entry: &mut JournalEntry) {
1129        let posting_date = entry.header.posting_date;
1130        let company_code = &entry.header.company_code;
1131        let header_text = entry.header.header_text.clone();
1132        let business_process = entry.header.business_process;
1133
1134        // Derive a deterministic index from the document_id for cost center selection
1135        let doc_id_bytes = entry.header.document_id.as_bytes();
1136        let mut cc_seed: usize = 0;
1137        for &b in doc_id_bytes {
1138            cc_seed = cc_seed.wrapping_add(b as usize);
1139        }
1140
1141        for (i, line) in entry.lines.iter_mut().enumerate() {
1142            // 1. account_description: look up from CoA
1143            if line.account_description.is_none() {
1144                line.account_description = self
1145                    .coa
1146                    .get_account(&line.gl_account)
1147                    .map(|a| a.short_description.clone());
1148            }
1149
1150            // 2. cost_center: assign to expense accounts (5xxx/6xxx)
1151            if line.cost_center.is_none() {
1152                let first_char = line.gl_account.chars().next().unwrap_or('0');
1153                if first_char == '5' || first_char == '6' {
1154                    let idx = cc_seed.wrapping_add(i) % Self::COST_CENTER_POOL.len();
1155                    line.cost_center = Some(Self::COST_CENTER_POOL[idx].to_string());
1156                }
1157            }
1158
1159            // 3. profit_center: derive from company code + business process
1160            if line.profit_center.is_none() {
1161                let suffix = match business_process {
1162                    Some(BusinessProcess::P2P) => "-P2P",
1163                    Some(BusinessProcess::O2C) => "-O2C",
1164                    Some(BusinessProcess::R2R) => "-R2R",
1165                    Some(BusinessProcess::H2R) => "-H2R",
1166                    _ => "",
1167                };
1168                line.profit_center = Some(format!("PC-{company_code}{suffix}"));
1169            }
1170
1171            // 4. line_text: fall back to header_text if not already set
1172            if line.line_text.is_none() {
1173                line.line_text = header_text.clone();
1174            }
1175
1176            // 5. value_date: set to posting_date for AR/AP accounts
1177            if line.value_date.is_none()
1178                && (line.gl_account.starts_with("1100") || line.gl_account.starts_with("2000"))
1179            {
1180                line.value_date = Some(posting_date);
1181            }
1182
1183            // 6. assignment: set to vendor/customer reference for AP/AR lines
1184            if line.assignment.is_none() {
1185                if line.gl_account.starts_with("2000") {
1186                    // AP line - use vendor reference from header
1187                    if let Some(ref ht) = header_text {
1188                        // Try to extract vendor ID from header text patterns like "... - V-001"
1189                        if let Some(vendor_part) = ht.rsplit(" - ").next() {
1190                            if vendor_part.starts_with("V-")
1191                                || vendor_part.starts_with("VENDOR")
1192                                || vendor_part.starts_with("Vendor")
1193                            {
1194                                line.assignment = Some(vendor_part.to_string());
1195                            }
1196                        }
1197                    }
1198                } else if line.gl_account.starts_with("1100") {
1199                    // AR line - use customer reference from header
1200                    if let Some(ref ht) = header_text {
1201                        if let Some(customer_part) = ht.rsplit(" - ").next() {
1202                            if customer_part.starts_with("C-")
1203                                || customer_part.starts_with("CUST")
1204                                || customer_part.starts_with("Customer")
1205                            {
1206                                line.assignment = Some(customer_part.to_string());
1207                            }
1208                        }
1209                    }
1210                }
1211            }
1212        }
1213    }
1214
1215    /// Generate a single journal entry.
1216    pub fn generate(&mut self) -> JournalEntry {
1217        debug!(
1218            count = self.count,
1219            companies = self.companies.len(),
1220            start_date = %self.start_date,
1221            end_date = %self.end_date,
1222            "Generating journal entry"
1223        );
1224
1225        // Check if we're in a batch - if so, generate a batched entry
1226        if let Some(ref state) = self.batch_state {
1227            if state.remaining > 0 {
1228                return self.generate_batched_entry();
1229            }
1230        }
1231
1232        self.count += 1;
1233
1234        // Generate deterministic document ID
1235        let document_id = self.generate_deterministic_uuid();
1236
1237        // Sample posting date
1238        let mut posting_date = self
1239            .temporal_sampler
1240            .sample_date(self.start_date, self.end_date);
1241
1242        // Adjust posting date to be a business day if business day calculator is configured
1243        if let Some(ref calc) = self.business_day_calculator {
1244            if !calc.is_business_day(posting_date) {
1245                // Move to next business day
1246                posting_date = calc.next_business_day(posting_date, false);
1247                // Ensure we don't exceed end_date
1248                if posting_date > self.end_date {
1249                    posting_date = calc.prev_business_day(self.end_date, true);
1250                }
1251            }
1252        }
1253
1254        // Select company using weighted selector
1255        let company_code = self.company_selector.select(&mut self.rng).to_string();
1256
1257        // v4.1.0+: draw a single (u, v) pair from the copula — cached for
1258        // both the amount adjustment (u) and the line-count shift (v).
1259        // None when no copula is configured.
1260        let copula_uv: Option<(f64, f64)> =
1261            self.correlation_copula.as_mut().map(|cop| cop.sample());
1262
1263        // Sample line item specification. When a copula is configured,
1264        // v drives line-count via a quantile-preserving map: integer
1265        // count `2 + floor(v * 10)` gives range [2, 11] evenly spaced
1266        // in v, so rank(v) == rank(line_count).
1267        //
1268        // v4.1.6+: upgraded from the v3.5.4 nudge (shift around
1269        // independently-drawn count) to true rank-preserving quantile
1270        // inversion, so empirical Kendall-τ now matches copula theory.
1271        let mut line_spec = self.line_sampler.sample();
1272        if let Some((_u, v)) = copula_uv {
1273            let new_total = 2 + ((v * 10.0).floor() as usize).min(9);
1274            let old_debit = line_spec.debit_count.max(1);
1275            let old_credit = line_spec.credit_count.max(1);
1276            let new_debit = (new_total as f64 * old_debit as f64 / (old_debit + old_credit) as f64)
1277                .round() as usize;
1278            let new_debit = new_debit.clamp(1, new_total - 1);
1279            let new_credit = new_total - new_debit;
1280            line_spec.total_count = new_total;
1281            line_spec.debit_count = new_debit;
1282            line_spec.credit_count = new_credit;
1283        }
1284
1285        // Determine source type using full 4-way distribution
1286        let source = self.select_source();
1287        let is_automated = matches!(
1288            source,
1289            TransactionSource::Automated | TransactionSource::Recurring
1290        );
1291
1292        // Select business process
1293        let business_process = self.select_business_process();
1294
1295        // Determine if this is a fraudulent transaction
1296        let fraud_type = self.determine_fraud();
1297        let is_fraud = fraud_type.is_some();
1298
1299        // Sample time based on source
1300        let time = self.temporal_sampler.sample_time(!is_automated);
1301        let created_at = posting_date.and_time(time).and_utc();
1302
1303        // Select user from pool or generate generic
1304        let (created_by, user_persona) = self.select_user(is_automated);
1305
1306        // Create header with deterministic UUID
1307        let mut header =
1308            JournalEntryHeader::with_deterministic_id(company_code, posting_date, document_id);
1309        header.created_at = created_at;
1310        header.source = source;
1311        header.created_by = created_by;
1312        header.user_persona = user_persona;
1313        header.business_process = Some(business_process);
1314        header.document_type = Self::document_type_for_process(business_process).to_string();
1315        header.is_fraud = is_fraud;
1316        header.fraud_type = fraud_type;
1317
1318        // --- ISA 240 audit flags ---
1319        let is_manual = matches!(source, TransactionSource::Manual);
1320        header.is_manual = is_manual;
1321
1322        // Determine source_system based on manual vs automated.
1323        //
1324        // Real ERPs typically expose 20+ distinct provenance codes per
1325        // company (one per module + sub-module + interface). The taxonomy
1326        // below is a strict superset of the legacy {manual, spreadsheet,
1327        // SAP-FI, SAP-MM, SAP-SD, interface, SAP-HR} codes so downstream
1328        // consumers that filter by prefix (e.g. `starts_with("SAP-")`)
1329        // continue to work.
1330        //
1331        // Contract preserved by the generator-level audit assertion in
1332        // `test_isa240_audit_flags_populated`:
1333        //   - manual entries → starts_with("manual") || starts_with("spreadsheet")
1334        //   - automated entries → does NOT start with "manual"/"spreadsheet"
1335        header.source_system = Self::pick_source_system(&mut self.rng, is_manual, business_process);
1336
1337        // is_post_close: entry is in the last month of the configured period
1338        // and the posting date falls after the 25th (simulating close cutoff)
1339        let is_post_close = posting_date.month() == self.end_date.month()
1340            && posting_date.year() == self.end_date.year()
1341            && posting_date.day() > 25;
1342        header.is_post_close = is_post_close;
1343
1344        // created_date: for manual entries, same day as posting; for automated,
1345        // 0-3 days before posting_date
1346        let created_date = if is_manual {
1347            posting_date.and_hms_opt(time.hour().min(23), time.minute(), time.second())
1348        } else {
1349            let lag_days = self.rng.random_range(0i64..=3);
1350            let created_naive_date = posting_date
1351                .checked_sub_signed(chrono::Duration::days(lag_days))
1352                .unwrap_or(posting_date);
1353            created_naive_date.and_hms_opt(
1354                self.rng.random_range(8u32..=17),
1355                self.rng.random_range(0u32..=59),
1356                self.rng.random_range(0u32..=59),
1357            )
1358        };
1359        header.created_date = created_date;
1360
1361        // Generate description context
1362        let mut context =
1363            DescriptionContext::with_period(posting_date.month(), posting_date.year());
1364
1365        // Add vendor/customer context based on business process
1366        match business_process {
1367            BusinessProcess::P2P => {
1368                if let Some(vendor) = self.vendor_pool.random_vendor(&mut self.rng) {
1369                    context.vendor_name = Some(vendor.name.clone());
1370                }
1371            }
1372            BusinessProcess::O2C => {
1373                if let Some(customer) = self.customer_pool.random_customer(&mut self.rng) {
1374                    context.customer_name = Some(customer.name.clone());
1375                }
1376            }
1377            _ => {}
1378        }
1379
1380        // Generate header text if enabled
1381        if self.template_config.descriptions.generate_header_text {
1382            header.header_text = Some(self.description_generator.generate_header_text(
1383                business_process,
1384                &context,
1385                &mut self.rng,
1386            ));
1387        }
1388
1389        // Generate reference if enabled
1390        if self.template_config.references.generate_references {
1391            header.reference = Some(
1392                self.reference_generator
1393                    .generate_for_process_year(business_process, posting_date.year()),
1394            );
1395        }
1396
1397        // Derive typed source document from reference prefix
1398        header.source_document = header
1399            .reference
1400            .as_deref()
1401            .and_then(DocumentRef::parse)
1402            .or_else(|| {
1403                if header.source == TransactionSource::Manual {
1404                    Some(DocumentRef::Manual)
1405                } else {
1406                    None
1407                }
1408            });
1409
1410        // Generate line items
1411        let mut entry = JournalEntry::new(header);
1412
1413        // Generate amount - use fraud pattern if this is a fraudulent transaction.
1414        // Non-fraud path prefers the v3.4.0 advanced sampler when configured; fraud
1415        // patterns always use the legacy sampler because they target specific
1416        // thresholds (round numbers, just-under-approval amounts) that are
1417        // orthogonal to mixture models.
1418        let base_amount = if let Some(ft) = fraud_type {
1419            let pattern = self.fraud_type_to_amount_pattern(ft);
1420            self.amount_sampler.sample_fraud(pattern)
1421        } else if let Some(ref mut adv) = self.advanced_amount_sampler {
1422            adv.sample_decimal()
1423        } else {
1424            self.amount_sampler.sample()
1425        };
1426        // v3.5.3+: if a conditional-amount override is configured and
1427        // the JE is non-fraud, re-sample the amount from the conditional
1428        // distribution using the computed context. Fraud entries bypass
1429        // this path to preserve fraud-pattern semantics (as with the
1430        // advanced sampler cascade above).
1431        let base_amount = if fraud_type.is_none() {
1432            // Compute input context BEFORE taking &mut on the sampler
1433            // to avoid borrow-checker conflict with the immutable
1434            // `conditional_input_value` call.
1435            let input = self.conditional_input_value(posting_date);
1436            if let Some(ref mut cond) = self.conditional_amount_override {
1437                cond.sample_decimal(input)
1438            } else {
1439                base_amount
1440            }
1441        } else {
1442            base_amount
1443        };
1444
1445        // v4.1.6+: if a copula is configured AND an advanced amount
1446        // sampler with a ppf is available, use true rank-preserving
1447        // inverse-CDF sampling — amount is drawn DIRECTLY from the
1448        // sampler's quantile at `u`, replacing (not nudging) the
1449        // independently-drawn base_amount. This makes empirical
1450        // Kendall-τ match the copula's theoretical τ.
1451        //
1452        // Fallback for copula-without-advanced-sampler: keep the
1453        // v4.1.0 log-scale multiplier nudge (observable correlation,
1454        // diluted magnitude).
1455        let base_amount = if fraud_type.is_none() {
1456            if let Some((u, _v)) = copula_uv {
1457                if let Some(ref adv) = self.advanced_amount_sampler {
1458                    adv.ppf_decimal(u)
1459                } else {
1460                    let log_mult = 4.0 * (u - 0.5);
1461                    let adjusted = base_amount.to_f64().unwrap_or(1.0) * log_mult.exp();
1462                    Decimal::from_f64_retain(adjusted).unwrap_or(base_amount)
1463                }
1464            } else {
1465                base_amount
1466            }
1467        } else {
1468            base_amount
1469        };
1470
1471        // Apply temporal drift if configured
1472        let drift_adjusted_amount = {
1473            let drift = self.get_drift_adjustments(posting_date);
1474            if drift.amount_mean_multiplier != 1.0 {
1475                // Apply drift multiplier (includes seasonal factor if enabled)
1476                let multiplier = drift.amount_mean_multiplier * drift.seasonal_factor;
1477                let adjusted = base_amount.to_f64().unwrap_or(1.0) * multiplier;
1478                Decimal::from_f64_retain(adjusted).unwrap_or(base_amount)
1479            } else {
1480                base_amount
1481            }
1482        };
1483
1484        // Apply human variation to amounts for non-automated transactions
1485        let total_amount = if is_automated {
1486            drift_adjusted_amount // Automated systems use exact amounts
1487        } else {
1488            self.apply_human_variation(drift_adjusted_amount)
1489        };
1490
1491        // Generate debit lines
1492        let debit_amounts = self
1493            .amount_sampler
1494            .sample_summing_to(line_spec.debit_count, total_amount);
1495        for (i, amount) in debit_amounts.into_iter().enumerate() {
1496            let account_number = self.select_debit_account().account_number.clone();
1497            let mut line = JournalEntryLine::debit(
1498                entry.header.document_id,
1499                (i + 1) as u32,
1500                account_number.clone(),
1501                amount,
1502            );
1503
1504            // Generate line text if enabled
1505            if self.template_config.descriptions.generate_line_text {
1506                line.line_text = Some(self.description_generator.generate_line_text(
1507                    &account_number,
1508                    &context,
1509                    &mut self.rng,
1510                ));
1511            }
1512
1513            entry.add_line(line);
1514        }
1515
1516        // Generate credit lines - use the SAME amounts to ensure balance
1517        let credit_amounts = self
1518            .amount_sampler
1519            .sample_summing_to(line_spec.credit_count, total_amount);
1520        for (i, amount) in credit_amounts.into_iter().enumerate() {
1521            let account_number = self.select_credit_account().account_number.clone();
1522            let mut line = JournalEntryLine::credit(
1523                entry.header.document_id,
1524                (line_spec.debit_count + i + 1) as u32,
1525                account_number.clone(),
1526                amount,
1527            );
1528
1529            // Generate line text if enabled
1530            if self.template_config.descriptions.generate_line_text {
1531                line.line_text = Some(self.description_generator.generate_line_text(
1532                    &account_number,
1533                    &context,
1534                    &mut self.rng,
1535                ));
1536            }
1537
1538            entry.add_line(line);
1539        }
1540
1541        // Enrich line items with account descriptions, cost centers, etc.
1542        self.enrich_line_items(&mut entry);
1543
1544        // Apply persona-based errors if enabled and it's a human user
1545        if self.persona_errors_enabled && !is_automated {
1546            self.maybe_inject_persona_error(&mut entry);
1547        }
1548
1549        // Apply approval workflow if enabled and amount exceeds threshold
1550        if self.approval_enabled {
1551            self.maybe_apply_approval_workflow(&mut entry, posting_date);
1552        }
1553
1554        // Populate approved_by / approval_date from the approval workflow
1555        self.populate_approval_fields(&mut entry, posting_date);
1556
1557        // Maybe start a batch of similar entries for realism
1558        self.maybe_start_batch(&entry);
1559
1560        entry
1561    }
1562
1563    /// Enable or disable persona-based error injection.
1564    ///
1565    /// When enabled, entries created by human personas have a chance
1566    /// to contain realistic human errors based on their experience level.
1567    pub fn with_persona_errors(mut self, enabled: bool) -> Self {
1568        self.persona_errors_enabled = enabled;
1569        self
1570    }
1571
1572    /// Set fraud configuration for fraud injection.
1573    ///
1574    /// When fraud is enabled in the config, transactions have a chance
1575    /// to be marked as fraudulent based on the configured fraud rate.
1576    pub fn with_fraud_config(mut self, config: FraudConfig) -> Self {
1577        self.fraud_config = config;
1578        self
1579    }
1580
    /// Report whether persona-based error injection is switched on.
    ///
    /// Returns the flag stored by [`Self::with_persona_errors`].
    pub fn persona_errors_enabled(&self) -> bool {
        self.persona_errors_enabled
    }
1585
1586    /// Enable or disable batch processing behavior.
1587    ///
1588    /// When enabled (default), the generator will occasionally produce batches
1589    /// of similar entries, simulating how humans batch similar work together.
1590    pub fn with_batching(mut self, enabled: bool) -> Self {
1591        if !enabled {
1592            self.batch_state = None;
1593        }
1594        self
1595    }
1596
    /// Check if batch processing is enabled.
    ///
    /// Currently this always returns `true`: no generator state is consulted,
    /// so the result does not change after calling `with_batching(false)`.
    pub fn batching_enabled(&self) -> bool {
        // Batching is implicitly enabled when not explicitly disabled
        true
    }
1602
1603    /// Maybe start a batch based on the current entry.
1604    ///
1605    /// Humans often batch similar work: processing invoices from one vendor,
1606    /// entering expense reports for a trip, reconciling similar items.
1607    fn maybe_start_batch(&mut self, entry: &JournalEntry) {
1608        // Only start batch for non-automated, non-fraud entries
1609        if entry.header.source == TransactionSource::Automated || entry.header.is_fraud {
1610            return;
1611        }
1612
1613        // 15% chance to start a batch (most work is not batched)
1614        if self.rng.random::<f64>() > 0.15 {
1615            return;
1616        }
1617
1618        // Extract key attributes for batching
1619        let base_account = entry
1620            .lines
1621            .first()
1622            .map(|l| l.gl_account.clone())
1623            .unwrap_or_default();
1624
1625        let base_amount = entry.total_debit();
1626
1627        self.batch_state = Some(BatchState {
1628            base_account_number: base_account,
1629            base_amount,
1630            base_business_process: entry.header.business_process,
1631            base_posting_date: entry.header.posting_date,
1632            remaining: self.rng.random_range(2..7), // 2-6 more similar entries
1633        });
1634    }
1635
1636    /// Generate an entry that's part of the current batch.
1637    ///
1638    /// Batched entries have:
1639    /// - Same or very similar business process
1640    /// - Same posting date (batched work done together)
1641    /// - Similar amounts (within ±15%)
1642    /// - Same debit account (processing similar items)
1643    fn generate_batched_entry(&mut self) -> JournalEntry {
1644        use rust_decimal::Decimal;
1645
1646        // Decrement batch counter
1647        if let Some(ref mut state) = self.batch_state {
1648            state.remaining = state.remaining.saturating_sub(1);
1649        }
1650
1651        let Some(batch) = self.batch_state.clone() else {
1652            // This is a programming error - batch_state should be set before calling this method.
1653            // Clear state and fall back to generating a standard entry instead of panicking.
1654            tracing::warn!(
1655                "generate_batched_entry called without batch_state; generating standard entry"
1656            );
1657            self.batch_state = None;
1658            return self.generate();
1659        };
1660
1661        // Use the batch's posting date (work done on same day)
1662        let posting_date = batch.base_posting_date;
1663
1664        self.count += 1;
1665        let document_id = self.generate_deterministic_uuid();
1666
1667        // Select same company (batched work is usually same company)
1668        let company_code = self.company_selector.select(&mut self.rng).to_string();
1669
1670        // Use simplified line spec for batched entries (usually 2-line)
1671        let _line_spec = LineItemSpec {
1672            total_count: 2,
1673            debit_count: 1,
1674            credit_count: 1,
1675            split_type: DebitCreditSplit::Equal,
1676        };
1677
1678        // Batched entries are always manual
1679        let source = TransactionSource::Manual;
1680
1681        // Use the batch's business process
1682        let business_process = batch.base_business_process.unwrap_or(BusinessProcess::R2R);
1683
1684        // Sample time
1685        let time = self.temporal_sampler.sample_time(true);
1686        let created_at = posting_date.and_time(time).and_utc();
1687
1688        // Same user for batched work
1689        let (created_by, user_persona) = self.select_user(false);
1690
1691        // Create header
1692        let mut header =
1693            JournalEntryHeader::with_deterministic_id(company_code, posting_date, document_id);
1694        header.created_at = created_at;
1695        header.source = source;
1696        header.created_by = created_by;
1697        header.user_persona = user_persona;
1698        header.business_process = Some(business_process);
1699        header.document_type = Self::document_type_for_process(business_process).to_string();
1700
1701        // Batched manual entries have Manual source document
1702        header.source_document = Some(DocumentRef::Manual);
1703
1704        // ISA 240 audit flags for batched entries (always manual)
1705        header.is_manual = true;
1706        header.source_system = if self.rng.random::<f64>() < 0.70 {
1707            "manual".to_string()
1708        } else {
1709            "spreadsheet".to_string()
1710        };
1711        header.is_post_close = posting_date.month() == self.end_date.month()
1712            && posting_date.year() == self.end_date.year()
1713            && posting_date.day() > 25;
1714        header.created_date =
1715            posting_date.and_hms_opt(time.hour().min(23), time.minute(), time.second());
1716
1717        // Generate similar amount (within ±15% of base)
1718        let variation = self.rng.random_range(-0.15..0.15);
1719        let varied_amount =
1720            batch.base_amount * (Decimal::ONE + Decimal::try_from(variation).unwrap_or_default());
1721        let total_amount = varied_amount.round_dp(2).max(Decimal::from(1));
1722
1723        // Create the entry
1724        let mut entry = JournalEntry::new(header);
1725
1726        // Use same debit account as batch base
1727        let debit_line = JournalEntryLine::debit(
1728            entry.header.document_id,
1729            1,
1730            batch.base_account_number.clone(),
1731            total_amount,
1732        );
1733        entry.add_line(debit_line);
1734
1735        // Select a credit account
1736        let credit_account = self.select_credit_account().account_number.clone();
1737        let credit_line =
1738            JournalEntryLine::credit(entry.header.document_id, 2, credit_account, total_amount);
1739        entry.add_line(credit_line);
1740
1741        // Enrich line items with account descriptions, cost centers, etc.
1742        self.enrich_line_items(&mut entry);
1743
1744        // Apply persona-based errors if enabled
1745        if self.persona_errors_enabled {
1746            self.maybe_inject_persona_error(&mut entry);
1747        }
1748
1749        // Apply approval workflow if enabled
1750        if self.approval_enabled {
1751            self.maybe_apply_approval_workflow(&mut entry, posting_date);
1752        }
1753
1754        // Populate approved_by / approval_date from the approval workflow
1755        self.populate_approval_fields(&mut entry, posting_date);
1756
1757        // Clear batch state if no more entries remaining
1758        if batch.remaining <= 1 {
1759            self.batch_state = None;
1760        }
1761
1762        entry
1763    }
1764
1765    /// Maybe inject a persona-appropriate error based on the persona's error rate.
1766    fn maybe_inject_persona_error(&mut self, entry: &mut JournalEntry) {
1767        // Parse persona from the entry header
1768        let persona_str = &entry.header.user_persona;
1769        let persona = match persona_str.to_lowercase().as_str() {
1770            s if s.contains("junior") => UserPersona::JuniorAccountant,
1771            s if s.contains("senior") => UserPersona::SeniorAccountant,
1772            s if s.contains("controller") => UserPersona::Controller,
1773            s if s.contains("manager") => UserPersona::Manager,
1774            s if s.contains("executive") => UserPersona::Executive,
1775            _ => return, // Don't inject errors for unknown personas
1776        };
1777
1778        // Get base error rate from persona
1779        let base_error_rate = persona.error_rate();
1780
1781        // Apply stress factors based on posting date
1782        let adjusted_rate = self.apply_stress_factors(base_error_rate, entry.header.posting_date);
1783
1784        // Check if error should occur based on adjusted rate
1785        if self.rng.random::<f64>() >= adjusted_rate {
1786            return; // No error this time
1787        }
1788
1789        // Select and inject persona-appropriate error
1790        self.inject_human_error(entry, persona);
1791    }
1792
1793    /// Apply contextual stress factors to the base error rate.
1794    ///
1795    /// Stress factors increase error likelihood during:
1796    /// - Month-end (day >= 28): 1.5x more errors due to deadline pressure
1797    /// - Quarter-end (Mar, Jun, Sep, Dec): additional 25% boost
1798    /// - Year-end (December 28-31): 2.0x more errors due to audit pressure
1799    /// - Monday morning (catch-up work): 20% more errors
1800    /// - Friday afternoon (rushing to leave): 30% more errors
1801    fn apply_stress_factors(&self, base_rate: f64, posting_date: chrono::NaiveDate) -> f64 {
1802        use chrono::Datelike;
1803
1804        let mut rate = base_rate;
1805        let day = posting_date.day();
1806        let month = posting_date.month();
1807
1808        // Year-end stress (December 28-31): double the error rate
1809        if month == 12 && day >= 28 {
1810            rate *= 2.0;
1811            return rate.min(0.5); // Cap at 50% to keep it realistic
1812        }
1813
1814        // Quarter-end stress (last days of Mar, Jun, Sep, Dec)
1815        if matches!(month, 3 | 6 | 9 | 12) && day >= 28 {
1816            rate *= 1.75; // 75% more errors at quarter end
1817            return rate.min(0.4);
1818        }
1819
1820        // Month-end stress (last 3 days of month)
1821        if day >= 28 {
1822            rate *= 1.5; // 50% more errors at month end
1823        }
1824
1825        // Day-of-week stress effects
1826        let weekday = posting_date.weekday();
1827        match weekday {
1828            chrono::Weekday::Mon => {
1829                // Monday: catching up, often rushed
1830                rate *= 1.2;
1831            }
1832            chrono::Weekday::Fri => {
1833                // Friday: rushing to finish before weekend
1834                rate *= 1.3;
1835            }
1836            _ => {}
1837        }
1838
1839        // Cap at 40% to keep it realistic
1840        rate.min(0.4)
1841    }
1842
1843    /// Apply human-like variation to an amount.
1844    ///
1845    /// Humans don't enter perfectly calculated amounts - they:
1846    /// - Round amounts differently
1847    /// - Estimate instead of calculating exactly
1848    /// - Make small input variations
1849    ///
1850    /// This applies small variations (typically ±2%) to make amounts more realistic.
1851    fn apply_human_variation(&mut self, amount: rust_decimal::Decimal) -> rust_decimal::Decimal {
1852        use rust_decimal::Decimal;
1853
1854        // Automated transactions or very small amounts don't get variation
1855        if amount < Decimal::from(10) {
1856            return amount;
1857        }
1858
1859        // 70% chance of human variation being applied
1860        if self.rng.random::<f64>() > 0.70 {
1861            return amount;
1862        }
1863
1864        // Decide which type of human variation to apply
1865        let variation_type: u8 = self.rng.random_range(0..4);
1866
1867        match variation_type {
1868            0 => {
1869                // ±2% variation (common for estimated amounts)
1870                let variation_pct = self.rng.random_range(-0.02..0.02);
1871                let variation = amount * Decimal::try_from(variation_pct).unwrap_or_default();
1872                (amount + variation).round_dp(2)
1873            }
1874            1 => {
1875                // Round to nearest $10
1876                let ten = Decimal::from(10);
1877                (amount / ten).round() * ten
1878            }
1879            2 => {
1880                // Round to nearest $100 (for larger amounts)
1881                if amount >= Decimal::from(500) {
1882                    let hundred = Decimal::from(100);
1883                    (amount / hundred).round() * hundred
1884                } else {
1885                    amount
1886                }
1887            }
1888            3 => {
1889                // Slight under/over payment (±$0.01 to ±$1.00)
1890                let cents = Decimal::new(self.rng.random_range(-100..100), 2);
1891                (amount + cents).max(Decimal::ZERO).round_dp(2)
1892            }
1893            _ => amount,
1894        }
1895    }
1896
1897    /// Rebalance an entry after a one-sided amount modification.
1898    ///
1899    /// When an error modifies one line's amount, this finds a line on the opposite
1900    /// side (credit if modified was debit, or vice versa) and adjusts it by the
1901    /// same impact to maintain balance.
1902    fn rebalance_entry(entry: &mut JournalEntry, modified_was_debit: bool, impact: Decimal) {
1903        // Find a line on the opposite side to adjust
1904        let balancing_idx = entry.lines.iter().position(|l| {
1905            if modified_was_debit {
1906                l.credit_amount > Decimal::ZERO
1907            } else {
1908                l.debit_amount > Decimal::ZERO
1909            }
1910        });
1911
1912        if let Some(idx) = balancing_idx {
1913            if modified_was_debit {
1914                entry.lines[idx].credit_amount += impact;
1915            } else {
1916                entry.lines[idx].debit_amount += impact;
1917            }
1918        }
1919    }
1920
1921    /// Inject a human-like error based on the persona.
1922    ///
1923    /// All error types maintain balance - amount modifications are applied to both sides.
1924    /// Entries are marked with [HUMAN_ERROR:*] tags in header_text for ML detection.
1925    fn inject_human_error(&mut self, entry: &mut JournalEntry, persona: UserPersona) {
1926        use rust_decimal::Decimal;
1927
1928        // Different personas make different types of errors
1929        let error_type: u8 = match persona {
1930            UserPersona::JuniorAccountant => {
1931                // Junior accountants make more varied errors
1932                self.rng.random_range(0..5)
1933            }
1934            UserPersona::SeniorAccountant => {
1935                // Senior accountants mainly make transposition errors
1936                self.rng.random_range(0..3)
1937            }
1938            UserPersona::Controller | UserPersona::Manager => {
1939                // Controllers/managers mainly make rounding or cutoff errors
1940                self.rng.random_range(3..5)
1941            }
1942            _ => return,
1943        };
1944
1945        match error_type {
1946            0 => {
1947                // Transposed digits in an amount
1948                if let Some(line) = entry.lines.get_mut(0) {
1949                    let is_debit = line.debit_amount > Decimal::ZERO;
1950                    let original_amount = if is_debit {
1951                        line.debit_amount
1952                    } else {
1953                        line.credit_amount
1954                    };
1955
1956                    // Simple digit swap in the string representation
1957                    let s = original_amount.to_string();
1958                    if s.len() >= 2 {
1959                        let chars: Vec<char> = s.chars().collect();
1960                        let pos = self.rng.random_range(0..chars.len().saturating_sub(1));
1961                        if chars[pos].is_ascii_digit()
1962                            && chars.get(pos + 1).is_some_and(char::is_ascii_digit)
1963                        {
1964                            let mut new_chars = chars;
1965                            new_chars.swap(pos, pos + 1);
1966                            if let Ok(new_amount) =
1967                                new_chars.into_iter().collect::<String>().parse::<Decimal>()
1968                            {
1969                                let impact = new_amount - original_amount;
1970
1971                                // Apply to the modified line
1972                                if is_debit {
1973                                    entry.lines[0].debit_amount = new_amount;
1974                                } else {
1975                                    entry.lines[0].credit_amount = new_amount;
1976                                }
1977
1978                                // Rebalance the entry
1979                                Self::rebalance_entry(entry, is_debit, impact);
1980
1981                                entry.header.header_text = Some(
1982                                    entry.header.header_text.clone().unwrap_or_default()
1983                                        + " [HUMAN_ERROR:TRANSPOSITION]",
1984                                );
1985                            }
1986                        }
1987                    }
1988                }
1989            }
1990            1 => {
1991                // Wrong decimal place (off by factor of 10)
1992                if let Some(line) = entry.lines.get_mut(0) {
1993                    let is_debit = line.debit_amount > Decimal::ZERO;
1994                    let original_amount = if is_debit {
1995                        line.debit_amount
1996                    } else {
1997                        line.credit_amount
1998                    };
1999
2000                    let new_amount = original_amount * Decimal::new(10, 0);
2001                    let impact = new_amount - original_amount;
2002
2003                    // Apply to the modified line
2004                    if is_debit {
2005                        entry.lines[0].debit_amount = new_amount;
2006                    } else {
2007                        entry.lines[0].credit_amount = new_amount;
2008                    }
2009
2010                    // Rebalance the entry
2011                    Self::rebalance_entry(entry, is_debit, impact);
2012
2013                    entry.header.header_text = Some(
2014                        entry.header.header_text.clone().unwrap_or_default()
2015                            + " [HUMAN_ERROR:DECIMAL_SHIFT]",
2016                    );
2017                }
2018            }
2019            2 => {
2020                // Typo in description (doesn't affect balance)
2021                if let Some(ref mut text) = entry.header.header_text {
2022                    let typos = ["teh", "adn", "wiht", "taht", "recieve"];
2023                    let correct = ["the", "and", "with", "that", "receive"];
2024                    let idx = self.rng.random_range(0..typos.len());
2025                    if text.to_lowercase().contains(correct[idx]) {
2026                        *text = text.replace(correct[idx], typos[idx]);
2027                        *text = format!("{text} [HUMAN_ERROR:TYPO]");
2028                    }
2029                }
2030            }
2031            3 => {
2032                // Rounding to round number
2033                if let Some(line) = entry.lines.get_mut(0) {
2034                    let is_debit = line.debit_amount > Decimal::ZERO;
2035                    let original_amount = if is_debit {
2036                        line.debit_amount
2037                    } else {
2038                        line.credit_amount
2039                    };
2040
2041                    let new_amount =
2042                        (original_amount / Decimal::new(100, 0)).round() * Decimal::new(100, 0);
2043                    let impact = new_amount - original_amount;
2044
2045                    // Apply to the modified line
2046                    if is_debit {
2047                        entry.lines[0].debit_amount = new_amount;
2048                    } else {
2049                        entry.lines[0].credit_amount = new_amount;
2050                    }
2051
2052                    // Rebalance the entry
2053                    Self::rebalance_entry(entry, is_debit, impact);
2054
2055                    entry.header.header_text = Some(
2056                        entry.header.header_text.clone().unwrap_or_default()
2057                            + " [HUMAN_ERROR:ROUNDED]",
2058                    );
2059                }
2060            }
2061            // Late posting marker (document date much earlier than posting
2062            // date). Doesn't create an imbalance.
2063            4 if entry.header.document_date == entry.header.posting_date => {
2064                let days_late = self.rng.random_range(5..15);
2065                entry.header.document_date =
2066                    entry.header.posting_date - chrono::Duration::days(days_late);
2067                entry.header.header_text = Some(
2068                    entry.header.header_text.clone().unwrap_or_default()
2069                        + " [HUMAN_ERROR:LATE_POSTING]",
2070                );
2071            }
2072            _ => {}
2073        }
2074    }
2075
2076    /// Apply approval workflow for high-value transactions.
2077    ///
2078    /// If the entry amount exceeds the approval threshold, simulate an
2079    /// approval workflow with appropriate approvers based on amount.
2080    fn maybe_apply_approval_workflow(
2081        &mut self,
2082        entry: &mut JournalEntry,
2083        _posting_date: NaiveDate,
2084    ) {
2085        use rust_decimal::Decimal;
2086
2087        let amount = entry.total_debit();
2088
2089        // Skip if amount is below threshold
2090        if amount <= self.approval_threshold {
2091            // Auto-approved below threshold
2092            let workflow = ApprovalWorkflow::auto_approved(
2093                entry.header.created_by.clone(),
2094                entry.header.user_persona.clone(),
2095                amount,
2096                entry.header.created_at,
2097            );
2098            entry.header.approval_workflow = Some(workflow);
2099            return;
2100        }
2101
2102        // Mark as SOX relevant for high-value transactions
2103        entry.header.sox_relevant = true;
2104
2105        // Determine required approval levels based on amount
2106        let required_levels = if amount > Decimal::new(100000, 0) {
2107            3 // Executive approval required
2108        } else if amount > Decimal::new(50000, 0) {
2109            2 // Senior management approval
2110        } else {
2111            1 // Manager approval
2112        };
2113
2114        // Create the approval workflow
2115        let mut workflow = ApprovalWorkflow::new(
2116            entry.header.created_by.clone(),
2117            entry.header.user_persona.clone(),
2118            amount,
2119        );
2120        workflow.required_levels = required_levels;
2121
2122        // Simulate submission
2123        let submit_time = entry.header.created_at;
2124        let submit_action = ApprovalAction::new(
2125            entry.header.created_by.clone(),
2126            entry.header.user_persona.clone(),
2127            self.parse_persona(&entry.header.user_persona),
2128            ApprovalActionType::Submit,
2129            0,
2130        )
2131        .with_timestamp(submit_time);
2132
2133        workflow.actions.push(submit_action);
2134        workflow.status = ApprovalStatus::Pending;
2135        workflow.submitted_at = Some(submit_time);
2136
2137        // Simulate approvals with realistic delays
2138        let mut current_time = submit_time;
2139        for level in 1..=required_levels {
2140            // Add delay for approval (1-3 business hours per level)
2141            let delay_hours = self.rng.random_range(1..4);
2142            current_time += chrono::Duration::hours(delay_hours);
2143
2144            // Skip weekends
2145            while current_time.weekday() == chrono::Weekday::Sat
2146                || current_time.weekday() == chrono::Weekday::Sun
2147            {
2148                current_time += chrono::Duration::days(1);
2149            }
2150
2151            // Generate approver based on level
2152            let (approver_id, approver_role) = self.select_approver(level);
2153
2154            let approve_action = ApprovalAction::new(
2155                approver_id.clone(),
2156                approver_role.to_string(),
2157                approver_role,
2158                ApprovalActionType::Approve,
2159                level,
2160            )
2161            .with_timestamp(current_time);
2162
2163            workflow.actions.push(approve_action);
2164            workflow.current_level = level;
2165        }
2166
2167        // Mark as approved
2168        workflow.status = ApprovalStatus::Approved;
2169        workflow.approved_at = Some(current_time);
2170
2171        entry.header.approval_workflow = Some(workflow);
2172    }
2173
2174    /// Select an approver based on the required level.
2175    fn select_approver(&mut self, level: u8) -> (String, UserPersona) {
2176        let persona = match level {
2177            1 => UserPersona::Manager,
2178            2 => UserPersona::Controller,
2179            _ => UserPersona::Executive,
2180        };
2181
2182        // Try to get from user pool first
2183        if let Some(ref pool) = self.user_pool {
2184            if let Some(user) = pool.get_random_user(persona, &mut self.rng) {
2185                return (user.user_id.clone(), persona);
2186            }
2187        }
2188
2189        // Fallback to generated approver
2190        let approver_id = match persona {
2191            UserPersona::Manager => format!("MGR{:04}", self.rng.random_range(1..100)),
2192            UserPersona::Controller => format!("CTRL{:04}", self.rng.random_range(1..20)),
2193            UserPersona::Executive => format!("EXEC{:04}", self.rng.random_range(1..10)),
2194            _ => format!("USR{:04}", self.rng.random_range(1..1000)),
2195        };
2196
2197        (approver_id, persona)
2198    }
2199
2200    /// Parse user persona from string.
2201    fn parse_persona(&self, persona_str: &str) -> UserPersona {
2202        match persona_str.to_lowercase().as_str() {
2203            s if s.contains("junior") => UserPersona::JuniorAccountant,
2204            s if s.contains("senior") => UserPersona::SeniorAccountant,
2205            s if s.contains("controller") => UserPersona::Controller,
2206            s if s.contains("manager") => UserPersona::Manager,
2207            s if s.contains("executive") => UserPersona::Executive,
2208            s if s.contains("automated") || s.contains("system") => UserPersona::AutomatedSystem,
2209            _ => UserPersona::JuniorAccountant, // Default
2210        }
2211    }
2212
2213    /// Enable or disable approval workflow.
2214    pub fn with_approval(mut self, enabled: bool) -> Self {
2215        self.approval_enabled = enabled;
2216        self
2217    }
2218
2219    /// Set the approval threshold amount.
2220    pub fn with_approval_threshold(mut self, threshold: rust_decimal::Decimal) -> Self {
2221        self.approval_threshold = threshold;
2222        self
2223    }
2224
2225    /// Set the SOD violation rate for approval tracking.
2226    ///
2227    /// When a transaction is approved, there is a `rate` probability (0.0 to 1.0)
2228    /// that the approver is the same as the creator, which constitutes a SOD violation.
2229    /// Default is 0.10 (10%).
2230    pub fn with_sod_violation_rate(mut self, rate: f64) -> Self {
2231        self.sod_violation_rate = rate;
2232        self
2233    }
2234
2235    /// Populate `approved_by` and `approval_date` from the approval workflow,
2236    /// and flag SOD violations when the approver matches the creator.
2237    fn populate_approval_fields(&mut self, entry: &mut JournalEntry, posting_date: NaiveDate) {
2238        if let Some(ref workflow) = entry.header.approval_workflow {
2239            // Extract the last approver from the workflow actions
2240            let last_approver = workflow
2241                .actions
2242                .iter()
2243                .rev()
2244                .find(|a| matches!(a.action, ApprovalActionType::Approve));
2245
2246            if let Some(approver_action) = last_approver {
2247                entry.header.approved_by = Some(approver_action.actor_id.clone());
2248                entry.header.approval_date = Some(approver_action.action_timestamp.date_naive());
2249            } else {
2250                // No explicit approver (auto-approved); use the preparer
2251                entry.header.approved_by = Some(workflow.preparer_id.clone());
2252                entry.header.approval_date = Some(posting_date);
2253            }
2254
2255            // Inject SOD violation: with configured probability, set approver = creator
2256            if self.rng.random::<f64>() < self.sod_violation_rate {
2257                let creator = entry.header.created_by.clone();
2258                entry.header.approved_by = Some(creator);
2259                entry.header.sod_violation = true;
2260                entry.header.sod_conflict_type = Some(SodConflictType::PreparerApprover);
2261            }
2262        }
2263    }
2264
2265    /// Set the temporal drift controller for simulating distribution changes over time.
2266    ///
2267    /// When drift is enabled, amounts and other distributions will shift based on
2268    /// the period (month) to simulate realistic temporal evolution like inflation
2269    /// or increasing fraud rates.
2270    pub fn with_drift_controller(mut self, controller: DriftController) -> Self {
2271        self.drift_controller = Some(controller);
2272        self
2273    }
2274
2275    /// Set drift configuration directly.
2276    ///
2277    /// Creates a drift controller from the config. Total periods is calculated
2278    /// from the date range.
2279    pub fn with_drift_config(mut self, config: DriftConfig, seed: u64) -> Self {
2280        if config.enabled {
2281            let total_periods = self.calculate_total_periods();
2282            self.drift_controller = Some(DriftController::new(config, seed, total_periods));
2283        }
2284        self
2285    }
2286
2287    /// Calculate total periods (months) in the date range.
2288    fn calculate_total_periods(&self) -> u32 {
2289        let start_year = self.start_date.year();
2290        let start_month = self.start_date.month();
2291        let end_year = self.end_date.year();
2292        let end_month = self.end_date.month();
2293
2294        ((end_year - start_year) * 12 + (end_month as i32 - start_month as i32) + 1).max(1) as u32
2295    }
2296
2297    /// Calculate the period number (0-indexed) for a given date.
2298    fn date_to_period(&self, date: NaiveDate) -> u32 {
2299        let start_year = self.start_date.year();
2300        let start_month = self.start_date.month() as i32;
2301        let date_year = date.year();
2302        let date_month = date.month() as i32;
2303
2304        ((date_year - start_year) * 12 + (date_month - start_month)).max(0) as u32
2305    }
2306
2307    /// Get drift adjustments for a given date.
2308    fn get_drift_adjustments(&self, date: NaiveDate) -> DriftAdjustments {
2309        if let Some(ref controller) = self.drift_controller {
2310            let period = self.date_to_period(date);
2311            controller.compute_adjustments(period)
2312        } else {
2313            DriftAdjustments::none()
2314        }
2315    }
2316
2317    /// Select a user from the pool or generate a generic user ID.
2318    #[inline]
2319    fn select_user(&mut self, is_automated: bool) -> (String, String) {
2320        if let Some(ref pool) = self.user_pool {
2321            let persona = if is_automated {
2322                UserPersona::AutomatedSystem
2323            } else {
2324                // Random distribution among human personas
2325                let roll: f64 = self.rng.random();
2326                if roll < 0.4 {
2327                    UserPersona::JuniorAccountant
2328                } else if roll < 0.7 {
2329                    UserPersona::SeniorAccountant
2330                } else if roll < 0.85 {
2331                    UserPersona::Controller
2332                } else {
2333                    UserPersona::Manager
2334                }
2335            };
2336
2337            if let Some(user) = pool.get_random_user(persona, &mut self.rng) {
2338                return (user.user_id.clone(), user.persona.to_string());
2339            }
2340        }
2341
2342        // Fallback to generic format
2343        if is_automated {
2344            (
2345                format!("BATCH{:04}", self.rng.random_range(1..=20)),
2346                "automated_system".to_string(),
2347            )
2348        } else {
2349            (
2350                format!("USER{:04}", self.rng.random_range(1..=40)),
2351                "senior_accountant".to_string(),
2352            )
2353        }
2354    }
2355
2356    /// Select transaction source based on configuration weights.
2357    #[inline]
2358    fn select_source(&mut self) -> TransactionSource {
2359        let roll: f64 = self.rng.random();
2360        let dist = &self.config.source_distribution;
2361
2362        if roll < dist.manual {
2363            TransactionSource::Manual
2364        } else if roll < dist.manual + dist.automated {
2365            TransactionSource::Automated
2366        } else if roll < dist.manual + dist.automated + dist.recurring {
2367            TransactionSource::Recurring
2368        } else {
2369            TransactionSource::Adjustment
2370        }
2371    }
2372
2373    /// Select a business process based on configuration weights.
2374    #[inline]
2375    /// Map a business process to a SAP-style document type code.
2376    ///
2377    /// - P2P → "KR" (vendor invoice)
2378    /// - O2C → "DR" (customer invoice)
2379    /// - R2R → "SA" (general journal)
2380    /// - H2R → "HR" (HR posting)
2381    /// - A2R → "AA" (asset posting)
2382    /// - others → "SA"
2383    fn document_type_for_process(process: BusinessProcess) -> &'static str {
2384        match process {
2385            BusinessProcess::P2P => "KR",
2386            BusinessProcess::O2C => "DR",
2387            BusinessProcess::R2R => "SA",
2388            BusinessProcess::H2R => "HR",
2389            BusinessProcess::A2R => "AA",
2390            _ => "SA",
2391        }
2392    }
2393
2394    fn select_business_process(&mut self) -> BusinessProcess {
2395        *datasynth_core::utils::weighted_select(&mut self.rng, &self.business_process_weights)
2396    }
2397
2398    #[inline]
2399    fn select_debit_account(&mut self) -> &GLAccount {
2400        let accounts = self.coa.get_accounts_by_type(AccountType::Asset);
2401        let expense_accounts = self.coa.get_accounts_by_type(AccountType::Expense);
2402
2403        // 60% asset, 40% expense for debits
2404        let all: Vec<_> = if self.rng.random::<f64>() < 0.6 {
2405            accounts
2406        } else {
2407            expense_accounts
2408        };
2409
2410        all.choose(&mut self.rng).copied().unwrap_or_else(|| {
2411            tracing::warn!(
2412                "Account selection returned empty list, falling back to first COA account"
2413            );
2414            &self.coa.accounts[0]
2415        })
2416    }
2417
2418    #[inline]
2419    fn select_credit_account(&mut self) -> &GLAccount {
2420        let liability_accounts = self.coa.get_accounts_by_type(AccountType::Liability);
2421        let revenue_accounts = self.coa.get_accounts_by_type(AccountType::Revenue);
2422
2423        // 60% liability, 40% revenue for credits
2424        let all: Vec<_> = if self.rng.random::<f64>() < 0.6 {
2425            liability_accounts
2426        } else {
2427            revenue_accounts
2428        };
2429
2430        all.choose(&mut self.rng).copied().unwrap_or_else(|| {
2431            tracing::warn!(
2432                "Account selection returned empty list, falling back to first COA account"
2433            );
2434            &self.coa.accounts[0]
2435        })
2436    }
2437}
2438
2439impl Generator for JournalEntryGenerator {
2440    type Item = JournalEntry;
2441    type Config = (
2442        TransactionConfig,
2443        Arc<ChartOfAccounts>,
2444        Vec<String>,
2445        NaiveDate,
2446        NaiveDate,
2447    );
2448
2449    fn new(config: Self::Config, seed: u64) -> Self {
2450        Self::new_with_params(config.0, config.1, config.2, config.3, config.4, seed)
2451    }
2452
2453    fn generate_one(&mut self) -> Self::Item {
2454        self.generate()
2455    }
2456
2457    fn reset(&mut self) {
2458        self.rng = seeded_rng(self.seed, 0);
2459        self.line_sampler.reset(self.seed + 1);
2460        self.amount_sampler.reset(self.seed + 2);
2461        self.temporal_sampler.reset(self.seed + 3);
2462        if let Some(ref mut adv) = self.advanced_amount_sampler {
2463            adv.reset(self.seed + 2);
2464        }
2465        self.count = 0;
2466        self.uuid_factory.reset();
2467
2468        // Reset reference generator by recreating it
2469        let mut ref_gen = ReferenceGenerator::new(
2470            self.start_date.year(),
2471            self.companies
2472                .first()
2473                .map(std::string::String::as_str)
2474                .unwrap_or("1000"),
2475        );
2476        ref_gen.set_prefix(
2477            ReferenceType::Invoice,
2478            &self.template_config.references.invoice_prefix,
2479        );
2480        ref_gen.set_prefix(
2481            ReferenceType::PurchaseOrder,
2482            &self.template_config.references.po_prefix,
2483        );
2484        ref_gen.set_prefix(
2485            ReferenceType::SalesOrder,
2486            &self.template_config.references.so_prefix,
2487        );
2488        self.reference_generator = ref_gen;
2489    }
2490
2491    fn count(&self) -> u64 {
2492        self.count
2493    }
2494
2495    fn seed(&self) -> u64 {
2496        self.seed
2497    }
2498}
2499
2500use datasynth_core::traits::ParallelGenerator;
2501
2502impl ParallelGenerator for JournalEntryGenerator {
2503    /// Split this generator into `parts` independent sub-generators.
2504    ///
2505    /// Each sub-generator gets a deterministic seed derived from the parent seed
2506    /// and its partition index, plus a partitioned UUID factory to avoid contention.
2507    /// The results are deterministic for a given partition count.
2508    fn split(self, parts: usize) -> Vec<Self> {
2509        let parts = parts.max(1);
2510        (0..parts)
2511            .map(|i| {
2512                // Derive a unique seed per partition using a golden-ratio constant
2513                let sub_seed = self
2514                    .seed
2515                    .wrapping_add((i as u64).wrapping_mul(0x9E3779B97F4A7C15));
2516
2517                let mut gen = JournalEntryGenerator::new_with_full_config(
2518                    self.config.clone(),
2519                    Arc::clone(&self.coa),
2520                    self.companies.clone(),
2521                    self.start_date,
2522                    self.end_date,
2523                    sub_seed,
2524                    self.template_config.clone(),
2525                    self.user_pool.clone(),
2526                );
2527
2528                // Copy over configuration state
2529                gen.company_selector = self.company_selector.clone();
2530                gen.vendor_pool = self.vendor_pool.clone();
2531                gen.customer_pool = self.customer_pool.clone();
2532                gen.material_pool = self.material_pool.clone();
2533                gen.using_real_master_data = self.using_real_master_data;
2534                gen.fraud_config = self.fraud_config.clone();
2535                gen.persona_errors_enabled = self.persona_errors_enabled;
2536                gen.approval_enabled = self.approval_enabled;
2537                gen.approval_threshold = self.approval_threshold;
2538                gen.sod_violation_rate = self.sod_violation_rate;
2539                // v3.4.0+: advanced amount sampler (mixture / Pareto /
2540                // Gaussian). Clone and reset the internal RNG with the
2541                // partition's sub_seed so each worker explores a unique
2542                // subsequence without repeating the parent stream.
2543                if let Some(mut adv) = self.advanced_amount_sampler.clone() {
2544                    adv.reset(sub_seed.wrapping_add(2));
2545                    gen.advanced_amount_sampler = Some(adv);
2546                }
2547                // v3.5.3+: conditional amount override — clone + reset
2548                // so each partition gets a fresh deterministic stream.
2549                if let Some(mut cond) = self.conditional_amount_override.clone() {
2550                    cond.reset(sub_seed.wrapping_add(17));
2551                    gen.conditional_amount_override = Some(cond);
2552                }
2553                // v3.5.4+: copula sampler — clone + reset per partition.
2554                if let Some(mut cop) = self.correlation_copula.clone() {
2555                    cop.reset(sub_seed.wrapping_add(31));
2556                    gen.correlation_copula = Some(cop);
2557                }
2558
2559                // Use partitioned UUID factory to eliminate atomic contention
2560                gen.uuid_factory = DeterministicUuidFactory::for_partition(
2561                    sub_seed,
2562                    GeneratorType::JournalEntry,
2563                    i as u8,
2564                );
2565
2566                // Copy temporal patterns if configured
2567                if let Some(ref config) = self.temporal_patterns_config {
2568                    gen.temporal_patterns_config = Some(config.clone());
2569                    // Rebuild business day calculator from the stored config
2570                    if config.business_days.enabled {
2571                        if let Some(ref bdc) = self.business_day_calculator {
2572                            gen.business_day_calculator = Some(bdc.clone());
2573                        }
2574                    }
2575                    // Rebuild processing lag calculator with partition seed
2576                    if config.processing_lags.enabled {
2577                        let lag_config =
2578                            Self::convert_processing_lag_config(&config.processing_lags);
2579                        gen.processing_lag_calculator =
2580                            Some(ProcessingLagCalculator::with_config(sub_seed, lag_config));
2581                    }
2582                }
2583
2584                // Copy drift controller if present
2585                if let Some(ref dc) = self.drift_controller {
2586                    gen.drift_controller = Some(dc.clone());
2587                }
2588
2589                gen
2590            })
2591            .collect()
2592    }
2593}
2594
2595#[cfg(test)]
2596#[allow(clippy::unwrap_used)]
2597mod tests {
2598    use super::*;
2599    use crate::ChartOfAccountsGenerator;
2600
2601    #[test]
2602    fn test_generate_balanced_entries() {
2603        let mut coa_gen =
2604            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2605        let coa = Arc::new(coa_gen.generate());
2606
2607        let mut je_gen = JournalEntryGenerator::new_with_params(
2608            TransactionConfig::default(),
2609            coa,
2610            vec!["1000".to_string()],
2611            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2612            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2613            42,
2614        );
2615
2616        let mut balanced_count = 0;
2617        for _ in 0..100 {
2618            let entry = je_gen.generate();
2619
2620            // Skip entries with human errors as they may be intentionally unbalanced
2621            let has_human_error = entry
2622                .header
2623                .header_text
2624                .as_ref()
2625                .map(|t| t.contains("[HUMAN_ERROR:"))
2626                .unwrap_or(false);
2627
2628            if !has_human_error {
2629                assert!(
2630                    entry.is_balanced(),
2631                    "Entry {:?} is not balanced",
2632                    entry.header.document_id
2633                );
2634                balanced_count += 1;
2635            }
2636            assert!(entry.line_count() >= 2, "Entry has fewer than 2 lines");
2637        }
2638
2639        // Ensure most entries are balanced (human errors are rare)
2640        assert!(
2641            balanced_count >= 80,
2642            "Expected at least 80 balanced entries, got {}",
2643            balanced_count
2644        );
2645    }
2646
2647    #[test]
2648    fn test_deterministic_generation() {
2649        let mut coa_gen =
2650            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2651        let coa = Arc::new(coa_gen.generate());
2652
2653        let mut gen1 = JournalEntryGenerator::new_with_params(
2654            TransactionConfig::default(),
2655            Arc::clone(&coa),
2656            vec!["1000".to_string()],
2657            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2658            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2659            42,
2660        );
2661
2662        let mut gen2 = JournalEntryGenerator::new_with_params(
2663            TransactionConfig::default(),
2664            coa,
2665            vec!["1000".to_string()],
2666            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2667            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2668            42,
2669        );
2670
2671        for _ in 0..50 {
2672            let e1 = gen1.generate();
2673            let e2 = gen2.generate();
2674            assert_eq!(e1.header.document_id, e2.header.document_id);
2675            assert_eq!(e1.total_debit(), e2.total_debit());
2676        }
2677    }
2678
2679    #[test]
2680    fn test_templates_generate_descriptions() {
2681        let mut coa_gen =
2682            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2683        let coa = Arc::new(coa_gen.generate());
2684
2685        // Enable all template features
2686        let template_config = TemplateConfig {
2687            names: datasynth_config::schema::NameTemplateConfig {
2688                generate_realistic_names: true,
2689                email_domain: "test.com".to_string(),
2690                culture_distribution: datasynth_config::schema::CultureDistribution::default(),
2691            },
2692            descriptions: datasynth_config::schema::DescriptionTemplateConfig {
2693                generate_header_text: true,
2694                generate_line_text: true,
2695            },
2696            references: datasynth_config::schema::ReferenceTemplateConfig {
2697                generate_references: true,
2698                invoice_prefix: "TEST-INV".to_string(),
2699                po_prefix: "TEST-PO".to_string(),
2700                so_prefix: "TEST-SO".to_string(),
2701            },
2702            path: None,
2703            merge_strategy: datasynth_config::TemplateMergeStrategy::default(),
2704        };
2705
2706        let mut je_gen = JournalEntryGenerator::new_with_full_config(
2707            TransactionConfig::default(),
2708            coa,
2709            vec!["1000".to_string()],
2710            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2711            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2712            42,
2713            template_config,
2714            None,
2715        )
2716        .with_persona_errors(false); // Disable for template testing
2717
2718        for _ in 0..10 {
2719            let entry = je_gen.generate();
2720
2721            // Verify header text is populated
2722            assert!(
2723                entry.header.header_text.is_some(),
2724                "Header text should be populated"
2725            );
2726
2727            // Verify reference is populated
2728            assert!(
2729                entry.header.reference.is_some(),
2730                "Reference should be populated"
2731            );
2732
2733            // Verify business process is set
2734            assert!(
2735                entry.header.business_process.is_some(),
2736                "Business process should be set"
2737            );
2738
2739            // Verify line text is populated
2740            for line in &entry.lines {
2741                assert!(line.line_text.is_some(), "Line text should be populated");
2742            }
2743
2744            // Entry should still be balanced
2745            assert!(entry.is_balanced());
2746        }
2747    }
2748
2749    #[test]
2750    fn test_user_pool_integration() {
2751        let mut coa_gen =
2752            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2753        let coa = Arc::new(coa_gen.generate());
2754
2755        let companies = vec!["1000".to_string()];
2756
2757        // Generate user pool
2758        let mut user_gen = crate::UserGenerator::new(42);
2759        let user_pool = user_gen.generate_standard(&companies);
2760
2761        let mut je_gen = JournalEntryGenerator::new_with_full_config(
2762            TransactionConfig::default(),
2763            coa,
2764            companies,
2765            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2766            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2767            42,
2768            TemplateConfig::default(),
2769            Some(user_pool),
2770        );
2771
2772        // Generate entries and verify user IDs are from pool
2773        for _ in 0..20 {
2774            let entry = je_gen.generate();
2775
2776            // User ID should not be generic BATCH/USER format when pool is used
2777            // (though it may still fall back if random selection misses)
2778            assert!(!entry.header.created_by.is_empty());
2779        }
2780    }
2781
2782    #[test]
2783    fn test_master_data_connection() {
2784        let mut coa_gen =
2785            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2786        let coa = Arc::new(coa_gen.generate());
2787
2788        // Create test vendors
2789        let vendors = vec![
2790            Vendor::new("V-TEST-001", "Test Vendor Alpha", VendorType::Supplier),
2791            Vendor::new("V-TEST-002", "Test Vendor Beta", VendorType::Technology),
2792        ];
2793
2794        // Create test customers
2795        let customers = vec![
2796            Customer::new("C-TEST-001", "Test Customer One", CustomerType::Corporate),
2797            Customer::new(
2798                "C-TEST-002",
2799                "Test Customer Two",
2800                CustomerType::SmallBusiness,
2801            ),
2802        ];
2803
2804        // Create test materials
2805        let materials = vec![Material::new(
2806            "MAT-TEST-001",
2807            "Test Material A",
2808            MaterialType::RawMaterial,
2809        )];
2810
2811        // Create generator with master data
2812        let generator = JournalEntryGenerator::new_with_params(
2813            TransactionConfig::default(),
2814            coa,
2815            vec!["1000".to_string()],
2816            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2817            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2818            42,
2819        );
2820
2821        // Without master data
2822        assert!(!generator.is_using_real_master_data());
2823
2824        // Connect master data
2825        let generator_with_data = generator
2826            .with_vendors(&vendors)
2827            .with_customers(&customers)
2828            .with_materials(&materials);
2829
2830        // Should now be using real master data
2831        assert!(generator_with_data.is_using_real_master_data());
2832    }
2833
2834    #[test]
2835    fn test_with_master_data_convenience_method() {
2836        let mut coa_gen =
2837            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2838        let coa = Arc::new(coa_gen.generate());
2839
2840        let vendors = vec![Vendor::new("V-001", "Vendor One", VendorType::Supplier)];
2841        let customers = vec![Customer::new(
2842            "C-001",
2843            "Customer One",
2844            CustomerType::Corporate,
2845        )];
2846        let materials = vec![Material::new(
2847            "MAT-001",
2848            "Material One",
2849            MaterialType::RawMaterial,
2850        )];
2851
2852        let generator = JournalEntryGenerator::new_with_params(
2853            TransactionConfig::default(),
2854            coa,
2855            vec!["1000".to_string()],
2856            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2857            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2858            42,
2859        )
2860        .with_master_data(&vendors, &customers, &materials);
2861
2862        assert!(generator.is_using_real_master_data());
2863    }
2864
2865    #[test]
2866    fn test_stress_factors_increase_error_rate() {
2867        let mut coa_gen =
2868            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2869        let coa = Arc::new(coa_gen.generate());
2870
2871        let generator = JournalEntryGenerator::new_with_params(
2872            TransactionConfig::default(),
2873            coa,
2874            vec!["1000".to_string()],
2875            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2876            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2877            42,
2878        );
2879
2880        let base_rate = 0.1;
2881
2882        // Regular day - no stress factors
2883        let regular_day = NaiveDate::from_ymd_opt(2024, 6, 15).unwrap(); // Mid-June Wednesday
2884        let regular_rate = generator.apply_stress_factors(base_rate, regular_day);
2885        assert!(
2886            (regular_rate - base_rate).abs() < 0.01,
2887            "Regular day should have minimal stress factor adjustment"
2888        );
2889
2890        // Month end - 50% more errors
2891        let month_end = NaiveDate::from_ymd_opt(2024, 6, 29).unwrap(); // June 29 (Saturday)
2892        let month_end_rate = generator.apply_stress_factors(base_rate, month_end);
2893        assert!(
2894            month_end_rate > regular_rate,
2895            "Month end should have higher error rate than regular day"
2896        );
2897
2898        // Year end - double the error rate
2899        let year_end = NaiveDate::from_ymd_opt(2024, 12, 30).unwrap(); // December 30
2900        let year_end_rate = generator.apply_stress_factors(base_rate, year_end);
2901        assert!(
2902            year_end_rate > month_end_rate,
2903            "Year end should have highest error rate"
2904        );
2905
2906        // Friday stress
2907        let friday = NaiveDate::from_ymd_opt(2024, 6, 14).unwrap(); // Friday
2908        let friday_rate = generator.apply_stress_factors(base_rate, friday);
2909        assert!(
2910            friday_rate > regular_rate,
2911            "Friday should have higher error rate than mid-week"
2912        );
2913
2914        // Monday stress
2915        let monday = NaiveDate::from_ymd_opt(2024, 6, 17).unwrap(); // Monday
2916        let monday_rate = generator.apply_stress_factors(base_rate, monday);
2917        assert!(
2918            monday_rate > regular_rate,
2919            "Monday should have higher error rate than mid-week"
2920        );
2921    }
2922
2923    #[test]
2924    fn test_batching_produces_similar_entries() {
2925        let mut coa_gen =
2926            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2927        let coa = Arc::new(coa_gen.generate());
2928
2929        // Use seed 123 which is more likely to trigger batching
2930        let mut je_gen = JournalEntryGenerator::new_with_params(
2931            TransactionConfig::default(),
2932            coa,
2933            vec!["1000".to_string()],
2934            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2935            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2936            123,
2937        )
2938        .with_persona_errors(false); // Disable to ensure balanced entries
2939
2940        // Generate many entries - at 15% batch rate, should see some batches
2941        let entries: Vec<JournalEntry> = (0..200).map(|_| je_gen.generate()).collect();
2942
2943        // Check that all entries are balanced (batched or not)
2944        for entry in &entries {
2945            assert!(
2946                entry.is_balanced(),
2947                "All entries including batched should be balanced"
2948            );
2949        }
2950
2951        // Count entries with same-day posting dates (batch indicator)
2952        let mut date_counts: std::collections::HashMap<NaiveDate, usize> =
2953            std::collections::HashMap::new();
2954        for entry in &entries {
2955            *date_counts.entry(entry.header.posting_date).or_insert(0) += 1;
2956        }
2957
2958        // With batching, some dates should have multiple entries
2959        let dates_with_multiple = date_counts.values().filter(|&&c| c > 1).count();
2960        assert!(
2961            dates_with_multiple > 0,
2962            "With batching, should see some dates with multiple entries"
2963        );
2964    }
2965
2966    #[test]
2967    fn test_temporal_patterns_business_days() {
2968        use datasynth_config::schema::{
2969            BusinessDaySchemaConfig, CalendarSchemaConfig, TemporalPatternsConfig,
2970        };
2971
2972        let mut coa_gen =
2973            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2974        let coa = Arc::new(coa_gen.generate());
2975
2976        // Create temporal patterns config with business days enabled
2977        let temporal_config = TemporalPatternsConfig {
2978            enabled: true,
2979            business_days: BusinessDaySchemaConfig {
2980                enabled: true,
2981                ..Default::default()
2982            },
2983            calendars: CalendarSchemaConfig {
2984                regions: vec!["US".to_string()],
2985                custom_holidays: vec![],
2986            },
2987            ..Default::default()
2988        };
2989
2990        let mut je_gen = JournalEntryGenerator::new_with_params(
2991            TransactionConfig::default(),
2992            coa,
2993            vec!["1000".to_string()],
2994            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2995            NaiveDate::from_ymd_opt(2024, 3, 31).unwrap(), // Q1 2024
2996            42,
2997        )
2998        .with_temporal_patterns(temporal_config, 42)
2999        .with_persona_errors(false);
3000
3001        // Generate entries and verify none fall on weekends
3002        let entries: Vec<JournalEntry> = (0..100).map(|_| je_gen.generate()).collect();
3003
3004        for entry in &entries {
3005            let weekday = entry.header.posting_date.weekday();
3006            assert!(
3007                weekday != chrono::Weekday::Sat && weekday != chrono::Weekday::Sun,
3008                "Posting date {:?} should not be a weekend",
3009                entry.header.posting_date
3010            );
3011        }
3012    }
3013
3014    #[test]
3015    fn test_default_generation_filters_weekends() {
3016        // Verify that weekend entries are <5% even when temporal_patterns is NOT enabled.
3017        // This tests the fix where new_with_full_config always creates a default
3018        // BusinessDayCalculator with US holidays as a fallback.
3019        let mut coa_gen =
3020            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
3021        let coa = Arc::new(coa_gen.generate());
3022
3023        let mut je_gen = JournalEntryGenerator::new_with_params(
3024            TransactionConfig::default(),
3025            coa,
3026            vec!["1000".to_string()],
3027            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
3028            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
3029            42,
3030        )
3031        .with_persona_errors(false);
3032
3033        let total = 500;
3034        let entries: Vec<JournalEntry> = (0..total).map(|_| je_gen.generate()).collect();
3035
3036        let weekend_count = entries
3037            .iter()
3038            .filter(|e| {
3039                let wd = e.header.posting_date.weekday();
3040                wd == chrono::Weekday::Sat || wd == chrono::Weekday::Sun
3041            })
3042            .count();
3043
3044        let weekend_pct = weekend_count as f64 / total as f64;
3045        assert!(
3046            weekend_pct < 0.05,
3047            "Expected weekend entries <5% of total without temporal_patterns enabled, \
3048             but got {:.1}% ({}/{})",
3049            weekend_pct * 100.0,
3050            weekend_count,
3051            total
3052        );
3053    }
3054
3055    #[test]
3056    fn test_document_type_derived_from_business_process() {
3057        let mut coa_gen =
3058            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
3059        let coa = Arc::new(coa_gen.generate());
3060
3061        let mut je_gen = JournalEntryGenerator::new_with_params(
3062            TransactionConfig::default(),
3063            coa,
3064            vec!["1000".to_string()],
3065            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
3066            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
3067            99,
3068        )
3069        .with_persona_errors(false)
3070        .with_batching(false);
3071
3072        let total = 200;
3073        let mut doc_types = std::collections::HashSet::new();
3074        let mut sa_count = 0_usize;
3075
3076        for _ in 0..total {
3077            let entry = je_gen.generate();
3078            let dt = &entry.header.document_type;
3079            doc_types.insert(dt.clone());
3080            if dt == "SA" {
3081                sa_count += 1;
3082            }
3083        }
3084
3085        // Should have more than 3 distinct document types
3086        assert!(
3087            doc_types.len() > 3,
3088            "Expected >3 distinct document types, got {} ({:?})",
3089            doc_types.len(),
3090            doc_types,
3091        );
3092
3093        // "SA" should be less than 50% (R2R is 20% of the weight)
3094        let sa_pct = sa_count as f64 / total as f64;
3095        assert!(
3096            sa_pct < 0.50,
3097            "Expected SA <50%, got {:.1}% ({}/{})",
3098            sa_pct * 100.0,
3099            sa_count,
3100            total,
3101        );
3102    }
3103
3104    #[test]
3105    fn test_enrich_line_items_account_description() {
3106        let mut coa_gen =
3107            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
3108        let coa = Arc::new(coa_gen.generate());
3109
3110        let mut je_gen = JournalEntryGenerator::new_with_params(
3111            TransactionConfig::default(),
3112            coa,
3113            vec!["1000".to_string()],
3114            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
3115            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
3116            42,
3117        )
3118        .with_persona_errors(false);
3119
3120        let total = 200;
3121        let entries: Vec<JournalEntry> = (0..total).map(|_| je_gen.generate()).collect();
3122
3123        // Count lines with account_description populated
3124        let total_lines: usize = entries.iter().map(|e| e.lines.len()).sum();
3125        let lines_with_desc: usize = entries
3126            .iter()
3127            .flat_map(|e| &e.lines)
3128            .filter(|l| l.account_description.is_some())
3129            .count();
3130
3131        let desc_pct = lines_with_desc as f64 / total_lines as f64;
3132        assert!(
3133            desc_pct > 0.95,
3134            "Expected >95% of lines to have account_description, got {:.1}% ({}/{})",
3135            desc_pct * 100.0,
3136            lines_with_desc,
3137            total_lines,
3138        );
3139    }
3140
3141    #[test]
3142    fn test_enrich_line_items_cost_center_for_expense_accounts() {
3143        let mut coa_gen =
3144            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
3145        let coa = Arc::new(coa_gen.generate());
3146
3147        let mut je_gen = JournalEntryGenerator::new_with_params(
3148            TransactionConfig::default(),
3149            coa,
3150            vec!["1000".to_string()],
3151            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
3152            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
3153            42,
3154        )
3155        .with_persona_errors(false);
3156
3157        let total = 300;
3158        let entries: Vec<JournalEntry> = (0..total).map(|_| je_gen.generate()).collect();
3159
3160        // Count expense account lines (5xxx/6xxx) with cost_center populated
3161        let expense_lines: Vec<&JournalEntryLine> = entries
3162            .iter()
3163            .flat_map(|e| &e.lines)
3164            .filter(|l| {
3165                let first = l.gl_account.chars().next().unwrap_or('0');
3166                first == '5' || first == '6'
3167            })
3168            .collect();
3169
3170        if !expense_lines.is_empty() {
3171            let with_cc = expense_lines
3172                .iter()
3173                .filter(|l| l.cost_center.is_some())
3174                .count();
3175            let cc_pct = with_cc as f64 / expense_lines.len() as f64;
3176            assert!(
3177                cc_pct > 0.80,
3178                "Expected >80% of expense lines to have cost_center, got {:.1}% ({}/{})",
3179                cc_pct * 100.0,
3180                with_cc,
3181                expense_lines.len(),
3182            );
3183        }
3184    }
3185
3186    #[test]
3187    fn test_enrich_line_items_profit_center_and_line_text() {
3188        let mut coa_gen =
3189            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
3190        let coa = Arc::new(coa_gen.generate());
3191
3192        let mut je_gen = JournalEntryGenerator::new_with_params(
3193            TransactionConfig::default(),
3194            coa,
3195            vec!["1000".to_string()],
3196            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
3197            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
3198            42,
3199        )
3200        .with_persona_errors(false);
3201
3202        let total = 100;
3203        let entries: Vec<JournalEntry> = (0..total).map(|_| je_gen.generate()).collect();
3204
3205        let total_lines: usize = entries.iter().map(|e| e.lines.len()).sum();
3206
3207        // All lines should have profit_center
3208        let with_pc = entries
3209            .iter()
3210            .flat_map(|e| &e.lines)
3211            .filter(|l| l.profit_center.is_some())
3212            .count();
3213        let pc_pct = with_pc as f64 / total_lines as f64;
3214        assert!(
3215            pc_pct > 0.95,
3216            "Expected >95% of lines to have profit_center, got {:.1}% ({}/{})",
3217            pc_pct * 100.0,
3218            with_pc,
3219            total_lines,
3220        );
3221
3222        // All lines should have line_text (either from template or header fallback)
3223        let with_text = entries
3224            .iter()
3225            .flat_map(|e| &e.lines)
3226            .filter(|l| l.line_text.is_some())
3227            .count();
3228        let text_pct = with_text as f64 / total_lines as f64;
3229        assert!(
3230            text_pct > 0.95,
3231            "Expected >95% of lines to have line_text, got {:.1}% ({}/{})",
3232            text_pct * 100.0,
3233            with_text,
3234            total_lines,
3235        );
3236    }
3237
3238    // --- ISA 240 audit flag tests ---
3239
3240    #[test]
3241    fn test_je_has_audit_flags() {
3242        let mut coa_gen =
3243            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
3244        let coa = Arc::new(coa_gen.generate());
3245
3246        let mut je_gen = JournalEntryGenerator::new_with_params(
3247            TransactionConfig::default(),
3248            coa,
3249            vec!["1000".to_string()],
3250            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
3251            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
3252            42,
3253        )
3254        .with_persona_errors(false);
3255
3256        for _ in 0..100 {
3257            let entry = je_gen.generate();
3258
3259            // source_system should always be non-empty
3260            assert!(
3261                !entry.header.source_system.is_empty(),
3262                "source_system should be populated, got empty string"
3263            );
3264
3265            // created_by should always be non-empty (already tested elsewhere, but confirm)
3266            assert!(
3267                !entry.header.created_by.is_empty(),
3268                "created_by should be populated"
3269            );
3270
3271            // created_date should always be populated
3272            assert!(
3273                entry.header.created_date.is_some(),
3274                "created_date should be populated"
3275            );
3276        }
3277    }
3278
3279    #[test]
3280    fn test_manual_entry_rate() {
3281        let mut coa_gen =
3282            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
3283        let coa = Arc::new(coa_gen.generate());
3284
3285        let mut je_gen = JournalEntryGenerator::new_with_params(
3286            TransactionConfig::default(),
3287            coa,
3288            vec!["1000".to_string()],
3289            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
3290            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
3291            42,
3292        )
3293        .with_persona_errors(false)
3294        .with_batching(false);
3295
3296        let total = 1000;
3297        let entries: Vec<JournalEntry> = (0..total).map(|_| je_gen.generate()).collect();
3298
3299        let manual_count = entries.iter().filter(|e| e.header.is_manual).count();
3300        let manual_rate = manual_count as f64 / total as f64;
3301
3302        // Default source_distribution.manual is typically around 0.05-0.15
3303        // Allow a wide tolerance for statistical variation
3304        assert!(
3305            manual_rate > 0.01 && manual_rate < 0.50,
3306            "Manual entry rate should be reasonable (1%-50%), got {:.1}% ({}/{})",
3307            manual_rate * 100.0,
3308            manual_count,
3309            total,
3310        );
3311
3312        // is_manual should match TransactionSource::Manual
3313        for entry in &entries {
3314            let source_is_manual = entry.header.source == TransactionSource::Manual;
3315            assert_eq!(
3316                entry.header.is_manual, source_is_manual,
3317                "is_manual should match source == Manual"
3318            );
3319        }
3320    }
3321
3322    #[test]
3323    fn test_manual_source_consistency() {
3324        let mut coa_gen =
3325            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
3326        let coa = Arc::new(coa_gen.generate());
3327
3328        let mut je_gen = JournalEntryGenerator::new_with_params(
3329            TransactionConfig::default(),
3330            coa,
3331            vec!["1000".to_string()],
3332            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
3333            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
3334            42,
3335        )
3336        .with_persona_errors(false)
3337        .with_batching(false);
3338
3339        for _ in 0..500 {
3340            let entry = je_gen.generate();
3341
3342            if entry.header.is_manual {
3343                // Manual entries must have a source_system in the
3344                // `manual/...` or `spreadsheet/...` family (the bare
3345                // legacy `manual` and `spreadsheet` values are also
3346                // accepted to keep older fixtures working).
3347                let s = entry.header.source_system.as_str();
3348                assert!(
3349                    s == "manual"
3350                        || s == "spreadsheet"
3351                        || s.starts_with("manual/")
3352                        || s.starts_with("spreadsheet/"),
3353                    "Manual entry should have source_system in `manual` / `spreadsheet` family, got '{s}'",
3354                );
3355            } else {
3356                // Non-manual entries must NOT be in the manual/spreadsheet family.
3357                let s = entry.header.source_system.as_str();
3358                assert!(
3359                    !(s == "manual"
3360                        || s == "spreadsheet"
3361                        || s.starts_with("manual/")
3362                        || s.starts_with("spreadsheet/")),
3363                    "Non-manual entry should not be in `manual` / `spreadsheet` family, got '{s}'",
3364                );
3365            }
3366        }
3367    }
3368
3369    #[test]
3370    fn test_created_date_before_posting() {
3371        let mut coa_gen =
3372            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
3373        let coa = Arc::new(coa_gen.generate());
3374
3375        let mut je_gen = JournalEntryGenerator::new_with_params(
3376            TransactionConfig::default(),
3377            coa,
3378            vec!["1000".to_string()],
3379            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
3380            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
3381            42,
3382        )
3383        .with_persona_errors(false);
3384
3385        for _ in 0..500 {
3386            let entry = je_gen.generate();
3387
3388            if let Some(created_date) = entry.header.created_date {
3389                let created_naive_date = created_date.date();
3390                assert!(
3391                    created_naive_date <= entry.header.posting_date,
3392                    "created_date ({}) should be <= posting_date ({})",
3393                    created_naive_date,
3394                    entry.header.posting_date,
3395                );
3396            }
3397        }
3398    }
3399}