Skip to main content

datasynth_generators/
je_generator.rs

1//! Journal Entry generator with statistical distributions.
2
3use chrono::{Datelike, NaiveDate, Timelike};
4use datasynth_core::utils::seeded_rng;
5use rand::prelude::*;
6use rand_chacha::ChaCha8Rng;
7use rust_decimal::prelude::*;
8use rust_decimal::Decimal;
9use std::sync::Arc;
10
11use tracing::debug;
12
13use datasynth_config::schema::{
14    AdvancedDistributionConfig, FraudConfig, GeneratorConfig, MixtureDistributionType,
15    TemplateConfig, TemporalPatternsConfig, TransactionConfig,
16};
17use datasynth_core::distributions::{
18    AdvancedAmountSampler, BusinessDayCalculator, CrossDayConfig, DriftAdjustments, DriftConfig,
19    DriftController, EventType, IndustryAmountProfile, IndustryType, LagDistribution,
20    PeriodEndConfig, PeriodEndDynamics, PeriodEndModel, ProcessingLagCalculator,
21    ProcessingLagConfig, *,
22};
23use datasynth_core::models::*;
24use datasynth_core::templates::{
25    descriptions::DescriptionContext, DescriptionGenerator, ReferenceGenerator, ReferenceType,
26};
27use datasynth_core::traits::Generator;
28use datasynth_core::uuid_factory::{DeterministicUuidFactory, GeneratorType};
29use datasynth_core::CountryPack;
30
31use crate::company_selector::WeightedCompanySelector;
32use crate::user_generator::{UserGenerator, UserGeneratorConfig};
33
/// Generator for realistic journal entries.
///
/// Bundles the seeded random source, the line-item / amount / temporal
/// samplers, the master-data pools and the optional feature components
/// (fraud, approvals, batching, drift, temporal patterns, advanced
/// distributions) that are configured through the constructors and the
/// `set_*` / `with_*` methods of this type.
pub struct JournalEntryGenerator {
    // Primary random source, created via `seeded_rng(seed, 0)` in the constructor.
    rng: ChaCha8Rng,
    // Base seed the generator was constructed with.
    seed: u64,
    // Transaction-level configuration the samplers were built from.
    config: TransactionConfig,
    // Shared chart of accounts.
    coa: Arc<ChartOfAccounts>,
    // Company codes; the first one (or "1000") seeds the reference generator.
    companies: Vec<String>,
    // Company picker: uniform by default, volume-weighted when built via
    // `from_generator_config`.
    company_selector: WeightedCompanySelector,
    line_sampler: LineItemSampler,
    amount_sampler: AmountSampler,
    temporal_sampler: TemporalSampler,
    // Date range supplied to the constructor.
    start_date: NaiveDate,
    end_date: NaiveDate,
    // Starts at 0 in the constructor.
    count: u64,
    uuid_factory: DeterministicUuidFactory,
    // Enhanced features
    // `None` when no pool was supplied and realistic-name generation is disabled.
    user_pool: Option<UserPool>,
    description_generator: DescriptionGenerator,
    reference_generator: ReferenceGenerator,
    template_config: TemplateConfig,
    // Default to the `standard()` pools until real master data is supplied.
    vendor_pool: VendorPool,
    customer_pool: CustomerPool,
    // Material pool for realistic material references
    material_pool: Option<MaterialPool>,
    // Flag indicating whether we're using real master data vs defaults
    using_real_master_data: bool,
    // Fraud generation
    fraud_config: FraudConfig,
    // Persona-based error injection
    persona_errors_enabled: bool,
    // Approval threshold enforcement
    approval_enabled: bool,
    approval_threshold: rust_decimal::Decimal,
    // SOD violation rate for approval tracking (0.0 to 1.0)
    sod_violation_rate: f64,
    // Batching behavior - humans often process similar items together
    batch_state: Option<BatchState>,
    // Temporal drift controller for simulating distribution changes over time
    drift_controller: Option<DriftController>,
    // Temporal patterns components
    business_day_calculator: Option<BusinessDayCalculator>,
    processing_lag_calculator: Option<ProcessingLagCalculator>,
    temporal_patterns_config: Option<TemporalPatternsConfig>,
    // Business-process weights for the O2C/P2P/R2R/H2R/A2R volume mix.
    // Default matches the legacy hard-coded 0.35/0.30/0.20/0.10/0.05
    // distribution.
    // NOTE(review): this comment previously stated the weights "must sum to
    // 1.0 (validated by config schema)", while `set_business_process_weights`
    // documents that they are normalized and need not sum to 1.0 — confirm
    // which contract actually holds.
    business_process_weights: [(BusinessProcess, f64); 5],
    // v3.4.0 advanced distributions (mixture models + industry profiles).
    // None preserves v3.3.2 byte-for-byte behavior; populated only when the
    // caller opts in via `set_advanced_distributions`.
    advanced_amount_sampler: Option<AdvancedAmountSampler>,
    // v3.5.3+ conditional amount override. Populated when
    // `config.distributions.conditional` contains an entry where
    // `output_field == "amount"` and `input_field` is one of `"month"`,
    // `"quarter"`, `"constant"` or the empty string. Applied *after* the
    // fraud-pattern / advanced-sampler / legacy-sampler cascade on non-fraud
    // entries so it can steer amounts by calendar context without disturbing
    // fraud semantics.
    conditional_amount_override: Option<datasynth_core::distributions::ConditionalSampler>,
    // v3.5.4+ Gaussian copula for amount↔line_count correlation. When
    // populated, each non-fraud JE draws a (u, v) pair; u nudges amount
    // via a `(0.75 + 0.5*u)` multiplier and v biases line_count toward
    // the upper/lower end of its range. Produces observable Spearman
    // correlation without rewiring existing samplers for inverse-CDF.
    correlation_copula: Option<datasynth_core::distributions::BivariateCopulaSampler>,
}
100
/// Default volume mix across the five business processes.
///
/// The weights sum to 1.0 and are installed as the initial
/// `business_process_weights` of every newly constructed generator.
const DEFAULT_BUSINESS_PROCESS_WEIGHTS: [(BusinessProcess, f64); 5] = [
    (BusinessProcess::O2C, 0.35),
    (BusinessProcess::P2P, 0.30),
    (BusinessProcess::R2R, 0.20),
    (BusinessProcess::H2R, 0.10),
    (BusinessProcess::A2R, 0.05),
];
108
109/// Map the schema-level [`datasynth_config::schema::IndustryProfileType`]
110/// onto the distributions-layer [`IndustryType`], then return that industry's
111/// pre-configured `sales_amounts` mixture. Used as a fallback when the
112/// caller enables `distributions.amounts` but supplies no components.
113/// v3.5.3+ — check whether the configured `input_field` is one the JE
114/// generator can compute per-entry. Currently supported:
115///
116///   - `"month"` — posting-date month (1..=12)
117///   - `"quarter"` — posting-date quarter (1..=4)
118///   - `"constant"` / empty — always 0.0 (treats as unconditional)
119///
120/// Unsupported values cause the conditional rule to be silently ignored
121/// to keep runtime robust against user typos.
122impl JournalEntryGenerator {
123    fn supported_conditional_input(field: &str) -> bool {
124        matches!(field, "month" | "quarter" | "constant" | "")
125    }
126
127    fn conditional_input_value(&self, posting_date: chrono::NaiveDate) -> f64 {
128        match self
129            .conditional_amount_override
130            .as_ref()
131            .map(|s| s.config().input_field.as_str())
132        {
133            Some("month") => posting_date.month() as f64,
134            Some("quarter") => ((posting_date.month() - 1) / 3 + 1) as f64,
135            _ => 0.0,
136        }
137    }
138}
139
140fn industry_profile_to_log_normal(
141    p: datasynth_config::schema::IndustryProfileType,
142) -> datasynth_core::distributions::LogNormalMixtureConfig {
143    use datasynth_config::schema::IndustryProfileType as P;
144    let industry = match p {
145        P::Retail => IndustryType::Retail,
146        P::Manufacturing => IndustryType::Manufacturing,
147        P::FinancialServices => IndustryType::FinancialServices,
148        P::Healthcare => IndustryType::Healthcare,
149        P::Technology => IndustryType::Technology,
150    };
151    IndustryAmountProfile::for_industry(industry).sales_amounts
152}
153
/// State for tracking batch processing behavior.
///
/// When humans process transactions, they often batch similar items together
/// (e.g., processing all invoices from one vendor, entering similar expenses).
#[derive(Clone)]
struct BatchState {
    /// The base entry template to vary
    base_account_number: String,
    /// Amount of the base entry (presumably the value that batch members
    /// vary around — confirm against the batching code).
    base_amount: rust_decimal::Decimal,
    /// Business process of the base entry, if one was assigned.
    base_business_process: Option<BusinessProcess>,
    /// Posting date of the base entry.
    base_posting_date: NaiveDate,
    /// Remaining entries in this batch
    remaining: u8,
}
168
169impl JournalEntryGenerator {
170    /// Create a new journal entry generator.
171    pub fn new_with_params(
172        config: TransactionConfig,
173        coa: Arc<ChartOfAccounts>,
174        companies: Vec<String>,
175        start_date: NaiveDate,
176        end_date: NaiveDate,
177        seed: u64,
178    ) -> Self {
179        Self::new_with_full_config(
180            config,
181            coa,
182            companies,
183            start_date,
184            end_date,
185            seed,
186            TemplateConfig::default(),
187            None,
188        )
189    }
190
191    /// Create a new journal entry generator with full configuration.
192    #[allow(clippy::too_many_arguments)]
193    pub fn new_with_full_config(
194        config: TransactionConfig,
195        coa: Arc<ChartOfAccounts>,
196        companies: Vec<String>,
197        start_date: NaiveDate,
198        end_date: NaiveDate,
199        seed: u64,
200        template_config: TemplateConfig,
201        user_pool: Option<UserPool>,
202    ) -> Self {
203        // Initialize user pool if not provided
204        let user_pool = user_pool.or_else(|| {
205            if template_config.names.generate_realistic_names {
206                let user_gen_config = UserGeneratorConfig {
207                    culture_distribution: vec![
208                        (
209                            datasynth_core::templates::NameCulture::WesternUs,
210                            template_config.names.culture_distribution.western_us,
211                        ),
212                        (
213                            datasynth_core::templates::NameCulture::Hispanic,
214                            template_config.names.culture_distribution.hispanic,
215                        ),
216                        (
217                            datasynth_core::templates::NameCulture::German,
218                            template_config.names.culture_distribution.german,
219                        ),
220                        (
221                            datasynth_core::templates::NameCulture::French,
222                            template_config.names.culture_distribution.french,
223                        ),
224                        (
225                            datasynth_core::templates::NameCulture::Chinese,
226                            template_config.names.culture_distribution.chinese,
227                        ),
228                        (
229                            datasynth_core::templates::NameCulture::Japanese,
230                            template_config.names.culture_distribution.japanese,
231                        ),
232                        (
233                            datasynth_core::templates::NameCulture::Indian,
234                            template_config.names.culture_distribution.indian,
235                        ),
236                    ],
237                    email_domain: template_config.names.email_domain.clone(),
238                    generate_realistic_names: true,
239                };
240                let mut user_gen = UserGenerator::with_config(seed + 100, user_gen_config);
241                Some(user_gen.generate_standard(&companies))
242            } else {
243                None
244            }
245        });
246
247        // Initialize reference generator
248        let mut ref_gen = ReferenceGenerator::new(
249            start_date.year(),
250            companies
251                .first()
252                .map(std::string::String::as_str)
253                .unwrap_or("1000"),
254        );
255        ref_gen.set_prefix(
256            ReferenceType::Invoice,
257            &template_config.references.invoice_prefix,
258        );
259        ref_gen.set_prefix(
260            ReferenceType::PurchaseOrder,
261            &template_config.references.po_prefix,
262        );
263        ref_gen.set_prefix(
264            ReferenceType::SalesOrder,
265            &template_config.references.so_prefix,
266        );
267
268        // Create weighted company selector (uniform weights for this constructor)
269        let company_selector = WeightedCompanySelector::uniform(companies.clone());
270
271        Self {
272            rng: seeded_rng(seed, 0),
273            seed,
274            config: config.clone(),
275            coa,
276            companies,
277            company_selector,
278            line_sampler: LineItemSampler::with_config(
279                seed + 1,
280                config.line_item_distribution.clone(),
281                config.even_odd_distribution.clone(),
282                config.debit_credit_distribution.clone(),
283            ),
284            amount_sampler: AmountSampler::with_config(seed + 2, config.amounts.clone()),
285            temporal_sampler: TemporalSampler::with_config(
286                seed + 3,
287                config.seasonality.clone(),
288                WorkingHoursConfig::default(),
289                Vec::new(),
290            ),
291            start_date,
292            end_date,
293            count: 0,
294            uuid_factory: DeterministicUuidFactory::new(seed, GeneratorType::JournalEntry),
295            user_pool,
296            description_generator: DescriptionGenerator::new(),
297            reference_generator: ref_gen,
298            template_config,
299            vendor_pool: VendorPool::standard(),
300            customer_pool: CustomerPool::standard(),
301            material_pool: None,
302            using_real_master_data: false,
303            fraud_config: FraudConfig::default(),
304            persona_errors_enabled: true, // Enable by default for realism
305            approval_enabled: true,       // Enable by default for realism
306            approval_threshold: rust_decimal::Decimal::new(10000, 0), // $10,000 default threshold
307            sod_violation_rate: 0.10,     // 10% default SOD violation rate
308            batch_state: None,
309            drift_controller: None,
310            // Always provide a basic BusinessDayCalculator so that weekend/holiday
311            // filtering is active even when temporal_patterns is not explicitly enabled.
312            business_day_calculator: Some(BusinessDayCalculator::new(HolidayCalendar::new(
313                Region::US,
314                start_date.year(),
315            ))),
316            processing_lag_calculator: None,
317            temporal_patterns_config: None,
318            business_process_weights: DEFAULT_BUSINESS_PROCESS_WEIGHTS,
319            advanced_amount_sampler: None,
320            conditional_amount_override: None,
321            correlation_copula: None,
322        }
323    }
324
325    /// Wire v3.4.0 advanced distributions. When the caller's config has
326    /// `distributions.enabled = true` AND `distributions.amounts.enabled =
327    /// true`, the journal-entry generator routes non-fraud amount sampling
328    /// through an [`AdvancedAmountSampler`] (log-normal or Gaussian mixture).
329    ///
330    /// When `distributions.industry_profile` is `Some`, the caller's
331    /// explicitly configured components override nothing — if the component
332    /// list is empty, the industry profile's `sales_amounts` mixture is used
333    /// instead. Explicit components always win.
334    ///
335    /// Returning `Ok(())` with no side effect is intentional for the
336    /// following no-op cases, so callers can unconditionally invoke this:
337    ///   - `config.enabled = false`
338    ///   - `config.amounts.enabled = false`
339    ///   - empty component list with no industry profile
340    ///
341    /// Errors propagate from mixture validation (e.g. weights not summing
342    /// to 1.0, non-positive sigma).
343    pub fn set_advanced_distributions(
344        &mut self,
345        config: &AdvancedDistributionConfig,
346        seed: u64,
347    ) -> Result<(), String> {
348        if !config.enabled {
349            return Ok(());
350        }
351
352        // v3.5.3+: build a conditional-amount override when the config
353        // declares a rule with `output_field == "amount"` and a supported
354        // input field. The override is applied *after* the standard
355        // cascade so it doesn't disturb fraud-path sampling. Unsupported
356        // input fields are ignored with a trace log.
357        self.conditional_amount_override = config
358            .conditional
359            .iter()
360            .find(|c| {
361                c.output_field == "amount" && Self::supported_conditional_input(&c.input_field)
362            })
363            .and_then(|c| {
364                datasynth_core::distributions::ConditionalSampler::new(
365                    seed.wrapping_add(17),
366                    c.to_core_config(),
367                )
368                .ok()
369            });
370
371        // v3.5.4+: build a Gaussian-copula sampler for the amount ↔
372        // line_count pair. Only fires for Gaussian copula in this
373        // release; other copula types are recognised by the schema
374        // converter but left inert at runtime until the next minor.
375        self.correlation_copula = config
376            .correlations
377            .to_core_config_for_pair("amount", "line_count")
378            .filter(|c| {
379                matches!(
380                    c.copula_type,
381                    datasynth_core::distributions::CopulaType::Gaussian
382                )
383            })
384            .and_then(|copula_cfg| {
385                datasynth_core::distributions::BivariateCopulaSampler::new(
386                    seed.wrapping_add(31),
387                    copula_cfg,
388                )
389                .ok()
390            });
391
392        // v3.4.4+: Pareto takes precedence over mixture models when set.
393        // This supports heavy-tailed amount distributions (capex, strategic
394        // contracts, fraud) that log-normal/Gaussian mixtures can't model
395        // as sharply.
396        if let Some(pareto) = &config.pareto {
397            if pareto.enabled {
398                let core_cfg = pareto.to_core_config();
399                self.advanced_amount_sampler =
400                    Some(AdvancedAmountSampler::new_pareto(seed, core_cfg)?);
401                return Ok(());
402            }
403        }
404
405        if !config.amounts.enabled {
406            return Ok(());
407        }
408
409        match config.amounts.distribution_type {
410            MixtureDistributionType::LogNormal => {
411                let lognormal_cfg = config
412                    .amounts
413                    .to_log_normal_config()
414                    .or_else(|| config.industry_profile.map(industry_profile_to_log_normal));
415                if let Some(cfg) = lognormal_cfg {
416                    self.advanced_amount_sampler =
417                        Some(AdvancedAmountSampler::new_log_normal(seed, cfg)?);
418                }
419            }
420            MixtureDistributionType::Gaussian => {
421                if let Some(cfg) = config.amounts.to_gaussian_config() {
422                    self.advanced_amount_sampler =
423                        Some(AdvancedAmountSampler::new_gaussian(seed, cfg)?);
424                }
425            }
426        }
427
428        Ok(())
429    }
430
431    /// Override the business-process volume mix. Weights map directly to the
432    /// `business_processes.*_weight` YAML config; they do not have to sum to
433    /// exactly 1.0 (they're normalized via `weighted_select`).
434    pub fn set_business_process_weights(
435        &mut self,
436        o2c: f64,
437        p2p: f64,
438        r2r: f64,
439        h2r: f64,
440        a2r: f64,
441    ) {
442        self.business_process_weights = [
443            (BusinessProcess::O2C, o2c),
444            (BusinessProcess::P2P, p2p),
445            (BusinessProcess::R2R, r2r),
446            (BusinessProcess::H2R, h2r),
447            (BusinessProcess::A2R, a2r),
448        ];
449    }
450
451    /// Create from a full GeneratorConfig.
452    ///
453    /// This constructor uses the volume_weight from company configs
454    /// for weighted company selection, and fraud config from GeneratorConfig.
455    pub fn from_generator_config(
456        full_config: &GeneratorConfig,
457        coa: Arc<ChartOfAccounts>,
458        start_date: NaiveDate,
459        end_date: NaiveDate,
460        seed: u64,
461    ) -> Self {
462        let companies: Vec<String> = full_config
463            .companies
464            .iter()
465            .map(|c| c.code.clone())
466            .collect();
467
468        // Create weighted selector using volume_weight from company configs
469        let company_selector = WeightedCompanySelector::from_configs(&full_config.companies);
470
471        let mut generator = Self::new_with_full_config(
472            full_config.transactions.clone(),
473            coa,
474            companies,
475            start_date,
476            end_date,
477            seed,
478            full_config.templates.clone(),
479            None,
480        );
481
482        // Override the uniform selector with weighted selector
483        generator.company_selector = company_selector;
484
485        // Set fraud config
486        generator.fraud_config = full_config.fraud.clone();
487
488        // Configure temporal patterns if enabled
489        let temporal_config = &full_config.temporal_patterns;
490        if temporal_config.enabled {
491            generator = generator.with_temporal_patterns(temporal_config.clone(), seed);
492        }
493
494        generator
495    }
496
497    /// Configure temporal patterns including business day calculations and processing lags.
498    ///
499    /// This enables realistic temporal behavior including:
500    /// - Business day awareness (no postings on weekends/holidays)
501    /// - Processing lag modeling (event-to-posting delays)
502    /// - Period-end dynamics (volume spikes at month/quarter/year end)
503    pub fn with_temporal_patterns(mut self, config: TemporalPatternsConfig, seed: u64) -> Self {
504        // Create business day calculator if enabled
505        if config.business_days.enabled {
506            let region = config
507                .calendars
508                .regions
509                .first()
510                .map(|r| Self::parse_region(r))
511                .unwrap_or(Region::US);
512
513            let calendar = HolidayCalendar::new(region, self.start_date.year());
514            self.business_day_calculator = Some(BusinessDayCalculator::new(calendar));
515        }
516
517        // Create processing lag calculator if enabled
518        if config.processing_lags.enabled {
519            let lag_config = Self::convert_processing_lag_config(&config.processing_lags);
520            self.processing_lag_calculator =
521                Some(ProcessingLagCalculator::with_config(seed, lag_config));
522        }
523
524        // Create period-end dynamics if configured
525        let model = config.period_end.model.as_deref().unwrap_or("flat");
526        if model != "flat"
527            || config
528                .period_end
529                .month_end
530                .as_ref()
531                .is_some_and(|m| m.peak_multiplier.unwrap_or(1.0) != 1.0)
532        {
533            let dynamics = Self::convert_period_end_config(&config.period_end);
534            self.temporal_sampler.set_period_end_dynamics(dynamics);
535        }
536
537        self.temporal_patterns_config = Some(config);
538        self
539    }
540
541    /// Configure temporal patterns using a [`CountryPack`] for the holiday calendar.
542    ///
543    /// This is an alternative to [`with_temporal_patterns`] that derives the
544    /// holiday calendar from a country-pack definition rather than the built-in
545    /// region-based calendars.  All other temporal behaviour (business-day
546    /// adjustment, processing lags, period-end dynamics) is configured
547    /// identically.
548    pub fn with_country_pack_temporal(
549        mut self,
550        config: TemporalPatternsConfig,
551        seed: u64,
552        pack: &CountryPack,
553    ) -> Self {
554        // Create business day calculator using the country pack calendar
555        if config.business_days.enabled {
556            let calendar = HolidayCalendar::from_country_pack(pack, self.start_date.year());
557            self.business_day_calculator = Some(BusinessDayCalculator::new(calendar));
558        }
559
560        // Create processing lag calculator if enabled
561        if config.processing_lags.enabled {
562            let lag_config = Self::convert_processing_lag_config(&config.processing_lags);
563            self.processing_lag_calculator =
564                Some(ProcessingLagCalculator::with_config(seed, lag_config));
565        }
566
567        // Create period-end dynamics if configured
568        let model = config.period_end.model.as_deref().unwrap_or("flat");
569        if model != "flat"
570            || config
571                .period_end
572                .month_end
573                .as_ref()
574                .is_some_and(|m| m.peak_multiplier.unwrap_or(1.0) != 1.0)
575        {
576            let dynamics = Self::convert_period_end_config(&config.period_end);
577            self.temporal_sampler.set_period_end_dynamics(dynamics);
578        }
579
580        self.temporal_patterns_config = Some(config);
581        self
582    }
583
584    /// Convert schema processing lag config to core config.
585    fn convert_processing_lag_config(
586        schema: &datasynth_config::schema::ProcessingLagSchemaConfig,
587    ) -> ProcessingLagConfig {
588        let mut config = ProcessingLagConfig {
589            enabled: schema.enabled,
590            ..Default::default()
591        };
592
593        // Helper to convert lag schema to distribution
594        let convert_lag = |lag: &datasynth_config::schema::LagDistributionSchemaConfig| {
595            let mut dist = LagDistribution::log_normal(lag.mu, lag.sigma);
596            if let Some(min) = lag.min_hours {
597                dist.min_lag_hours = min;
598            }
599            if let Some(max) = lag.max_hours {
600                dist.max_lag_hours = max;
601            }
602            dist
603        };
604
605        // Apply event-specific lags
606        if let Some(ref lag) = schema.sales_order_lag {
607            config
608                .event_lags
609                .insert(EventType::SalesOrder, convert_lag(lag));
610        }
611        if let Some(ref lag) = schema.purchase_order_lag {
612            config
613                .event_lags
614                .insert(EventType::PurchaseOrder, convert_lag(lag));
615        }
616        if let Some(ref lag) = schema.goods_receipt_lag {
617            config
618                .event_lags
619                .insert(EventType::GoodsReceipt, convert_lag(lag));
620        }
621        if let Some(ref lag) = schema.invoice_receipt_lag {
622            config
623                .event_lags
624                .insert(EventType::InvoiceReceipt, convert_lag(lag));
625        }
626        if let Some(ref lag) = schema.invoice_issue_lag {
627            config
628                .event_lags
629                .insert(EventType::InvoiceIssue, convert_lag(lag));
630        }
631        if let Some(ref lag) = schema.payment_lag {
632            config
633                .event_lags
634                .insert(EventType::Payment, convert_lag(lag));
635        }
636        if let Some(ref lag) = schema.journal_entry_lag {
637            config
638                .event_lags
639                .insert(EventType::JournalEntry, convert_lag(lag));
640        }
641
642        // Apply cross-day posting config
643        if let Some(ref cross_day) = schema.cross_day_posting {
644            config.cross_day = CrossDayConfig {
645                enabled: cross_day.enabled,
646                probability_by_hour: cross_day.probability_by_hour.clone(),
647                ..Default::default()
648            };
649        }
650
651        config
652    }
653
654    /// Convert schema period-end config to core PeriodEndDynamics.
655    fn convert_period_end_config(
656        schema: &datasynth_config::schema::PeriodEndSchemaConfig,
657    ) -> PeriodEndDynamics {
658        let model_type = schema.model.as_deref().unwrap_or("exponential");
659
660        // Helper to convert period config
661        let convert_period =
662            |period: Option<&datasynth_config::schema::PeriodEndModelSchemaConfig>,
663             default_peak: f64|
664             -> PeriodEndConfig {
665                if let Some(p) = period {
666                    let model = match model_type {
667                        "flat" => PeriodEndModel::FlatMultiplier {
668                            multiplier: p.peak_multiplier.unwrap_or(default_peak),
669                        },
670                        "extended_crunch" => PeriodEndModel::ExtendedCrunch {
671                            start_day: p.start_day.unwrap_or(-10),
672                            sustained_high_days: p.sustained_high_days.unwrap_or(3),
673                            peak_multiplier: p.peak_multiplier.unwrap_or(default_peak),
674                            ramp_up_days: 3, // Default ramp-up period
675                        },
676                        _ => PeriodEndModel::ExponentialAcceleration {
677                            start_day: p.start_day.unwrap_or(-10),
678                            base_multiplier: p.base_multiplier.unwrap_or(1.0),
679                            peak_multiplier: p.peak_multiplier.unwrap_or(default_peak),
680                            decay_rate: p.decay_rate.unwrap_or(0.3),
681                        },
682                    };
683                    PeriodEndConfig {
684                        enabled: true,
685                        model,
686                        additional_multiplier: p.additional_multiplier.unwrap_or(1.0),
687                    }
688                } else {
689                    PeriodEndConfig {
690                        enabled: true,
691                        model: PeriodEndModel::ExponentialAcceleration {
692                            start_day: -10,
693                            base_multiplier: 1.0,
694                            peak_multiplier: default_peak,
695                            decay_rate: 0.3,
696                        },
697                        additional_multiplier: 1.0,
698                    }
699                }
700            };
701
702        PeriodEndDynamics::new(
703            convert_period(schema.month_end.as_ref(), 2.0),
704            convert_period(schema.quarter_end.as_ref(), 3.5),
705            convert_period(schema.year_end.as_ref(), 5.0),
706        )
707    }
708
709    /// Parse a region string into a Region enum.
710    fn parse_region(region_str: &str) -> Region {
711        match region_str.to_uppercase().as_str() {
712            "US" => Region::US,
713            "DE" => Region::DE,
714            "GB" => Region::GB,
715            "CN" => Region::CN,
716            "JP" => Region::JP,
717            "IN" => Region::IN,
718            "BR" => Region::BR,
719            "MX" => Region::MX,
720            "AU" => Region::AU,
721            "SG" => Region::SG,
722            "KR" => Region::KR,
723            "FR" => Region::FR,
724            "IT" => Region::IT,
725            "ES" => Region::ES,
726            "CA" => Region::CA,
727            _ => Region::US,
728        }
729    }
730
    /// Set a custom company selector.
    ///
    /// Replaces the selector installed by the constructor (uniform weights)
    /// or by `from_generator_config` (volume weights).
    pub fn set_company_selector(&mut self, selector: WeightedCompanySelector) {
        self.company_selector = selector;
    }
735
    /// Get the current company selector.
    ///
    /// Returns a shared reference; use `set_company_selector` to replace it.
    pub fn company_selector(&self) -> &WeightedCompanySelector {
        &self.company_selector
    }
740
    /// Set fraud configuration.
    ///
    /// Replaces the fraud config currently stored on the generator (the
    /// constructor installs `FraudConfig::default()`).
    pub fn set_fraud_config(&mut self, config: FraudConfig) {
        self.fraud_config = config;
    }
745
746    /// Set vendors from generated master data.
747    ///
748    /// This replaces the default vendor pool with actual generated vendors,
749    /// ensuring JEs reference real master data entities.
750    pub fn with_vendors(mut self, vendors: &[Vendor]) -> Self {
751        if !vendors.is_empty() {
752            self.vendor_pool = VendorPool::from_vendors(vendors.to_vec());
753            self.using_real_master_data = true;
754        }
755        self
756    }
757
758    /// Set customers from generated master data.
759    ///
760    /// This replaces the default customer pool with actual generated customers,
761    /// ensuring JEs reference real master data entities.
762    pub fn with_customers(mut self, customers: &[Customer]) -> Self {
763        if !customers.is_empty() {
764            self.customer_pool = CustomerPool::from_customers(customers.to_vec());
765            self.using_real_master_data = true;
766        }
767        self
768    }
769
770    /// Set materials from generated master data.
771    ///
772    /// This provides material references for JEs that involve inventory movements.
773    pub fn with_materials(mut self, materials: &[Material]) -> Self {
774        if !materials.is_empty() {
775            self.material_pool = Some(MaterialPool::from_materials(materials.to_vec()));
776            self.using_real_master_data = true;
777        }
778        self
779    }
780
781    /// Set all master data at once for convenience.
782    ///
783    /// This is the recommended way to configure the JE generator with
784    /// generated master data to ensure data coherence.
785    pub fn with_master_data(
786        self,
787        vendors: &[Vendor],
788        customers: &[Customer],
789        materials: &[Material],
790    ) -> Self {
791        self.with_vendors(vendors)
792            .with_customers(customers)
793            .with_materials(materials)
794    }
795
796    /// Replace the user pool with one generated from a [`CountryPack`].
797    ///
798    /// This is an alternative to the default name-culture distribution that
799    /// derives name pools and weights from the country-pack's `names` section.
800    /// The existing user pool (if any) is discarded and regenerated using
801    /// [`MultiCultureNameGenerator::from_country_pack`].
802    pub fn with_country_pack_names(mut self, pack: &CountryPack) -> Self {
803        let name_gen =
804            datasynth_core::templates::MultiCultureNameGenerator::from_country_pack(pack);
805        let config = UserGeneratorConfig {
806            // The culture distribution is embedded in the name generator
807            // itself, so we use an empty list here.
808            culture_distribution: Vec::new(),
809            email_domain: name_gen.email_domain().to_string(),
810            generate_realistic_names: true,
811        };
812        let mut user_gen = UserGenerator::with_name_generator(self.seed + 100, config, name_gen);
813        self.user_pool = Some(user_gen.generate_standard(&self.companies));
814        self
815    }
816
817    /// Check if the generator is using real master data.
818    pub fn is_using_real_master_data(&self) -> bool {
819        self.using_real_master_data
820    }
821
822    /// Determine if this transaction should be fraudulent.
823    fn determine_fraud(&mut self) -> Option<FraudType> {
824        if !self.fraud_config.enabled {
825            return None;
826        }
827
828        // Roll for fraud based on fraud rate
829        if self.rng.random::<f64>() >= self.fraud_config.fraud_rate {
830            return None;
831        }
832
833        // Select fraud type based on distribution
834        Some(self.select_fraud_type())
835    }
836
837    /// Select a fraud type based on the configured distribution.
838    fn select_fraud_type(&mut self) -> FraudType {
839        let dist = &self.fraud_config.fraud_type_distribution;
840        let roll: f64 = self.rng.random();
841
842        let mut cumulative = 0.0;
843
844        cumulative += dist.suspense_account_abuse;
845        if roll < cumulative {
846            return FraudType::SuspenseAccountAbuse;
847        }
848
849        cumulative += dist.fictitious_transaction;
850        if roll < cumulative {
851            return FraudType::FictitiousTransaction;
852        }
853
854        cumulative += dist.revenue_manipulation;
855        if roll < cumulative {
856            return FraudType::RevenueManipulation;
857        }
858
859        cumulative += dist.expense_capitalization;
860        if roll < cumulative {
861            return FraudType::ExpenseCapitalization;
862        }
863
864        cumulative += dist.split_transaction;
865        if roll < cumulative {
866            return FraudType::SplitTransaction;
867        }
868
869        cumulative += dist.timing_anomaly;
870        if roll < cumulative {
871            return FraudType::TimingAnomaly;
872        }
873
874        cumulative += dist.unauthorized_access;
875        if roll < cumulative {
876            return FraudType::UnauthorizedAccess;
877        }
878
879        // Default fallback
880        FraudType::DuplicatePayment
881    }
882
883    /// Map a fraud type to an amount pattern for suspicious amounts.
884    fn fraud_type_to_amount_pattern(&self, fraud_type: FraudType) -> FraudAmountPattern {
885        match fraud_type {
886            FraudType::SplitTransaction | FraudType::JustBelowThreshold => {
887                FraudAmountPattern::ThresholdAdjacent
888            }
889            FraudType::FictitiousTransaction
890            | FraudType::FictitiousEntry
891            | FraudType::SuspenseAccountAbuse
892            | FraudType::RoundDollarManipulation => FraudAmountPattern::ObviousRoundNumbers,
893            FraudType::RevenueManipulation
894            | FraudType::ExpenseCapitalization
895            | FraudType::ImproperCapitalization
896            | FraudType::ReserveManipulation
897            | FraudType::UnauthorizedAccess
898            | FraudType::PrematureRevenue
899            | FraudType::UnderstatedLiabilities
900            | FraudType::OverstatedAssets
901            | FraudType::ChannelStuffing => FraudAmountPattern::StatisticallyImprobable,
902            FraudType::DuplicatePayment
903            | FraudType::TimingAnomaly
904            | FraudType::SelfApproval
905            | FraudType::ExceededApprovalLimit
906            | FraudType::SegregationOfDutiesViolation
907            | FraudType::UnauthorizedApproval
908            | FraudType::CollusiveApproval
909            | FraudType::FictitiousVendor
910            | FraudType::ShellCompanyPayment
911            | FraudType::Kickback
912            | FraudType::KickbackScheme
913            | FraudType::InvoiceManipulation
914            | FraudType::AssetMisappropriation
915            | FraudType::InventoryTheft
916            | FraudType::GhostEmployee => FraudAmountPattern::Normal,
917            // Accounting Standards Fraud Types (ASC 606/IFRS 15 - Revenue)
918            FraudType::ImproperRevenueRecognition
919            | FraudType::ImproperPoAllocation
920            | FraudType::VariableConsiderationManipulation
921            | FraudType::ContractModificationMisstatement => {
922                FraudAmountPattern::StatisticallyImprobable
923            }
924            // Accounting Standards Fraud Types (ASC 842/IFRS 16 - Leases)
925            FraudType::LeaseClassificationManipulation
926            | FraudType::OffBalanceSheetLease
927            | FraudType::LeaseLiabilityUnderstatement
928            | FraudType::RouAssetMisstatement => FraudAmountPattern::StatisticallyImprobable,
929            // Accounting Standards Fraud Types (ASC 820/IFRS 13 - Fair Value)
930            FraudType::FairValueHierarchyManipulation
931            | FraudType::Level3InputManipulation
932            | FraudType::ValuationTechniqueManipulation => {
933                FraudAmountPattern::StatisticallyImprobable
934            }
935            // Accounting Standards Fraud Types (ASC 360/IAS 36 - Impairment)
936            FraudType::DelayedImpairment
937            | FraudType::ImpairmentTestAvoidance
938            | FraudType::CashFlowProjectionManipulation
939            | FraudType::ImproperImpairmentReversal => FraudAmountPattern::StatisticallyImprobable,
940            // Sourcing/Procurement Fraud
941            FraudType::BidRigging
942            | FraudType::PhantomVendorContract
943            | FraudType::ConflictOfInterestSourcing => FraudAmountPattern::Normal,
944            FraudType::SplitContractThreshold => FraudAmountPattern::ThresholdAdjacent,
945            // HR/Payroll Fraud
946            FraudType::GhostEmployeePayroll
947            | FraudType::PayrollInflation
948            | FraudType::DuplicateExpenseReport
949            | FraudType::FictitiousExpense => FraudAmountPattern::Normal,
950            FraudType::SplitExpenseToAvoidApproval => FraudAmountPattern::ThresholdAdjacent,
951            // O2C Fraud
952            FraudType::RevenueTimingManipulation => FraudAmountPattern::StatisticallyImprobable,
953            FraudType::QuotePriceOverride => FraudAmountPattern::Normal,
954        }
955    }
956
957    /// Generate a deterministic UUID using the factory.
958    #[inline]
959    fn generate_deterministic_uuid(&self) -> uuid::Uuid {
960        self.uuid_factory.next()
961    }
962
963    /// Cost center pool used for expense account enrichment.
964    const COST_CENTER_POOL: &'static [&'static str] =
965        &["CC1000", "CC2000", "CC3000", "CC4000", "CC5000"];
966
967    /// Enrich journal entry line items with account descriptions, cost centers,
968    /// profit centers, value dates, line text, and assignment fields.
969    ///
970    /// This populates the sparse optional fields that `JournalEntryLine::debit()`
971    /// and `::credit()` leave as `None`.
972    fn enrich_line_items(&self, entry: &mut JournalEntry) {
973        let posting_date = entry.header.posting_date;
974        let company_code = &entry.header.company_code;
975        let header_text = entry.header.header_text.clone();
976        let business_process = entry.header.business_process;
977
978        // Derive a deterministic index from the document_id for cost center selection
979        let doc_id_bytes = entry.header.document_id.as_bytes();
980        let mut cc_seed: usize = 0;
981        for &b in doc_id_bytes {
982            cc_seed = cc_seed.wrapping_add(b as usize);
983        }
984
985        for (i, line) in entry.lines.iter_mut().enumerate() {
986            // 1. account_description: look up from CoA
987            if line.account_description.is_none() {
988                line.account_description = self
989                    .coa
990                    .get_account(&line.gl_account)
991                    .map(|a| a.short_description.clone());
992            }
993
994            // 2. cost_center: assign to expense accounts (5xxx/6xxx)
995            if line.cost_center.is_none() {
996                let first_char = line.gl_account.chars().next().unwrap_or('0');
997                if first_char == '5' || first_char == '6' {
998                    let idx = cc_seed.wrapping_add(i) % Self::COST_CENTER_POOL.len();
999                    line.cost_center = Some(Self::COST_CENTER_POOL[idx].to_string());
1000                }
1001            }
1002
1003            // 3. profit_center: derive from company code + business process
1004            if line.profit_center.is_none() {
1005                let suffix = match business_process {
1006                    Some(BusinessProcess::P2P) => "-P2P",
1007                    Some(BusinessProcess::O2C) => "-O2C",
1008                    Some(BusinessProcess::R2R) => "-R2R",
1009                    Some(BusinessProcess::H2R) => "-H2R",
1010                    _ => "",
1011                };
1012                line.profit_center = Some(format!("PC-{company_code}{suffix}"));
1013            }
1014
1015            // 4. line_text: fall back to header_text if not already set
1016            if line.line_text.is_none() {
1017                line.line_text = header_text.clone();
1018            }
1019
1020            // 5. value_date: set to posting_date for AR/AP accounts
1021            if line.value_date.is_none()
1022                && (line.gl_account.starts_with("1100") || line.gl_account.starts_with("2000"))
1023            {
1024                line.value_date = Some(posting_date);
1025            }
1026
1027            // 6. assignment: set to vendor/customer reference for AP/AR lines
1028            if line.assignment.is_none() {
1029                if line.gl_account.starts_with("2000") {
1030                    // AP line - use vendor reference from header
1031                    if let Some(ref ht) = header_text {
1032                        // Try to extract vendor ID from header text patterns like "... - V-001"
1033                        if let Some(vendor_part) = ht.rsplit(" - ").next() {
1034                            if vendor_part.starts_with("V-")
1035                                || vendor_part.starts_with("VENDOR")
1036                                || vendor_part.starts_with("Vendor")
1037                            {
1038                                line.assignment = Some(vendor_part.to_string());
1039                            }
1040                        }
1041                    }
1042                } else if line.gl_account.starts_with("1100") {
1043                    // AR line - use customer reference from header
1044                    if let Some(ref ht) = header_text {
1045                        if let Some(customer_part) = ht.rsplit(" - ").next() {
1046                            if customer_part.starts_with("C-")
1047                                || customer_part.starts_with("CUST")
1048                                || customer_part.starts_with("Customer")
1049                            {
1050                                line.assignment = Some(customer_part.to_string());
1051                            }
1052                        }
1053                    }
1054                }
1055            }
1056        }
1057    }
1058
1059    /// Generate a single journal entry.
    ///
    /// When a batch is in progress (`batch_state` holds `remaining > 0`) the
    /// call is delegated to `generate_batched_entry`. Otherwise the entry is
    /// built in this order:
    ///
    /// 1. Bump the counter, draw a document id and sample a posting date
    ///    (moved onto a business day when a calculator is configured).
    /// 2. Pick company, line spec, source, business process and fraud type.
    /// 3. Fill in the header (timestamps, user, audit flags, text, reference,
    ///    source document).
    /// 4. Sample the total amount, then apply the optional conditional
    ///    override, copula scaling, drift and human variation.
    /// 5. Emit debit and credit lines, enrich them, and run the optional
    ///    persona-error, approval and batch-start steps.
    ///
    /// NOTE(review): the steps draw from `self.rng` in sequence, so reordering
    /// them presumably changes the output produced for a given seed.
1060    pub fn generate(&mut self) -> JournalEntry {
1061        debug!(
1062            count = self.count,
1063            companies = self.companies.len(),
1064            start_date = %self.start_date,
1065            end_date = %self.end_date,
1066            "Generating journal entry"
1067        );
1068
1069        // Check if we're in a batch - if so, generate a batched entry
1070        if let Some(ref state) = self.batch_state {
1071            if state.remaining > 0 {
1072                return self.generate_batched_entry();
1073            }
1074        }
1075
1076        self.count += 1;
1077
1078        // Generate deterministic document ID
1079        let document_id = self.generate_deterministic_uuid();
1080
1081        // Sample posting date
1082        let mut posting_date = self
1083            .temporal_sampler
1084            .sample_date(self.start_date, self.end_date);
1085
1086        // Adjust posting date to be a business day if business day calculator is configured
1087        if let Some(ref calc) = self.business_day_calculator {
1088            if !calc.is_business_day(posting_date) {
1089                // Move to next business day
1090                posting_date = calc.next_business_day(posting_date, false);
1091                // Ensure we don't exceed end_date
1092                if posting_date > self.end_date {
1093                    posting_date = calc.prev_business_day(self.end_date, true);
1094                }
1095            }
1096        }
1097
1098        // Select company using weighted selector
1099        let company_code = self.company_selector.select(&mut self.rng).to_string();
1100
1101        // Sample line item specification
1102        let line_spec = self.line_sampler.sample();
1103
1104        // Determine source type using full 4-way distribution
1105        let source = self.select_source();
1106        let is_automated = matches!(
1107            source,
1108            TransactionSource::Automated | TransactionSource::Recurring
1109        );
1110
1111        // Select business process
1112        let business_process = self.select_business_process();
1113
1114        // Determine if this is a fraudulent transaction
1115        let fraud_type = self.determine_fraud();
1116        let is_fraud = fraud_type.is_some();
1117
1118        // Sample time based on source
1119        let time = self.temporal_sampler.sample_time(!is_automated);
1120        let created_at = posting_date.and_time(time).and_utc();
1121
1122        // Select user from pool or generate generic
1123        let (created_by, user_persona) = self.select_user(is_automated);
1124
1125        // Create header with deterministic UUID
1126        let mut header =
1127            JournalEntryHeader::with_deterministic_id(company_code, posting_date, document_id);
1128        header.created_at = created_at;
1129        header.source = source;
1130        header.created_by = created_by;
1131        header.user_persona = user_persona;
1132        header.business_process = Some(business_process);
1133        header.document_type = Self::document_type_for_process(business_process).to_string();
1134        header.is_fraud = is_fraud;
1135        header.fraud_type = fraud_type;
1136
1137        // --- ISA 240 audit flags ---
1138        let is_manual = matches!(source, TransactionSource::Manual);
1139        header.is_manual = is_manual;
1140
1141        // Determine source_system based on manual vs automated
        // Manual: 70% "manual", 30% "spreadsheet". Otherwise: 40% SAP-FI,
        // 20% SAP-MM, 20% SAP-SD, 15% interface, 5% SAP-HR.
1142        header.source_system = if is_manual {
1143            if self.rng.random::<f64>() < 0.70 {
1144                "manual".to_string()
1145            } else {
1146                "spreadsheet".to_string()
1147            }
1148        } else {
1149            let roll: f64 = self.rng.random();
1150            if roll < 0.40 {
1151                "SAP-FI".to_string()
1152            } else if roll < 0.60 {
1153                "SAP-MM".to_string()
1154            } else if roll < 0.80 {
1155                "SAP-SD".to_string()
1156            } else if roll < 0.95 {
1157                "interface".to_string()
1158            } else {
1159                "SAP-HR".to_string()
1160            }
1161        };
1162
1163        // is_post_close: entry is in the last month of the configured period
1164        // and the posting date falls after the 25th (simulating close cutoff)
1165        let is_post_close = posting_date.month() == self.end_date.month()
1166            && posting_date.year() == self.end_date.year()
1167            && posting_date.day() > 25;
1168        header.is_post_close = is_post_close;
1169
1170        // created_date: for manual entries, same day as posting; for automated,
1171        // 0-3 days before posting_date
        // NOTE(review): "automated" here means every non-manual source, and
        // `.min(23)` looks redundant if `time` is a valid time of day — confirm.
1172        let created_date = if is_manual {
1173            posting_date.and_hms_opt(time.hour().min(23), time.minute(), time.second())
1174        } else {
1175            let lag_days = self.rng.random_range(0i64..=3);
1176            let created_naive_date = posting_date
1177                .checked_sub_signed(chrono::Duration::days(lag_days))
1178                .unwrap_or(posting_date);
1179            created_naive_date.and_hms_opt(
1180                self.rng.random_range(8u32..=17),
1181                self.rng.random_range(0u32..=59),
1182                self.rng.random_range(0u32..=59),
1183            )
1184        };
1185        header.created_date = created_date;
1186
1187        // Generate description context
1188        let mut context =
1189            DescriptionContext::with_period(posting_date.month(), posting_date.year());
1190
1191        // Add vendor/customer context based on business process
1192        match business_process {
1193            BusinessProcess::P2P => {
1194                if let Some(vendor) = self.vendor_pool.random_vendor(&mut self.rng) {
1195                    context.vendor_name = Some(vendor.name.clone());
1196                }
1197            }
1198            BusinessProcess::O2C => {
1199                if let Some(customer) = self.customer_pool.random_customer(&mut self.rng) {
1200                    context.customer_name = Some(customer.name.clone());
1201                }
1202            }
1203            _ => {}
1204        }
1205
1206        // Generate header text if enabled
1207        if self.template_config.descriptions.generate_header_text {
1208            header.header_text = Some(self.description_generator.generate_header_text(
1209                business_process,
1210                &context,
1211                &mut self.rng,
1212            ));
1213        }
1214
1215        // Generate reference if enabled
1216        if self.template_config.references.generate_references {
1217            header.reference = Some(
1218                self.reference_generator
1219                    .generate_for_process_year(business_process, posting_date.year()),
1220            );
1221        }
1222
1223        // Derive typed source document from reference prefix
        // Falls back to `DocumentRef::Manual` for manual entries whose
        // reference is absent or does not parse; otherwise stays `None`.
1224        header.source_document = header
1225            .reference
1226            .as_deref()
1227            .and_then(DocumentRef::parse)
1228            .or_else(|| {
1229                if header.source == TransactionSource::Manual {
1230                    Some(DocumentRef::Manual)
1231                } else {
1232                    None
1233                }
1234            });
1235
1236        // Generate line items
1237        let mut entry = JournalEntry::new(header);
1238
1239        // Generate amount - use fraud pattern if this is a fraudulent transaction.
1240        // Non-fraud path prefers the v3.4.0 advanced sampler when configured; fraud
1241        // patterns always use the legacy sampler because they target specific
1242        // thresholds (round numbers, just-under-approval amounts) that are
1243        // orthogonal to mixture models.
1244        let base_amount = if let Some(ft) = fraud_type {
1245            let pattern = self.fraud_type_to_amount_pattern(ft);
1246            self.amount_sampler.sample_fraud(pattern)
1247        } else if let Some(ref mut adv) = self.advanced_amount_sampler {
1248            adv.sample_decimal()
1249        } else {
1250            self.amount_sampler.sample()
1251        };
1252        // v3.5.3+: if a conditional-amount override is configured and
1253        // the JE is non-fraud, re-sample the amount from the conditional
1254        // distribution using the computed context. Fraud entries bypass
1255        // this path to preserve fraud-pattern semantics (as with the
1256        // advanced sampler cascade above).
1257        let base_amount = if fraud_type.is_none() {
1258            // Compute input context BEFORE taking &mut on the sampler
1259            // to avoid borrow-checker conflict with the immutable
1260            // `conditional_input_value` call.
1261            let input = self.conditional_input_value(posting_date);
1262            if let Some(ref mut cond) = self.conditional_amount_override {
1263                cond.sample_decimal(input)
1264            } else {
1265                base_amount
1266            }
1267        } else {
1268            base_amount
1269        };
1270
1271        // v3.5.4+: if a Gaussian copula is configured, draw a (u, v)
1272        // pair. `u` scales the non-fraud amount via `0.7 + 0.6*u`,
1273        // producing a deterministic correlation signal between amount
1274        // and a latent driver. `v` is retained for future line-count
1275        // correlation (inverse-CDF sampling scheduled for v3.6.x).
1276        let base_amount = if fraud_type.is_none() {
1277            if let Some(ref mut cop) = self.correlation_copula {
1278                let (u, _v) = cop.sample();
1279                let multiplier = 0.7 + 0.6 * u;
1280                let adjusted = base_amount.to_f64().unwrap_or(1.0) * multiplier;
1281                Decimal::from_f64_retain(adjusted).unwrap_or(base_amount)
1282            } else {
1283                base_amount
1284            }
1285        } else {
1286            base_amount
1287        };
1288
1289        // Apply temporal drift if configured
        // NOTE(review): `seasonal_factor` is only applied when
        // `amount_mean_multiplier` differs from 1.0 — confirm that is intended.
1290        let drift_adjusted_amount = {
1291            let drift = self.get_drift_adjustments(posting_date);
1292            if drift.amount_mean_multiplier != 1.0 {
1293                // Apply drift multiplier (includes seasonal factor if enabled)
1294                let multiplier = drift.amount_mean_multiplier * drift.seasonal_factor;
1295                let adjusted = base_amount.to_f64().unwrap_or(1.0) * multiplier;
1296                Decimal::from_f64_retain(adjusted).unwrap_or(base_amount)
1297            } else {
1298                base_amount
1299            }
1300        };
1301
1302        // Apply human variation to amounts for non-automated transactions
1303        let total_amount = if is_automated {
1304            drift_adjusted_amount // Automated systems use exact amounts
1305        } else {
1306            self.apply_human_variation(drift_adjusted_amount)
1307        };
1308
1309        // Generate debit lines
1310        let debit_amounts = self
1311            .amount_sampler
1312            .sample_summing_to(line_spec.debit_count, total_amount);
1313        for (i, amount) in debit_amounts.into_iter().enumerate() {
1314            let account_number = self.select_debit_account().account_number.clone();
1315            let mut line = JournalEntryLine::debit(
1316                entry.header.document_id,
1317                (i + 1) as u32,
1318                account_number.clone(),
1319                amount,
1320            );
1321
1322            // Generate line text if enabled
1323            if self.template_config.descriptions.generate_line_text {
1324                line.line_text = Some(self.description_generator.generate_line_text(
1325                    &account_number,
1326                    &context,
1327                    &mut self.rng,
1328                ));
1329            }
1330
1331            entry.add_line(line);
1332        }
1333
1334        // Generate credit lines from the SAME total as the debits to ensure balance
        // (the per-line split is sampled separately from the debit split).
1335        let credit_amounts = self
1336            .amount_sampler
1337            .sample_summing_to(line_spec.credit_count, total_amount);
1338        for (i, amount) in credit_amounts.into_iter().enumerate() {
1339            let account_number = self.select_credit_account().account_number.clone();
            // Credit line numbers continue after the debit lines.
1340            let mut line = JournalEntryLine::credit(
1341                entry.header.document_id,
1342                (line_spec.debit_count + i + 1) as u32,
1343                account_number.clone(),
1344                amount,
1345            );
1346
1347            // Generate line text if enabled
1348            if self.template_config.descriptions.generate_line_text {
1349                line.line_text = Some(self.description_generator.generate_line_text(
1350                    &account_number,
1351                    &context,
1352                    &mut self.rng,
1353                ));
1354            }
1355
1356            entry.add_line(line);
1357        }
1358
1359        // Enrich line items with account descriptions, cost centers, etc.
1360        self.enrich_line_items(&mut entry);
1361
1362        // Apply persona-based errors if enabled and it's a human user
1363        if self.persona_errors_enabled && !is_automated {
1364            self.maybe_inject_persona_error(&mut entry);
1365        }
1366
1367        // Apply approval workflow if enabled and amount exceeds threshold
1368        if self.approval_enabled {
1369            self.maybe_apply_approval_workflow(&mut entry, posting_date);
1370        }
1371
1372        // Populate approved_by / approval_date from the approval workflow
1373        self.populate_approval_fields(&mut entry, posting_date);
1374
1375        // Maybe start a batch of similar entries for realism
1376        self.maybe_start_batch(&entry);
1377
1378        entry
1379    }
1380
1381    /// Enable or disable persona-based error injection.
1382    ///
1383    /// When enabled, entries created by human personas have a chance
1384    /// to contain realistic human errors based on their experience level.
1385    pub fn with_persona_errors(mut self, enabled: bool) -> Self {
1386        self.persona_errors_enabled = enabled;
1387        self
1388    }
1389
1390    /// Set fraud configuration for fraud injection.
1391    ///
1392    /// When fraud is enabled in the config, transactions have a chance
1393    /// to be marked as fraudulent based on the configured fraud rate.
1394    pub fn with_fraud_config(mut self, config: FraudConfig) -> Self {
1395        self.fraud_config = config;
1396        self
1397    }
1398
1399    /// Check if persona errors are enabled.
1400    pub fn persona_errors_enabled(&self) -> bool {
1401        self.persona_errors_enabled
1402    }
1403
1404    /// Enable or disable batch processing behavior.
1405    ///
1406    /// When enabled (default), the generator will occasionally produce batches
1407    /// of similar entries, simulating how humans batch similar work together.
1408    pub fn with_batching(mut self, enabled: bool) -> Self {
1409        if !enabled {
1410            self.batch_state = None;
1411        }
1412        self
1413    }
1414
1415    /// Check if batch processing is enabled.
1416    pub fn batching_enabled(&self) -> bool {
1417        // Batching is implicitly enabled when not explicitly disabled
1418        true
1419    }
1420
1421    /// Maybe start a batch based on the current entry.
1422    ///
1423    /// Humans often batch similar work: processing invoices from one vendor,
1424    /// entering expense reports for a trip, reconciling similar items.
1425    fn maybe_start_batch(&mut self, entry: &JournalEntry) {
1426        // Only start batch for non-automated, non-fraud entries
1427        if entry.header.source == TransactionSource::Automated || entry.header.is_fraud {
1428            return;
1429        }
1430
1431        // 15% chance to start a batch (most work is not batched)
1432        if self.rng.random::<f64>() > 0.15 {
1433            return;
1434        }
1435
1436        // Extract key attributes for batching
1437        let base_account = entry
1438            .lines
1439            .first()
1440            .map(|l| l.gl_account.clone())
1441            .unwrap_or_default();
1442
1443        let base_amount = entry.total_debit();
1444
1445        self.batch_state = Some(BatchState {
1446            base_account_number: base_account,
1447            base_amount,
1448            base_business_process: entry.header.business_process,
1449            base_posting_date: entry.header.posting_date,
1450            remaining: self.rng.random_range(2..7), // 2-6 more similar entries
1451        });
1452    }
1453
1454    /// Generate an entry that's part of the current batch.
1455    ///
1456    /// Batched entries have:
1457    /// - Same or very similar business process
1458    /// - Same posting date (batched work done together)
1459    /// - Similar amounts (within ±15%)
1460    /// - Same debit account (processing similar items)
1461    fn generate_batched_entry(&mut self) -> JournalEntry {
1462        use rust_decimal::Decimal;
1463
1464        // Decrement batch counter
1465        if let Some(ref mut state) = self.batch_state {
1466            state.remaining = state.remaining.saturating_sub(1);
1467        }
1468
1469        let Some(batch) = self.batch_state.clone() else {
1470            // This is a programming error - batch_state should be set before calling this method.
1471            // Clear state and fall back to generating a standard entry instead of panicking.
1472            tracing::warn!(
1473                "generate_batched_entry called without batch_state; generating standard entry"
1474            );
1475            self.batch_state = None;
1476            return self.generate();
1477        };
1478
1479        // Use the batch's posting date (work done on same day)
1480        let posting_date = batch.base_posting_date;
1481
1482        self.count += 1;
1483        let document_id = self.generate_deterministic_uuid();
1484
1485        // Select same company (batched work is usually same company)
1486        let company_code = self.company_selector.select(&mut self.rng).to_string();
1487
1488        // Use simplified line spec for batched entries (usually 2-line)
1489        let _line_spec = LineItemSpec {
1490            total_count: 2,
1491            debit_count: 1,
1492            credit_count: 1,
1493            split_type: DebitCreditSplit::Equal,
1494        };
1495
1496        // Batched entries are always manual
1497        let source = TransactionSource::Manual;
1498
1499        // Use the batch's business process
1500        let business_process = batch.base_business_process.unwrap_or(BusinessProcess::R2R);
1501
1502        // Sample time
1503        let time = self.temporal_sampler.sample_time(true);
1504        let created_at = posting_date.and_time(time).and_utc();
1505
1506        // Same user for batched work
1507        let (created_by, user_persona) = self.select_user(false);
1508
1509        // Create header
1510        let mut header =
1511            JournalEntryHeader::with_deterministic_id(company_code, posting_date, document_id);
1512        header.created_at = created_at;
1513        header.source = source;
1514        header.created_by = created_by;
1515        header.user_persona = user_persona;
1516        header.business_process = Some(business_process);
1517        header.document_type = Self::document_type_for_process(business_process).to_string();
1518
1519        // Batched manual entries have Manual source document
1520        header.source_document = Some(DocumentRef::Manual);
1521
1522        // ISA 240 audit flags for batched entries (always manual)
1523        header.is_manual = true;
1524        header.source_system = if self.rng.random::<f64>() < 0.70 {
1525            "manual".to_string()
1526        } else {
1527            "spreadsheet".to_string()
1528        };
1529        header.is_post_close = posting_date.month() == self.end_date.month()
1530            && posting_date.year() == self.end_date.year()
1531            && posting_date.day() > 25;
1532        header.created_date =
1533            posting_date.and_hms_opt(time.hour().min(23), time.minute(), time.second());
1534
1535        // Generate similar amount (within ±15% of base)
1536        let variation = self.rng.random_range(-0.15..0.15);
1537        let varied_amount =
1538            batch.base_amount * (Decimal::ONE + Decimal::try_from(variation).unwrap_or_default());
1539        let total_amount = varied_amount.round_dp(2).max(Decimal::from(1));
1540
1541        // Create the entry
1542        let mut entry = JournalEntry::new(header);
1543
1544        // Use same debit account as batch base
1545        let debit_line = JournalEntryLine::debit(
1546            entry.header.document_id,
1547            1,
1548            batch.base_account_number.clone(),
1549            total_amount,
1550        );
1551        entry.add_line(debit_line);
1552
1553        // Select a credit account
1554        let credit_account = self.select_credit_account().account_number.clone();
1555        let credit_line =
1556            JournalEntryLine::credit(entry.header.document_id, 2, credit_account, total_amount);
1557        entry.add_line(credit_line);
1558
1559        // Enrich line items with account descriptions, cost centers, etc.
1560        self.enrich_line_items(&mut entry);
1561
1562        // Apply persona-based errors if enabled
1563        if self.persona_errors_enabled {
1564            self.maybe_inject_persona_error(&mut entry);
1565        }
1566
1567        // Apply approval workflow if enabled
1568        if self.approval_enabled {
1569            self.maybe_apply_approval_workflow(&mut entry, posting_date);
1570        }
1571
1572        // Populate approved_by / approval_date from the approval workflow
1573        self.populate_approval_fields(&mut entry, posting_date);
1574
1575        // Clear batch state if no more entries remaining
1576        if batch.remaining <= 1 {
1577            self.batch_state = None;
1578        }
1579
1580        entry
1581    }
1582
1583    /// Maybe inject a persona-appropriate error based on the persona's error rate.
1584    fn maybe_inject_persona_error(&mut self, entry: &mut JournalEntry) {
1585        // Parse persona from the entry header
1586        let persona_str = &entry.header.user_persona;
1587        let persona = match persona_str.to_lowercase().as_str() {
1588            s if s.contains("junior") => UserPersona::JuniorAccountant,
1589            s if s.contains("senior") => UserPersona::SeniorAccountant,
1590            s if s.contains("controller") => UserPersona::Controller,
1591            s if s.contains("manager") => UserPersona::Manager,
1592            s if s.contains("executive") => UserPersona::Executive,
1593            _ => return, // Don't inject errors for unknown personas
1594        };
1595
1596        // Get base error rate from persona
1597        let base_error_rate = persona.error_rate();
1598
1599        // Apply stress factors based on posting date
1600        let adjusted_rate = self.apply_stress_factors(base_error_rate, entry.header.posting_date);
1601
1602        // Check if error should occur based on adjusted rate
1603        if self.rng.random::<f64>() >= adjusted_rate {
1604            return; // No error this time
1605        }
1606
1607        // Select and inject persona-appropriate error
1608        self.inject_human_error(entry, persona);
1609    }
1610
1611    /// Apply contextual stress factors to the base error rate.
1612    ///
1613    /// Stress factors increase error likelihood during:
1614    /// - Month-end (day >= 28): 1.5x more errors due to deadline pressure
1615    /// - Quarter-end (Mar, Jun, Sep, Dec): additional 25% boost
1616    /// - Year-end (December 28-31): 2.0x more errors due to audit pressure
1617    /// - Monday morning (catch-up work): 20% more errors
1618    /// - Friday afternoon (rushing to leave): 30% more errors
1619    fn apply_stress_factors(&self, base_rate: f64, posting_date: chrono::NaiveDate) -> f64 {
1620        use chrono::Datelike;
1621
1622        let mut rate = base_rate;
1623        let day = posting_date.day();
1624        let month = posting_date.month();
1625
1626        // Year-end stress (December 28-31): double the error rate
1627        if month == 12 && day >= 28 {
1628            rate *= 2.0;
1629            return rate.min(0.5); // Cap at 50% to keep it realistic
1630        }
1631
1632        // Quarter-end stress (last days of Mar, Jun, Sep, Dec)
1633        if matches!(month, 3 | 6 | 9 | 12) && day >= 28 {
1634            rate *= 1.75; // 75% more errors at quarter end
1635            return rate.min(0.4);
1636        }
1637
1638        // Month-end stress (last 3 days of month)
1639        if day >= 28 {
1640            rate *= 1.5; // 50% more errors at month end
1641        }
1642
1643        // Day-of-week stress effects
1644        let weekday = posting_date.weekday();
1645        match weekday {
1646            chrono::Weekday::Mon => {
1647                // Monday: catching up, often rushed
1648                rate *= 1.2;
1649            }
1650            chrono::Weekday::Fri => {
1651                // Friday: rushing to finish before weekend
1652                rate *= 1.3;
1653            }
1654            _ => {}
1655        }
1656
1657        // Cap at 40% to keep it realistic
1658        rate.min(0.4)
1659    }
1660
1661    /// Apply human-like variation to an amount.
1662    ///
1663    /// Humans don't enter perfectly calculated amounts - they:
1664    /// - Round amounts differently
1665    /// - Estimate instead of calculating exactly
1666    /// - Make small input variations
1667    ///
1668    /// This applies small variations (typically ±2%) to make amounts more realistic.
1669    fn apply_human_variation(&mut self, amount: rust_decimal::Decimal) -> rust_decimal::Decimal {
1670        use rust_decimal::Decimal;
1671
1672        // Automated transactions or very small amounts don't get variation
1673        if amount < Decimal::from(10) {
1674            return amount;
1675        }
1676
1677        // 70% chance of human variation being applied
1678        if self.rng.random::<f64>() > 0.70 {
1679            return amount;
1680        }
1681
1682        // Decide which type of human variation to apply
1683        let variation_type: u8 = self.rng.random_range(0..4);
1684
1685        match variation_type {
1686            0 => {
1687                // ±2% variation (common for estimated amounts)
1688                let variation_pct = self.rng.random_range(-0.02..0.02);
1689                let variation = amount * Decimal::try_from(variation_pct).unwrap_or_default();
1690                (amount + variation).round_dp(2)
1691            }
1692            1 => {
1693                // Round to nearest $10
1694                let ten = Decimal::from(10);
1695                (amount / ten).round() * ten
1696            }
1697            2 => {
1698                // Round to nearest $100 (for larger amounts)
1699                if amount >= Decimal::from(500) {
1700                    let hundred = Decimal::from(100);
1701                    (amount / hundred).round() * hundred
1702                } else {
1703                    amount
1704                }
1705            }
1706            3 => {
1707                // Slight under/over payment (±$0.01 to ±$1.00)
1708                let cents = Decimal::new(self.rng.random_range(-100..100), 2);
1709                (amount + cents).max(Decimal::ZERO).round_dp(2)
1710            }
1711            _ => amount,
1712        }
1713    }
1714
1715    /// Rebalance an entry after a one-sided amount modification.
1716    ///
1717    /// When an error modifies one line's amount, this finds a line on the opposite
1718    /// side (credit if modified was debit, or vice versa) and adjusts it by the
1719    /// same impact to maintain balance.
1720    fn rebalance_entry(entry: &mut JournalEntry, modified_was_debit: bool, impact: Decimal) {
1721        // Find a line on the opposite side to adjust
1722        let balancing_idx = entry.lines.iter().position(|l| {
1723            if modified_was_debit {
1724                l.credit_amount > Decimal::ZERO
1725            } else {
1726                l.debit_amount > Decimal::ZERO
1727            }
1728        });
1729
1730        if let Some(idx) = balancing_idx {
1731            if modified_was_debit {
1732                entry.lines[idx].credit_amount += impact;
1733            } else {
1734                entry.lines[idx].debit_amount += impact;
1735            }
1736        }
1737    }
1738
1739    /// Inject a human-like error based on the persona.
1740    ///
1741    /// All error types maintain balance - amount modifications are applied to both sides.
1742    /// Entries are marked with [HUMAN_ERROR:*] tags in header_text for ML detection.
1743    fn inject_human_error(&mut self, entry: &mut JournalEntry, persona: UserPersona) {
1744        use rust_decimal::Decimal;
1745
1746        // Different personas make different types of errors
1747        let error_type: u8 = match persona {
1748            UserPersona::JuniorAccountant => {
1749                // Junior accountants make more varied errors
1750                self.rng.random_range(0..5)
1751            }
1752            UserPersona::SeniorAccountant => {
1753                // Senior accountants mainly make transposition errors
1754                self.rng.random_range(0..3)
1755            }
1756            UserPersona::Controller | UserPersona::Manager => {
1757                // Controllers/managers mainly make rounding or cutoff errors
1758                self.rng.random_range(3..5)
1759            }
1760            _ => return,
1761        };
1762
1763        match error_type {
1764            0 => {
1765                // Transposed digits in an amount
1766                if let Some(line) = entry.lines.get_mut(0) {
1767                    let is_debit = line.debit_amount > Decimal::ZERO;
1768                    let original_amount = if is_debit {
1769                        line.debit_amount
1770                    } else {
1771                        line.credit_amount
1772                    };
1773
1774                    // Simple digit swap in the string representation
1775                    let s = original_amount.to_string();
1776                    if s.len() >= 2 {
1777                        let chars: Vec<char> = s.chars().collect();
1778                        let pos = self.rng.random_range(0..chars.len().saturating_sub(1));
1779                        if chars[pos].is_ascii_digit()
1780                            && chars.get(pos + 1).is_some_and(char::is_ascii_digit)
1781                        {
1782                            let mut new_chars = chars;
1783                            new_chars.swap(pos, pos + 1);
1784                            if let Ok(new_amount) =
1785                                new_chars.into_iter().collect::<String>().parse::<Decimal>()
1786                            {
1787                                let impact = new_amount - original_amount;
1788
1789                                // Apply to the modified line
1790                                if is_debit {
1791                                    entry.lines[0].debit_amount = new_amount;
1792                                } else {
1793                                    entry.lines[0].credit_amount = new_amount;
1794                                }
1795
1796                                // Rebalance the entry
1797                                Self::rebalance_entry(entry, is_debit, impact);
1798
1799                                entry.header.header_text = Some(
1800                                    entry.header.header_text.clone().unwrap_or_default()
1801                                        + " [HUMAN_ERROR:TRANSPOSITION]",
1802                                );
1803                            }
1804                        }
1805                    }
1806                }
1807            }
1808            1 => {
1809                // Wrong decimal place (off by factor of 10)
1810                if let Some(line) = entry.lines.get_mut(0) {
1811                    let is_debit = line.debit_amount > Decimal::ZERO;
1812                    let original_amount = if is_debit {
1813                        line.debit_amount
1814                    } else {
1815                        line.credit_amount
1816                    };
1817
1818                    let new_amount = original_amount * Decimal::new(10, 0);
1819                    let impact = new_amount - original_amount;
1820
1821                    // Apply to the modified line
1822                    if is_debit {
1823                        entry.lines[0].debit_amount = new_amount;
1824                    } else {
1825                        entry.lines[0].credit_amount = new_amount;
1826                    }
1827
1828                    // Rebalance the entry
1829                    Self::rebalance_entry(entry, is_debit, impact);
1830
1831                    entry.header.header_text = Some(
1832                        entry.header.header_text.clone().unwrap_or_default()
1833                            + " [HUMAN_ERROR:DECIMAL_SHIFT]",
1834                    );
1835                }
1836            }
1837            2 => {
1838                // Typo in description (doesn't affect balance)
1839                if let Some(ref mut text) = entry.header.header_text {
1840                    let typos = ["teh", "adn", "wiht", "taht", "recieve"];
1841                    let correct = ["the", "and", "with", "that", "receive"];
1842                    let idx = self.rng.random_range(0..typos.len());
1843                    if text.to_lowercase().contains(correct[idx]) {
1844                        *text = text.replace(correct[idx], typos[idx]);
1845                        *text = format!("{text} [HUMAN_ERROR:TYPO]");
1846                    }
1847                }
1848            }
1849            3 => {
1850                // Rounding to round number
1851                if let Some(line) = entry.lines.get_mut(0) {
1852                    let is_debit = line.debit_amount > Decimal::ZERO;
1853                    let original_amount = if is_debit {
1854                        line.debit_amount
1855                    } else {
1856                        line.credit_amount
1857                    };
1858
1859                    let new_amount =
1860                        (original_amount / Decimal::new(100, 0)).round() * Decimal::new(100, 0);
1861                    let impact = new_amount - original_amount;
1862
1863                    // Apply to the modified line
1864                    if is_debit {
1865                        entry.lines[0].debit_amount = new_amount;
1866                    } else {
1867                        entry.lines[0].credit_amount = new_amount;
1868                    }
1869
1870                    // Rebalance the entry
1871                    Self::rebalance_entry(entry, is_debit, impact);
1872
1873                    entry.header.header_text = Some(
1874                        entry.header.header_text.clone().unwrap_or_default()
1875                            + " [HUMAN_ERROR:ROUNDED]",
1876                    );
1877                }
1878            }
1879            // Late posting marker (document date much earlier than posting
1880            // date). Doesn't create an imbalance.
1881            4 if entry.header.document_date == entry.header.posting_date => {
1882                let days_late = self.rng.random_range(5..15);
1883                entry.header.document_date =
1884                    entry.header.posting_date - chrono::Duration::days(days_late);
1885                entry.header.header_text = Some(
1886                    entry.header.header_text.clone().unwrap_or_default()
1887                        + " [HUMAN_ERROR:LATE_POSTING]",
1888                );
1889            }
1890            _ => {}
1891        }
1892    }
1893
1894    /// Apply approval workflow for high-value transactions.
1895    ///
1896    /// If the entry amount exceeds the approval threshold, simulate an
1897    /// approval workflow with appropriate approvers based on amount.
1898    fn maybe_apply_approval_workflow(
1899        &mut self,
1900        entry: &mut JournalEntry,
1901        _posting_date: NaiveDate,
1902    ) {
1903        use rust_decimal::Decimal;
1904
1905        let amount = entry.total_debit();
1906
1907        // Skip if amount is below threshold
1908        if amount <= self.approval_threshold {
1909            // Auto-approved below threshold
1910            let workflow = ApprovalWorkflow::auto_approved(
1911                entry.header.created_by.clone(),
1912                entry.header.user_persona.clone(),
1913                amount,
1914                entry.header.created_at,
1915            );
1916            entry.header.approval_workflow = Some(workflow);
1917            return;
1918        }
1919
1920        // Mark as SOX relevant for high-value transactions
1921        entry.header.sox_relevant = true;
1922
1923        // Determine required approval levels based on amount
1924        let required_levels = if amount > Decimal::new(100000, 0) {
1925            3 // Executive approval required
1926        } else if amount > Decimal::new(50000, 0) {
1927            2 // Senior management approval
1928        } else {
1929            1 // Manager approval
1930        };
1931
1932        // Create the approval workflow
1933        let mut workflow = ApprovalWorkflow::new(
1934            entry.header.created_by.clone(),
1935            entry.header.user_persona.clone(),
1936            amount,
1937        );
1938        workflow.required_levels = required_levels;
1939
1940        // Simulate submission
1941        let submit_time = entry.header.created_at;
1942        let submit_action = ApprovalAction::new(
1943            entry.header.created_by.clone(),
1944            entry.header.user_persona.clone(),
1945            self.parse_persona(&entry.header.user_persona),
1946            ApprovalActionType::Submit,
1947            0,
1948        )
1949        .with_timestamp(submit_time);
1950
1951        workflow.actions.push(submit_action);
1952        workflow.status = ApprovalStatus::Pending;
1953        workflow.submitted_at = Some(submit_time);
1954
1955        // Simulate approvals with realistic delays
1956        let mut current_time = submit_time;
1957        for level in 1..=required_levels {
1958            // Add delay for approval (1-3 business hours per level)
1959            let delay_hours = self.rng.random_range(1..4);
1960            current_time += chrono::Duration::hours(delay_hours);
1961
1962            // Skip weekends
1963            while current_time.weekday() == chrono::Weekday::Sat
1964                || current_time.weekday() == chrono::Weekday::Sun
1965            {
1966                current_time += chrono::Duration::days(1);
1967            }
1968
1969            // Generate approver based on level
1970            let (approver_id, approver_role) = self.select_approver(level);
1971
1972            let approve_action = ApprovalAction::new(
1973                approver_id.clone(),
1974                approver_role.to_string(),
1975                approver_role,
1976                ApprovalActionType::Approve,
1977                level,
1978            )
1979            .with_timestamp(current_time);
1980
1981            workflow.actions.push(approve_action);
1982            workflow.current_level = level;
1983        }
1984
1985        // Mark as approved
1986        workflow.status = ApprovalStatus::Approved;
1987        workflow.approved_at = Some(current_time);
1988
1989        entry.header.approval_workflow = Some(workflow);
1990    }
1991
1992    /// Select an approver based on the required level.
1993    fn select_approver(&mut self, level: u8) -> (String, UserPersona) {
1994        let persona = match level {
1995            1 => UserPersona::Manager,
1996            2 => UserPersona::Controller,
1997            _ => UserPersona::Executive,
1998        };
1999
2000        // Try to get from user pool first
2001        if let Some(ref pool) = self.user_pool {
2002            if let Some(user) = pool.get_random_user(persona, &mut self.rng) {
2003                return (user.user_id.clone(), persona);
2004            }
2005        }
2006
2007        // Fallback to generated approver
2008        let approver_id = match persona {
2009            UserPersona::Manager => format!("MGR{:04}", self.rng.random_range(1..100)),
2010            UserPersona::Controller => format!("CTRL{:04}", self.rng.random_range(1..20)),
2011            UserPersona::Executive => format!("EXEC{:04}", self.rng.random_range(1..10)),
2012            _ => format!("USR{:04}", self.rng.random_range(1..1000)),
2013        };
2014
2015        (approver_id, persona)
2016    }
2017
2018    /// Parse user persona from string.
2019    fn parse_persona(&self, persona_str: &str) -> UserPersona {
2020        match persona_str.to_lowercase().as_str() {
2021            s if s.contains("junior") => UserPersona::JuniorAccountant,
2022            s if s.contains("senior") => UserPersona::SeniorAccountant,
2023            s if s.contains("controller") => UserPersona::Controller,
2024            s if s.contains("manager") => UserPersona::Manager,
2025            s if s.contains("executive") => UserPersona::Executive,
2026            s if s.contains("automated") || s.contains("system") => UserPersona::AutomatedSystem,
2027            _ => UserPersona::JuniorAccountant, // Default
2028        }
2029    }
2030
2031    /// Enable or disable approval workflow.
2032    pub fn with_approval(mut self, enabled: bool) -> Self {
2033        self.approval_enabled = enabled;
2034        self
2035    }
2036
2037    /// Set the approval threshold amount.
2038    pub fn with_approval_threshold(mut self, threshold: rust_decimal::Decimal) -> Self {
2039        self.approval_threshold = threshold;
2040        self
2041    }
2042
2043    /// Set the SOD violation rate for approval tracking.
2044    ///
2045    /// When a transaction is approved, there is a `rate` probability (0.0 to 1.0)
2046    /// that the approver is the same as the creator, which constitutes a SOD violation.
2047    /// Default is 0.10 (10%).
2048    pub fn with_sod_violation_rate(mut self, rate: f64) -> Self {
2049        self.sod_violation_rate = rate;
2050        self
2051    }
2052
2053    /// Populate `approved_by` and `approval_date` from the approval workflow,
2054    /// and flag SOD violations when the approver matches the creator.
2055    fn populate_approval_fields(&mut self, entry: &mut JournalEntry, posting_date: NaiveDate) {
2056        if let Some(ref workflow) = entry.header.approval_workflow {
2057            // Extract the last approver from the workflow actions
2058            let last_approver = workflow
2059                .actions
2060                .iter()
2061                .rev()
2062                .find(|a| matches!(a.action, ApprovalActionType::Approve));
2063
2064            if let Some(approver_action) = last_approver {
2065                entry.header.approved_by = Some(approver_action.actor_id.clone());
2066                entry.header.approval_date = Some(approver_action.action_timestamp.date_naive());
2067            } else {
2068                // No explicit approver (auto-approved); use the preparer
2069                entry.header.approved_by = Some(workflow.preparer_id.clone());
2070                entry.header.approval_date = Some(posting_date);
2071            }
2072
2073            // Inject SOD violation: with configured probability, set approver = creator
2074            if self.rng.random::<f64>() < self.sod_violation_rate {
2075                let creator = entry.header.created_by.clone();
2076                entry.header.approved_by = Some(creator);
2077                entry.header.sod_violation = true;
2078                entry.header.sod_conflict_type = Some(SodConflictType::PreparerApprover);
2079            }
2080        }
2081    }
2082
2083    /// Set the temporal drift controller for simulating distribution changes over time.
2084    ///
2085    /// When drift is enabled, amounts and other distributions will shift based on
2086    /// the period (month) to simulate realistic temporal evolution like inflation
2087    /// or increasing fraud rates.
2088    pub fn with_drift_controller(mut self, controller: DriftController) -> Self {
2089        self.drift_controller = Some(controller);
2090        self
2091    }
2092
2093    /// Set drift configuration directly.
2094    ///
2095    /// Creates a drift controller from the config. Total periods is calculated
2096    /// from the date range.
2097    pub fn with_drift_config(mut self, config: DriftConfig, seed: u64) -> Self {
2098        if config.enabled {
2099            let total_periods = self.calculate_total_periods();
2100            self.drift_controller = Some(DriftController::new(config, seed, total_periods));
2101        }
2102        self
2103    }
2104
2105    /// Calculate total periods (months) in the date range.
2106    fn calculate_total_periods(&self) -> u32 {
2107        let start_year = self.start_date.year();
2108        let start_month = self.start_date.month();
2109        let end_year = self.end_date.year();
2110        let end_month = self.end_date.month();
2111
2112        ((end_year - start_year) * 12 + (end_month as i32 - start_month as i32) + 1).max(1) as u32
2113    }
2114
2115    /// Calculate the period number (0-indexed) for a given date.
2116    fn date_to_period(&self, date: NaiveDate) -> u32 {
2117        let start_year = self.start_date.year();
2118        let start_month = self.start_date.month() as i32;
2119        let date_year = date.year();
2120        let date_month = date.month() as i32;
2121
2122        ((date_year - start_year) * 12 + (date_month - start_month)).max(0) as u32
2123    }
2124
2125    /// Get drift adjustments for a given date.
2126    fn get_drift_adjustments(&self, date: NaiveDate) -> DriftAdjustments {
2127        if let Some(ref controller) = self.drift_controller {
2128            let period = self.date_to_period(date);
2129            controller.compute_adjustments(period)
2130        } else {
2131            DriftAdjustments::none()
2132        }
2133    }
2134
2135    /// Select a user from the pool or generate a generic user ID.
2136    #[inline]
2137    fn select_user(&mut self, is_automated: bool) -> (String, String) {
2138        if let Some(ref pool) = self.user_pool {
2139            let persona = if is_automated {
2140                UserPersona::AutomatedSystem
2141            } else {
2142                // Random distribution among human personas
2143                let roll: f64 = self.rng.random();
2144                if roll < 0.4 {
2145                    UserPersona::JuniorAccountant
2146                } else if roll < 0.7 {
2147                    UserPersona::SeniorAccountant
2148                } else if roll < 0.85 {
2149                    UserPersona::Controller
2150                } else {
2151                    UserPersona::Manager
2152                }
2153            };
2154
2155            if let Some(user) = pool.get_random_user(persona, &mut self.rng) {
2156                return (user.user_id.clone(), user.persona.to_string());
2157            }
2158        }
2159
2160        // Fallback to generic format
2161        if is_automated {
2162            (
2163                format!("BATCH{:04}", self.rng.random_range(1..=20)),
2164                "automated_system".to_string(),
2165            )
2166        } else {
2167            (
2168                format!("USER{:04}", self.rng.random_range(1..=40)),
2169                "senior_accountant".to_string(),
2170            )
2171        }
2172    }
2173
2174    /// Select transaction source based on configuration weights.
2175    #[inline]
2176    fn select_source(&mut self) -> TransactionSource {
2177        let roll: f64 = self.rng.random();
2178        let dist = &self.config.source_distribution;
2179
2180        if roll < dist.manual {
2181            TransactionSource::Manual
2182        } else if roll < dist.manual + dist.automated {
2183            TransactionSource::Automated
2184        } else if roll < dist.manual + dist.automated + dist.recurring {
2185            TransactionSource::Recurring
2186        } else {
2187            TransactionSource::Adjustment
2188        }
2189    }
2190
2191    /// Select a business process based on configuration weights.
2192    #[inline]
2193    /// Map a business process to a SAP-style document type code.
2194    ///
2195    /// - P2P → "KR" (vendor invoice)
2196    /// - O2C → "DR" (customer invoice)
2197    /// - R2R → "SA" (general journal)
2198    /// - H2R → "HR" (HR posting)
2199    /// - A2R → "AA" (asset posting)
2200    /// - others → "SA"
2201    fn document_type_for_process(process: BusinessProcess) -> &'static str {
2202        match process {
2203            BusinessProcess::P2P => "KR",
2204            BusinessProcess::O2C => "DR",
2205            BusinessProcess::R2R => "SA",
2206            BusinessProcess::H2R => "HR",
2207            BusinessProcess::A2R => "AA",
2208            _ => "SA",
2209        }
2210    }
2211
2212    fn select_business_process(&mut self) -> BusinessProcess {
2213        *datasynth_core::utils::weighted_select(&mut self.rng, &self.business_process_weights)
2214    }
2215
2216    #[inline]
2217    fn select_debit_account(&mut self) -> &GLAccount {
2218        let accounts = self.coa.get_accounts_by_type(AccountType::Asset);
2219        let expense_accounts = self.coa.get_accounts_by_type(AccountType::Expense);
2220
2221        // 60% asset, 40% expense for debits
2222        let all: Vec<_> = if self.rng.random::<f64>() < 0.6 {
2223            accounts
2224        } else {
2225            expense_accounts
2226        };
2227
2228        all.choose(&mut self.rng).copied().unwrap_or_else(|| {
2229            tracing::warn!(
2230                "Account selection returned empty list, falling back to first COA account"
2231            );
2232            &self.coa.accounts[0]
2233        })
2234    }
2235
2236    #[inline]
2237    fn select_credit_account(&mut self) -> &GLAccount {
2238        let liability_accounts = self.coa.get_accounts_by_type(AccountType::Liability);
2239        let revenue_accounts = self.coa.get_accounts_by_type(AccountType::Revenue);
2240
2241        // 60% liability, 40% revenue for credits
2242        let all: Vec<_> = if self.rng.random::<f64>() < 0.6 {
2243            liability_accounts
2244        } else {
2245            revenue_accounts
2246        };
2247
2248        all.choose(&mut self.rng).copied().unwrap_or_else(|| {
2249            tracing::warn!(
2250                "Account selection returned empty list, falling back to first COA account"
2251            );
2252            &self.coa.accounts[0]
2253        })
2254    }
2255}
2256
2257impl Generator for JournalEntryGenerator {
2258    type Item = JournalEntry;
2259    type Config = (
2260        TransactionConfig,
2261        Arc<ChartOfAccounts>,
2262        Vec<String>,
2263        NaiveDate,
2264        NaiveDate,
2265    );
2266
2267    fn new(config: Self::Config, seed: u64) -> Self {
2268        Self::new_with_params(config.0, config.1, config.2, config.3, config.4, seed)
2269    }
2270
2271    fn generate_one(&mut self) -> Self::Item {
2272        self.generate()
2273    }
2274
2275    fn reset(&mut self) {
2276        self.rng = seeded_rng(self.seed, 0);
2277        self.line_sampler.reset(self.seed + 1);
2278        self.amount_sampler.reset(self.seed + 2);
2279        self.temporal_sampler.reset(self.seed + 3);
2280        if let Some(ref mut adv) = self.advanced_amount_sampler {
2281            adv.reset(self.seed + 2);
2282        }
2283        self.count = 0;
2284        self.uuid_factory.reset();
2285
2286        // Reset reference generator by recreating it
2287        let mut ref_gen = ReferenceGenerator::new(
2288            self.start_date.year(),
2289            self.companies
2290                .first()
2291                .map(std::string::String::as_str)
2292                .unwrap_or("1000"),
2293        );
2294        ref_gen.set_prefix(
2295            ReferenceType::Invoice,
2296            &self.template_config.references.invoice_prefix,
2297        );
2298        ref_gen.set_prefix(
2299            ReferenceType::PurchaseOrder,
2300            &self.template_config.references.po_prefix,
2301        );
2302        ref_gen.set_prefix(
2303            ReferenceType::SalesOrder,
2304            &self.template_config.references.so_prefix,
2305        );
2306        self.reference_generator = ref_gen;
2307    }
2308
2309    fn count(&self) -> u64 {
2310        self.count
2311    }
2312
2313    fn seed(&self) -> u64 {
2314        self.seed
2315    }
2316}
2317
2318use datasynth_core::traits::ParallelGenerator;
2319
2320impl ParallelGenerator for JournalEntryGenerator {
2321    /// Split this generator into `parts` independent sub-generators.
2322    ///
2323    /// Each sub-generator gets a deterministic seed derived from the parent seed
2324    /// and its partition index, plus a partitioned UUID factory to avoid contention.
2325    /// The results are deterministic for a given partition count.
2326    fn split(self, parts: usize) -> Vec<Self> {
2327        let parts = parts.max(1);
2328        (0..parts)
2329            .map(|i| {
2330                // Derive a unique seed per partition using a golden-ratio constant
2331                let sub_seed = self
2332                    .seed
2333                    .wrapping_add((i as u64).wrapping_mul(0x9E3779B97F4A7C15));
2334
2335                let mut gen = JournalEntryGenerator::new_with_full_config(
2336                    self.config.clone(),
2337                    Arc::clone(&self.coa),
2338                    self.companies.clone(),
2339                    self.start_date,
2340                    self.end_date,
2341                    sub_seed,
2342                    self.template_config.clone(),
2343                    self.user_pool.clone(),
2344                );
2345
2346                // Copy over configuration state
2347                gen.company_selector = self.company_selector.clone();
2348                gen.vendor_pool = self.vendor_pool.clone();
2349                gen.customer_pool = self.customer_pool.clone();
2350                gen.material_pool = self.material_pool.clone();
2351                gen.using_real_master_data = self.using_real_master_data;
2352                gen.fraud_config = self.fraud_config.clone();
2353                gen.persona_errors_enabled = self.persona_errors_enabled;
2354                gen.approval_enabled = self.approval_enabled;
2355                gen.approval_threshold = self.approval_threshold;
2356                gen.sod_violation_rate = self.sod_violation_rate;
2357                // v3.4.0+: advanced amount sampler (mixture / Pareto /
2358                // Gaussian). Clone and reset the internal RNG with the
2359                // partition's sub_seed so each worker explores a unique
2360                // subsequence without repeating the parent stream.
2361                if let Some(mut adv) = self.advanced_amount_sampler.clone() {
2362                    adv.reset(sub_seed.wrapping_add(2));
2363                    gen.advanced_amount_sampler = Some(adv);
2364                }
2365                // v3.5.3+: conditional amount override — clone + reset
2366                // so each partition gets a fresh deterministic stream.
2367                if let Some(mut cond) = self.conditional_amount_override.clone() {
2368                    cond.reset(sub_seed.wrapping_add(17));
2369                    gen.conditional_amount_override = Some(cond);
2370                }
2371                // v3.5.4+: copula sampler — clone + reset per partition.
2372                if let Some(mut cop) = self.correlation_copula.clone() {
2373                    cop.reset(sub_seed.wrapping_add(31));
2374                    gen.correlation_copula = Some(cop);
2375                }
2376
2377                // Use partitioned UUID factory to eliminate atomic contention
2378                gen.uuid_factory = DeterministicUuidFactory::for_partition(
2379                    sub_seed,
2380                    GeneratorType::JournalEntry,
2381                    i as u8,
2382                );
2383
2384                // Copy temporal patterns if configured
2385                if let Some(ref config) = self.temporal_patterns_config {
2386                    gen.temporal_patterns_config = Some(config.clone());
2387                    // Rebuild business day calculator from the stored config
2388                    if config.business_days.enabled {
2389                        if let Some(ref bdc) = self.business_day_calculator {
2390                            gen.business_day_calculator = Some(bdc.clone());
2391                        }
2392                    }
2393                    // Rebuild processing lag calculator with partition seed
2394                    if config.processing_lags.enabled {
2395                        let lag_config =
2396                            Self::convert_processing_lag_config(&config.processing_lags);
2397                        gen.processing_lag_calculator =
2398                            Some(ProcessingLagCalculator::with_config(sub_seed, lag_config));
2399                    }
2400                }
2401
2402                // Copy drift controller if present
2403                if let Some(ref dc) = self.drift_controller {
2404                    gen.drift_controller = Some(dc.clone());
2405                }
2406
2407                gen
2408            })
2409            .collect()
2410    }
2411}
2412
2413#[cfg(test)]
2414#[allow(clippy::unwrap_used)]
2415mod tests {
2416    use super::*;
2417    use crate::ChartOfAccountsGenerator;
2418
2419    #[test]
2420    fn test_generate_balanced_entries() {
2421        let mut coa_gen =
2422            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2423        let coa = Arc::new(coa_gen.generate());
2424
2425        let mut je_gen = JournalEntryGenerator::new_with_params(
2426            TransactionConfig::default(),
2427            coa,
2428            vec!["1000".to_string()],
2429            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2430            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2431            42,
2432        );
2433
2434        let mut balanced_count = 0;
2435        for _ in 0..100 {
2436            let entry = je_gen.generate();
2437
2438            // Skip entries with human errors as they may be intentionally unbalanced
2439            let has_human_error = entry
2440                .header
2441                .header_text
2442                .as_ref()
2443                .map(|t| t.contains("[HUMAN_ERROR:"))
2444                .unwrap_or(false);
2445
2446            if !has_human_error {
2447                assert!(
2448                    entry.is_balanced(),
2449                    "Entry {:?} is not balanced",
2450                    entry.header.document_id
2451                );
2452                balanced_count += 1;
2453            }
2454            assert!(entry.line_count() >= 2, "Entry has fewer than 2 lines");
2455        }
2456
2457        // Ensure most entries are balanced (human errors are rare)
2458        assert!(
2459            balanced_count >= 80,
2460            "Expected at least 80 balanced entries, got {}",
2461            balanced_count
2462        );
2463    }
2464
2465    #[test]
2466    fn test_deterministic_generation() {
2467        let mut coa_gen =
2468            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2469        let coa = Arc::new(coa_gen.generate());
2470
2471        let mut gen1 = JournalEntryGenerator::new_with_params(
2472            TransactionConfig::default(),
2473            Arc::clone(&coa),
2474            vec!["1000".to_string()],
2475            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2476            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2477            42,
2478        );
2479
2480        let mut gen2 = JournalEntryGenerator::new_with_params(
2481            TransactionConfig::default(),
2482            coa,
2483            vec!["1000".to_string()],
2484            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2485            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2486            42,
2487        );
2488
2489        for _ in 0..50 {
2490            let e1 = gen1.generate();
2491            let e2 = gen2.generate();
2492            assert_eq!(e1.header.document_id, e2.header.document_id);
2493            assert_eq!(e1.total_debit(), e2.total_debit());
2494        }
2495    }
2496
2497    #[test]
2498    fn test_templates_generate_descriptions() {
2499        let mut coa_gen =
2500            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2501        let coa = Arc::new(coa_gen.generate());
2502
2503        // Enable all template features
2504        let template_config = TemplateConfig {
2505            names: datasynth_config::schema::NameTemplateConfig {
2506                generate_realistic_names: true,
2507                email_domain: "test.com".to_string(),
2508                culture_distribution: datasynth_config::schema::CultureDistribution::default(),
2509            },
2510            descriptions: datasynth_config::schema::DescriptionTemplateConfig {
2511                generate_header_text: true,
2512                generate_line_text: true,
2513            },
2514            references: datasynth_config::schema::ReferenceTemplateConfig {
2515                generate_references: true,
2516                invoice_prefix: "TEST-INV".to_string(),
2517                po_prefix: "TEST-PO".to_string(),
2518                so_prefix: "TEST-SO".to_string(),
2519            },
2520            path: None,
2521            merge_strategy: datasynth_config::TemplateMergeStrategy::default(),
2522        };
2523
2524        let mut je_gen = JournalEntryGenerator::new_with_full_config(
2525            TransactionConfig::default(),
2526            coa,
2527            vec!["1000".to_string()],
2528            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2529            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2530            42,
2531            template_config,
2532            None,
2533        )
2534        .with_persona_errors(false); // Disable for template testing
2535
2536        for _ in 0..10 {
2537            let entry = je_gen.generate();
2538
2539            // Verify header text is populated
2540            assert!(
2541                entry.header.header_text.is_some(),
2542                "Header text should be populated"
2543            );
2544
2545            // Verify reference is populated
2546            assert!(
2547                entry.header.reference.is_some(),
2548                "Reference should be populated"
2549            );
2550
2551            // Verify business process is set
2552            assert!(
2553                entry.header.business_process.is_some(),
2554                "Business process should be set"
2555            );
2556
2557            // Verify line text is populated
2558            for line in &entry.lines {
2559                assert!(line.line_text.is_some(), "Line text should be populated");
2560            }
2561
2562            // Entry should still be balanced
2563            assert!(entry.is_balanced());
2564        }
2565    }
2566
2567    #[test]
2568    fn test_user_pool_integration() {
2569        let mut coa_gen =
2570            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2571        let coa = Arc::new(coa_gen.generate());
2572
2573        let companies = vec!["1000".to_string()];
2574
2575        // Generate user pool
2576        let mut user_gen = crate::UserGenerator::new(42);
2577        let user_pool = user_gen.generate_standard(&companies);
2578
2579        let mut je_gen = JournalEntryGenerator::new_with_full_config(
2580            TransactionConfig::default(),
2581            coa,
2582            companies,
2583            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2584            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2585            42,
2586            TemplateConfig::default(),
2587            Some(user_pool),
2588        );
2589
2590        // Generate entries and verify user IDs are from pool
2591        for _ in 0..20 {
2592            let entry = je_gen.generate();
2593
2594            // User ID should not be generic BATCH/USER format when pool is used
2595            // (though it may still fall back if random selection misses)
2596            assert!(!entry.header.created_by.is_empty());
2597        }
2598    }
2599
2600    #[test]
2601    fn test_master_data_connection() {
2602        let mut coa_gen =
2603            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2604        let coa = Arc::new(coa_gen.generate());
2605
2606        // Create test vendors
2607        let vendors = vec![
2608            Vendor::new("V-TEST-001", "Test Vendor Alpha", VendorType::Supplier),
2609            Vendor::new("V-TEST-002", "Test Vendor Beta", VendorType::Technology),
2610        ];
2611
2612        // Create test customers
2613        let customers = vec![
2614            Customer::new("C-TEST-001", "Test Customer One", CustomerType::Corporate),
2615            Customer::new(
2616                "C-TEST-002",
2617                "Test Customer Two",
2618                CustomerType::SmallBusiness,
2619            ),
2620        ];
2621
2622        // Create test materials
2623        let materials = vec![Material::new(
2624            "MAT-TEST-001",
2625            "Test Material A",
2626            MaterialType::RawMaterial,
2627        )];
2628
2629        // Create generator with master data
2630        let generator = JournalEntryGenerator::new_with_params(
2631            TransactionConfig::default(),
2632            coa,
2633            vec!["1000".to_string()],
2634            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2635            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2636            42,
2637        );
2638
2639        // Without master data
2640        assert!(!generator.is_using_real_master_data());
2641
2642        // Connect master data
2643        let generator_with_data = generator
2644            .with_vendors(&vendors)
2645            .with_customers(&customers)
2646            .with_materials(&materials);
2647
2648        // Should now be using real master data
2649        assert!(generator_with_data.is_using_real_master_data());
2650    }
2651
2652    #[test]
2653    fn test_with_master_data_convenience_method() {
2654        let mut coa_gen =
2655            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2656        let coa = Arc::new(coa_gen.generate());
2657
2658        let vendors = vec![Vendor::new("V-001", "Vendor One", VendorType::Supplier)];
2659        let customers = vec![Customer::new(
2660            "C-001",
2661            "Customer One",
2662            CustomerType::Corporate,
2663        )];
2664        let materials = vec![Material::new(
2665            "MAT-001",
2666            "Material One",
2667            MaterialType::RawMaterial,
2668        )];
2669
2670        let generator = JournalEntryGenerator::new_with_params(
2671            TransactionConfig::default(),
2672            coa,
2673            vec!["1000".to_string()],
2674            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2675            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2676            42,
2677        )
2678        .with_master_data(&vendors, &customers, &materials);
2679
2680        assert!(generator.is_using_real_master_data());
2681    }
2682
2683    #[test]
2684    fn test_stress_factors_increase_error_rate() {
2685        let mut coa_gen =
2686            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2687        let coa = Arc::new(coa_gen.generate());
2688
2689        let generator = JournalEntryGenerator::new_with_params(
2690            TransactionConfig::default(),
2691            coa,
2692            vec!["1000".to_string()],
2693            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2694            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2695            42,
2696        );
2697
2698        let base_rate = 0.1;
2699
2700        // Regular day - no stress factors
2701        let regular_day = NaiveDate::from_ymd_opt(2024, 6, 15).unwrap(); // Mid-June Wednesday
2702        let regular_rate = generator.apply_stress_factors(base_rate, regular_day);
2703        assert!(
2704            (regular_rate - base_rate).abs() < 0.01,
2705            "Regular day should have minimal stress factor adjustment"
2706        );
2707
2708        // Month end - 50% more errors
2709        let month_end = NaiveDate::from_ymd_opt(2024, 6, 29).unwrap(); // June 29 (Saturday)
2710        let month_end_rate = generator.apply_stress_factors(base_rate, month_end);
2711        assert!(
2712            month_end_rate > regular_rate,
2713            "Month end should have higher error rate than regular day"
2714        );
2715
2716        // Year end - double the error rate
2717        let year_end = NaiveDate::from_ymd_opt(2024, 12, 30).unwrap(); // December 30
2718        let year_end_rate = generator.apply_stress_factors(base_rate, year_end);
2719        assert!(
2720            year_end_rate > month_end_rate,
2721            "Year end should have highest error rate"
2722        );
2723
2724        // Friday stress
2725        let friday = NaiveDate::from_ymd_opt(2024, 6, 14).unwrap(); // Friday
2726        let friday_rate = generator.apply_stress_factors(base_rate, friday);
2727        assert!(
2728            friday_rate > regular_rate,
2729            "Friday should have higher error rate than mid-week"
2730        );
2731
2732        // Monday stress
2733        let monday = NaiveDate::from_ymd_opt(2024, 6, 17).unwrap(); // Monday
2734        let monday_rate = generator.apply_stress_factors(base_rate, monday);
2735        assert!(
2736            monday_rate > regular_rate,
2737            "Monday should have higher error rate than mid-week"
2738        );
2739    }
2740
2741    #[test]
2742    fn test_batching_produces_similar_entries() {
2743        let mut coa_gen =
2744            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2745        let coa = Arc::new(coa_gen.generate());
2746
2747        // Use seed 123 which is more likely to trigger batching
2748        let mut je_gen = JournalEntryGenerator::new_with_params(
2749            TransactionConfig::default(),
2750            coa,
2751            vec!["1000".to_string()],
2752            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2753            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2754            123,
2755        )
2756        .with_persona_errors(false); // Disable to ensure balanced entries
2757
2758        // Generate many entries - at 15% batch rate, should see some batches
2759        let entries: Vec<JournalEntry> = (0..200).map(|_| je_gen.generate()).collect();
2760
2761        // Check that all entries are balanced (batched or not)
2762        for entry in &entries {
2763            assert!(
2764                entry.is_balanced(),
2765                "All entries including batched should be balanced"
2766            );
2767        }
2768
2769        // Count entries with same-day posting dates (batch indicator)
2770        let mut date_counts: std::collections::HashMap<NaiveDate, usize> =
2771            std::collections::HashMap::new();
2772        for entry in &entries {
2773            *date_counts.entry(entry.header.posting_date).or_insert(0) += 1;
2774        }
2775
2776        // With batching, some dates should have multiple entries
2777        let dates_with_multiple = date_counts.values().filter(|&&c| c > 1).count();
2778        assert!(
2779            dates_with_multiple > 0,
2780            "With batching, should see some dates with multiple entries"
2781        );
2782    }
2783
2784    #[test]
2785    fn test_temporal_patterns_business_days() {
2786        use datasynth_config::schema::{
2787            BusinessDaySchemaConfig, CalendarSchemaConfig, TemporalPatternsConfig,
2788        };
2789
2790        let mut coa_gen =
2791            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2792        let coa = Arc::new(coa_gen.generate());
2793
2794        // Create temporal patterns config with business days enabled
2795        let temporal_config = TemporalPatternsConfig {
2796            enabled: true,
2797            business_days: BusinessDaySchemaConfig {
2798                enabled: true,
2799                ..Default::default()
2800            },
2801            calendars: CalendarSchemaConfig {
2802                regions: vec!["US".to_string()],
2803                custom_holidays: vec![],
2804            },
2805            ..Default::default()
2806        };
2807
2808        let mut je_gen = JournalEntryGenerator::new_with_params(
2809            TransactionConfig::default(),
2810            coa,
2811            vec!["1000".to_string()],
2812            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2813            NaiveDate::from_ymd_opt(2024, 3, 31).unwrap(), // Q1 2024
2814            42,
2815        )
2816        .with_temporal_patterns(temporal_config, 42)
2817        .with_persona_errors(false);
2818
2819        // Generate entries and verify none fall on weekends
2820        let entries: Vec<JournalEntry> = (0..100).map(|_| je_gen.generate()).collect();
2821
2822        for entry in &entries {
2823            let weekday = entry.header.posting_date.weekday();
2824            assert!(
2825                weekday != chrono::Weekday::Sat && weekday != chrono::Weekday::Sun,
2826                "Posting date {:?} should not be a weekend",
2827                entry.header.posting_date
2828            );
2829        }
2830    }
2831
2832    #[test]
2833    fn test_default_generation_filters_weekends() {
2834        // Verify that weekend entries are <5% even when temporal_patterns is NOT enabled.
2835        // This tests the fix where new_with_full_config always creates a default
2836        // BusinessDayCalculator with US holidays as a fallback.
2837        let mut coa_gen =
2838            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2839        let coa = Arc::new(coa_gen.generate());
2840
2841        let mut je_gen = JournalEntryGenerator::new_with_params(
2842            TransactionConfig::default(),
2843            coa,
2844            vec!["1000".to_string()],
2845            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2846            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2847            42,
2848        )
2849        .with_persona_errors(false);
2850
2851        let total = 500;
2852        let entries: Vec<JournalEntry> = (0..total).map(|_| je_gen.generate()).collect();
2853
2854        let weekend_count = entries
2855            .iter()
2856            .filter(|e| {
2857                let wd = e.header.posting_date.weekday();
2858                wd == chrono::Weekday::Sat || wd == chrono::Weekday::Sun
2859            })
2860            .count();
2861
2862        let weekend_pct = weekend_count as f64 / total as f64;
2863        assert!(
2864            weekend_pct < 0.05,
2865            "Expected weekend entries <5% of total without temporal_patterns enabled, \
2866             but got {:.1}% ({}/{})",
2867            weekend_pct * 100.0,
2868            weekend_count,
2869            total
2870        );
2871    }
2872
2873    #[test]
2874    fn test_document_type_derived_from_business_process() {
2875        let mut coa_gen =
2876            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2877        let coa = Arc::new(coa_gen.generate());
2878
2879        let mut je_gen = JournalEntryGenerator::new_with_params(
2880            TransactionConfig::default(),
2881            coa,
2882            vec!["1000".to_string()],
2883            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2884            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2885            99,
2886        )
2887        .with_persona_errors(false)
2888        .with_batching(false);
2889
2890        let total = 200;
2891        let mut doc_types = std::collections::HashSet::new();
2892        let mut sa_count = 0_usize;
2893
2894        for _ in 0..total {
2895            let entry = je_gen.generate();
2896            let dt = &entry.header.document_type;
2897            doc_types.insert(dt.clone());
2898            if dt == "SA" {
2899                sa_count += 1;
2900            }
2901        }
2902
2903        // Should have more than 3 distinct document types
2904        assert!(
2905            doc_types.len() > 3,
2906            "Expected >3 distinct document types, got {} ({:?})",
2907            doc_types.len(),
2908            doc_types,
2909        );
2910
2911        // "SA" should be less than 50% (R2R is 20% of the weight)
2912        let sa_pct = sa_count as f64 / total as f64;
2913        assert!(
2914            sa_pct < 0.50,
2915            "Expected SA <50%, got {:.1}% ({}/{})",
2916            sa_pct * 100.0,
2917            sa_count,
2918            total,
2919        );
2920    }
2921
2922    #[test]
2923    fn test_enrich_line_items_account_description() {
2924        let mut coa_gen =
2925            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2926        let coa = Arc::new(coa_gen.generate());
2927
2928        let mut je_gen = JournalEntryGenerator::new_with_params(
2929            TransactionConfig::default(),
2930            coa,
2931            vec!["1000".to_string()],
2932            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2933            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2934            42,
2935        )
2936        .with_persona_errors(false);
2937
2938        let total = 200;
2939        let entries: Vec<JournalEntry> = (0..total).map(|_| je_gen.generate()).collect();
2940
2941        // Count lines with account_description populated
2942        let total_lines: usize = entries.iter().map(|e| e.lines.len()).sum();
2943        let lines_with_desc: usize = entries
2944            .iter()
2945            .flat_map(|e| &e.lines)
2946            .filter(|l| l.account_description.is_some())
2947            .count();
2948
2949        let desc_pct = lines_with_desc as f64 / total_lines as f64;
2950        assert!(
2951            desc_pct > 0.95,
2952            "Expected >95% of lines to have account_description, got {:.1}% ({}/{})",
2953            desc_pct * 100.0,
2954            lines_with_desc,
2955            total_lines,
2956        );
2957    }
2958
2959    #[test]
2960    fn test_enrich_line_items_cost_center_for_expense_accounts() {
2961        let mut coa_gen =
2962            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2963        let coa = Arc::new(coa_gen.generate());
2964
2965        let mut je_gen = JournalEntryGenerator::new_with_params(
2966            TransactionConfig::default(),
2967            coa,
2968            vec!["1000".to_string()],
2969            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2970            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2971            42,
2972        )
2973        .with_persona_errors(false);
2974
2975        let total = 300;
2976        let entries: Vec<JournalEntry> = (0..total).map(|_| je_gen.generate()).collect();
2977
2978        // Count expense account lines (5xxx/6xxx) with cost_center populated
2979        let expense_lines: Vec<&JournalEntryLine> = entries
2980            .iter()
2981            .flat_map(|e| &e.lines)
2982            .filter(|l| {
2983                let first = l.gl_account.chars().next().unwrap_or('0');
2984                first == '5' || first == '6'
2985            })
2986            .collect();
2987
2988        if !expense_lines.is_empty() {
2989            let with_cc = expense_lines
2990                .iter()
2991                .filter(|l| l.cost_center.is_some())
2992                .count();
2993            let cc_pct = with_cc as f64 / expense_lines.len() as f64;
2994            assert!(
2995                cc_pct > 0.80,
2996                "Expected >80% of expense lines to have cost_center, got {:.1}% ({}/{})",
2997                cc_pct * 100.0,
2998                with_cc,
2999                expense_lines.len(),
3000            );
3001        }
3002    }
3003
/// Checks that line-item enrichment fills in `profit_center` and `line_text`
/// on more than 95% of the lines produced by 100 generated entries.
#[test]
fn test_enrich_line_items_profit_center_and_line_text() {
    let mut chart_builder =
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
    let chart = Arc::new(chart_builder.generate());

    let period_start = NaiveDate::from_ymd_opt(2024, 1, 1).unwrap();
    let period_end = NaiveDate::from_ymd_opt(2024, 12, 31).unwrap();
    let mut generator = JournalEntryGenerator::new_with_params(
        TransactionConfig::default(),
        chart,
        vec!["1000".to_string()],
        period_start,
        period_end,
        42,
    )
    .with_persona_errors(false);

    let entries: Vec<JournalEntry> = std::iter::repeat_with(|| generator.generate())
        .take(100)
        .collect();

    // Flatten every entry's lines once; both ratios below share this denominator.
    let all_lines: Vec<&JournalEntryLine> = entries.iter().flat_map(|e| &e.lines).collect();
    let total_lines = all_lines.len();

    // More than 95% of the lines must carry a profit_center.
    let with_pc = all_lines
        .iter()
        .filter(|line| line.profit_center.is_some())
        .count();
    let pc_pct = with_pc as f64 / total_lines as f64;
    assert!(
        pc_pct > 0.95,
        "Expected >95% of lines to have profit_center, got {:.1}% ({}/{})",
        pc_pct * 100.0,
        with_pc,
        total_lines,
    );

    // More than 95% of the lines must carry a line_text.
    let with_text = all_lines
        .iter()
        .filter(|line| line.line_text.is_some())
        .count();
    let text_pct = with_text as f64 / total_lines as f64;
    assert!(
        text_pct > 0.95,
        "Expected >95% of lines to have line_text, got {:.1}% ({}/{})",
        text_pct * 100.0,
        with_text,
        total_lines,
    );
}
3055
3056    // --- ISA 240 audit flag tests ---
3057
/// Checks that each of 100 generated entries has a non-empty
/// `source_system`, a non-empty `created_by`, and a `created_date`.
#[test]
fn test_je_has_audit_flags() {
    let mut chart_builder =
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
    let chart = Arc::new(chart_builder.generate());

    let period_start = NaiveDate::from_ymd_opt(2024, 1, 1).unwrap();
    let period_end = NaiveDate::from_ymd_opt(2024, 12, 31).unwrap();
    let mut generator = JournalEntryGenerator::new_with_params(
        TransactionConfig::default(),
        chart,
        vec!["1000".to_string()],
        period_start,
        period_end,
        42,
    )
    .with_persona_errors(false);

    (0..100).for_each(|_| {
        let entry = generator.generate();
        let header = &entry.header;

        // The originating system must be recorded on every entry.
        assert!(
            !header.source_system.is_empty(),
            "source_system should be populated, got empty string"
        );

        // The creating user must be recorded on every entry.
        assert!(
            !header.created_by.is_empty(),
            "created_by should be populated"
        );

        // The creation timestamp must be present on every entry.
        assert!(
            header.created_date.is_some(),
            "created_date should be populated"
        );
    });
}
3096
/// Checks that, over 1000 entries generated with batching disabled, the share
/// of manual entries lies strictly between 1% and 50%, and that `is_manual`
/// agrees with `source == TransactionSource::Manual` on every entry.
#[test]
fn test_manual_entry_rate() {
    let mut chart_builder =
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
    let chart = Arc::new(chart_builder.generate());

    let period_start = NaiveDate::from_ymd_opt(2024, 1, 1).unwrap();
    let period_end = NaiveDate::from_ymd_opt(2024, 12, 31).unwrap();
    let mut generator = JournalEntryGenerator::new_with_params(
        TransactionConfig::default(),
        chart,
        vec!["1000".to_string()],
        period_start,
        period_end,
        42,
    )
    .with_persona_errors(false)
    .with_batching(false);

    let total = 1000;
    let mut entries: Vec<JournalEntry> = Vec::new();
    for _ in 0..total {
        entries.push(generator.generate());
    }

    let mut manual_count: usize = 0;
    for entry in &entries {
        if entry.header.is_manual {
            manual_count += 1;
        }
    }
    let manual_rate = manual_count as f64 / total as f64;

    // Deliberately wide band so ordinary statistical variation cannot trip it.
    let rate_is_plausible = 0.01 < manual_rate && manual_rate < 0.50;
    assert!(
        rate_is_plausible,
        "Manual entry rate should be reasonable (1%-50%), got {:.1}% ({}/{})",
        manual_rate * 100.0,
        manual_count,
        total,
    );

    // The boolean flag and the source enum must tell the same story.
    entries.iter().for_each(|entry| {
        let source_is_manual = entry.header.source == TransactionSource::Manual;
        assert_eq!(
            entry.header.is_manual, source_is_manual,
            "is_manual should match source == Manual"
        );
    });
}
3139
/// Checks, over 500 entries generated with batching disabled, that an entry's
/// `source_system` is "manual" or "spreadsheet" exactly when `is_manual` is set.
#[test]
fn test_manual_source_consistency() {
    let mut chart_builder =
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
    let chart = Arc::new(chart_builder.generate());

    let period_start = NaiveDate::from_ymd_opt(2024, 1, 1).unwrap();
    let period_end = NaiveDate::from_ymd_opt(2024, 12, 31).unwrap();
    let mut generator = JournalEntryGenerator::new_with_params(
        TransactionConfig::default(),
        chart,
        vec!["1000".to_string()],
        period_start,
        period_end,
        42,
    )
    .with_persona_errors(false)
    .with_batching(false);

    let mut remaining = 500;
    while remaining > 0 {
        remaining -= 1;

        let entry = generator.generate();
        let header = &entry.header;

        // True when the recorded system is one of the two manual-entry channels.
        let from_manual_channel =
            header.source_system == "manual" || header.source_system == "spreadsheet";

        match header.is_manual {
            // Manual entries must come from a manual-entry channel.
            true => assert!(
                from_manual_channel,
                "Manual entry should have source_system 'manual' or 'spreadsheet', got '{}'",
                header.source_system,
            ),
            // Everything else must come from some other system.
            false => assert!(
                !from_manual_channel,
                "Non-manual entry should not have source_system 'manual' or 'spreadsheet', got '{}'",
                header.source_system,
            ),
        }
    }
}
3179
/// Checks, over 500 generated entries, that whenever `created_date` is present
/// its calendar date is not later than the entry's `posting_date`.
/// Entries without a `created_date` are skipped.
#[test]
fn test_created_date_before_posting() {
    let mut chart_builder =
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
    let chart = Arc::new(chart_builder.generate());

    let period_start = NaiveDate::from_ymd_opt(2024, 1, 1).unwrap();
    let period_end = NaiveDate::from_ymd_opt(2024, 12, 31).unwrap();
    let mut generator = JournalEntryGenerator::new_with_params(
        TransactionConfig::default(),
        chart,
        vec!["1000".to_string()],
        period_start,
        period_end,
        42,
    )
    .with_persona_errors(false);

    std::iter::repeat_with(|| generator.generate())
        .take(500)
        .for_each(|entry| {
            // Only the date part of the creation timestamp is compared.
            match entry.header.created_date {
                Some(created_at) => {
                    let created_naive_date = created_at.date();
                    assert!(
                        created_naive_date <= entry.header.posting_date,
                        "created_date ({}) should be <= posting_date ({})",
                        created_naive_date,
                        entry.header.posting_date,
                    );
                }
                None => {}
            }
        });
}
3210}