Skip to main content

datasynth_generators/
je_generator.rs

1//! Journal Entry generator with statistical distributions.
2
3use chrono::{Datelike, NaiveDate};
4use datasynth_core::utils::seeded_rng;
5use rand::prelude::*;
6use rand_chacha::ChaCha8Rng;
7use rust_decimal::prelude::*;
8use rust_decimal::Decimal;
9use std::sync::Arc;
10
11use tracing::debug;
12
13use datasynth_config::schema::{
14    FraudConfig, GeneratorConfig, TemplateConfig, TemporalPatternsConfig, TransactionConfig,
15};
16use datasynth_core::distributions::{
17    BusinessDayCalculator, CrossDayConfig, DriftAdjustments, DriftConfig, DriftController,
18    EventType, LagDistribution, PeriodEndConfig, PeriodEndDynamics, PeriodEndModel,
19    ProcessingLagCalculator, ProcessingLagConfig, *,
20};
21use datasynth_core::models::*;
22use datasynth_core::templates::{
23    descriptions::DescriptionContext, DescriptionGenerator, ReferenceGenerator, ReferenceType,
24};
25use datasynth_core::traits::Generator;
26use datasynth_core::uuid_factory::{DeterministicUuidFactory, GeneratorType};
27use datasynth_core::CountryPack;
28
29use crate::company_selector::WeightedCompanySelector;
30use crate::user_generator::{UserGenerator, UserGeneratorConfig};
31
32/// Generator for realistic journal entries.
33pub struct JournalEntryGenerator {
34    rng: ChaCha8Rng,
35    seed: u64,
36    config: TransactionConfig,
37    coa: Arc<ChartOfAccounts>,
38    companies: Vec<String>,
39    company_selector: WeightedCompanySelector,
40    line_sampler: LineItemSampler,
41    amount_sampler: AmountSampler,
42    temporal_sampler: TemporalSampler,
43    start_date: NaiveDate,
44    end_date: NaiveDate,
45    count: u64,
46    uuid_factory: DeterministicUuidFactory,
47    // Enhanced features
48    user_pool: Option<UserPool>,
49    description_generator: DescriptionGenerator,
50    reference_generator: ReferenceGenerator,
51    template_config: TemplateConfig,
52    vendor_pool: VendorPool,
53    customer_pool: CustomerPool,
54    // Material pool for realistic material references
55    material_pool: Option<MaterialPool>,
56    // Flag indicating whether we're using real master data vs defaults
57    using_real_master_data: bool,
58    // Fraud generation
59    fraud_config: FraudConfig,
60    // Persona-based error injection
61    persona_errors_enabled: bool,
62    // Approval threshold enforcement
63    approval_enabled: bool,
64    approval_threshold: rust_decimal::Decimal,
65    // Batching behavior - humans often process similar items together
66    batch_state: Option<BatchState>,
67    // Temporal drift controller for simulating distribution changes over time
68    drift_controller: Option<DriftController>,
69    // Temporal patterns components
70    business_day_calculator: Option<BusinessDayCalculator>,
71    processing_lag_calculator: Option<ProcessingLagCalculator>,
72    temporal_patterns_config: Option<TemporalPatternsConfig>,
73}
74
75/// State for tracking batch processing behavior.
76///
77/// When humans process transactions, they often batch similar items together
78/// (e.g., processing all invoices from one vendor, entering similar expenses).
79#[derive(Clone)]
80struct BatchState {
81    /// The base entry template to vary
82    base_account_number: String,
83    base_amount: rust_decimal::Decimal,
84    base_business_process: Option<BusinessProcess>,
85    base_posting_date: NaiveDate,
86    /// Remaining entries in this batch
87    remaining: u8,
88}
89
90impl JournalEntryGenerator {
91    /// Create a new journal entry generator.
92    pub fn new_with_params(
93        config: TransactionConfig,
94        coa: Arc<ChartOfAccounts>,
95        companies: Vec<String>,
96        start_date: NaiveDate,
97        end_date: NaiveDate,
98        seed: u64,
99    ) -> Self {
100        Self::new_with_full_config(
101            config,
102            coa,
103            companies,
104            start_date,
105            end_date,
106            seed,
107            TemplateConfig::default(),
108            None,
109        )
110    }
111
112    /// Create a new journal entry generator with full configuration.
113    #[allow(clippy::too_many_arguments)]
114    pub fn new_with_full_config(
115        config: TransactionConfig,
116        coa: Arc<ChartOfAccounts>,
117        companies: Vec<String>,
118        start_date: NaiveDate,
119        end_date: NaiveDate,
120        seed: u64,
121        template_config: TemplateConfig,
122        user_pool: Option<UserPool>,
123    ) -> Self {
124        // Initialize user pool if not provided
125        let user_pool = user_pool.or_else(|| {
126            if template_config.names.generate_realistic_names {
127                let user_gen_config = UserGeneratorConfig {
128                    culture_distribution: vec![
129                        (
130                            datasynth_core::templates::NameCulture::WesternUs,
131                            template_config.names.culture_distribution.western_us,
132                        ),
133                        (
134                            datasynth_core::templates::NameCulture::Hispanic,
135                            template_config.names.culture_distribution.hispanic,
136                        ),
137                        (
138                            datasynth_core::templates::NameCulture::German,
139                            template_config.names.culture_distribution.german,
140                        ),
141                        (
142                            datasynth_core::templates::NameCulture::French,
143                            template_config.names.culture_distribution.french,
144                        ),
145                        (
146                            datasynth_core::templates::NameCulture::Chinese,
147                            template_config.names.culture_distribution.chinese,
148                        ),
149                        (
150                            datasynth_core::templates::NameCulture::Japanese,
151                            template_config.names.culture_distribution.japanese,
152                        ),
153                        (
154                            datasynth_core::templates::NameCulture::Indian,
155                            template_config.names.culture_distribution.indian,
156                        ),
157                    ],
158                    email_domain: template_config.names.email_domain.clone(),
159                    generate_realistic_names: true,
160                };
161                let mut user_gen = UserGenerator::with_config(seed + 100, user_gen_config);
162                Some(user_gen.generate_standard(&companies))
163            } else {
164                None
165            }
166        });
167
168        // Initialize reference generator
169        let mut ref_gen = ReferenceGenerator::new(
170            start_date.year(),
171            companies
172                .first()
173                .map(std::string::String::as_str)
174                .unwrap_or("1000"),
175        );
176        ref_gen.set_prefix(
177            ReferenceType::Invoice,
178            &template_config.references.invoice_prefix,
179        );
180        ref_gen.set_prefix(
181            ReferenceType::PurchaseOrder,
182            &template_config.references.po_prefix,
183        );
184        ref_gen.set_prefix(
185            ReferenceType::SalesOrder,
186            &template_config.references.so_prefix,
187        );
188
189        // Create weighted company selector (uniform weights for this constructor)
190        let company_selector = WeightedCompanySelector::uniform(companies.clone());
191
192        Self {
193            rng: seeded_rng(seed, 0),
194            seed,
195            config: config.clone(),
196            coa,
197            companies,
198            company_selector,
199            line_sampler: LineItemSampler::with_config(
200                seed + 1,
201                config.line_item_distribution.clone(),
202                config.even_odd_distribution.clone(),
203                config.debit_credit_distribution.clone(),
204            ),
205            amount_sampler: AmountSampler::with_config(seed + 2, config.amounts.clone()),
206            temporal_sampler: TemporalSampler::with_config(
207                seed + 3,
208                config.seasonality.clone(),
209                WorkingHoursConfig::default(),
210                Vec::new(),
211            ),
212            start_date,
213            end_date,
214            count: 0,
215            uuid_factory: DeterministicUuidFactory::new(seed, GeneratorType::JournalEntry),
216            user_pool,
217            description_generator: DescriptionGenerator::new(),
218            reference_generator: ref_gen,
219            template_config,
220            vendor_pool: VendorPool::standard(),
221            customer_pool: CustomerPool::standard(),
222            material_pool: None,
223            using_real_master_data: false,
224            fraud_config: FraudConfig::default(),
225            persona_errors_enabled: true, // Enable by default for realism
226            approval_enabled: true,       // Enable by default for realism
227            approval_threshold: rust_decimal::Decimal::new(10000, 0), // $10,000 default threshold
228            batch_state: None,
229            drift_controller: None,
230            // Always provide a basic BusinessDayCalculator so that weekend/holiday
231            // filtering is active even when temporal_patterns is not explicitly enabled.
232            business_day_calculator: Some(BusinessDayCalculator::new(HolidayCalendar::new(
233                Region::US,
234                start_date.year(),
235            ))),
236            processing_lag_calculator: None,
237            temporal_patterns_config: None,
238        }
239    }
240
241    /// Create from a full GeneratorConfig.
242    ///
243    /// This constructor uses the volume_weight from company configs
244    /// for weighted company selection, and fraud config from GeneratorConfig.
245    pub fn from_generator_config(
246        full_config: &GeneratorConfig,
247        coa: Arc<ChartOfAccounts>,
248        start_date: NaiveDate,
249        end_date: NaiveDate,
250        seed: u64,
251    ) -> Self {
252        let companies: Vec<String> = full_config
253            .companies
254            .iter()
255            .map(|c| c.code.clone())
256            .collect();
257
258        // Create weighted selector using volume_weight from company configs
259        let company_selector = WeightedCompanySelector::from_configs(&full_config.companies);
260
261        let mut generator = Self::new_with_full_config(
262            full_config.transactions.clone(),
263            coa,
264            companies,
265            start_date,
266            end_date,
267            seed,
268            full_config.templates.clone(),
269            None,
270        );
271
272        // Override the uniform selector with weighted selector
273        generator.company_selector = company_selector;
274
275        // Set fraud config
276        generator.fraud_config = full_config.fraud.clone();
277
278        // Configure temporal patterns if enabled
279        let temporal_config = &full_config.temporal_patterns;
280        if temporal_config.enabled {
281            generator = generator.with_temporal_patterns(temporal_config.clone(), seed);
282        }
283
284        generator
285    }
286
287    /// Configure temporal patterns including business day calculations and processing lags.
288    ///
289    /// This enables realistic temporal behavior including:
290    /// - Business day awareness (no postings on weekends/holidays)
291    /// - Processing lag modeling (event-to-posting delays)
292    /// - Period-end dynamics (volume spikes at month/quarter/year end)
293    pub fn with_temporal_patterns(mut self, config: TemporalPatternsConfig, seed: u64) -> Self {
294        // Create business day calculator if enabled
295        if config.business_days.enabled {
296            let region = config
297                .calendars
298                .regions
299                .first()
300                .map(|r| Self::parse_region(r))
301                .unwrap_or(Region::US);
302
303            let calendar = HolidayCalendar::new(region, self.start_date.year());
304            self.business_day_calculator = Some(BusinessDayCalculator::new(calendar));
305        }
306
307        // Create processing lag calculator if enabled
308        if config.processing_lags.enabled {
309            let lag_config = Self::convert_processing_lag_config(&config.processing_lags);
310            self.processing_lag_calculator =
311                Some(ProcessingLagCalculator::with_config(seed, lag_config));
312        }
313
314        // Create period-end dynamics if configured
315        let model = config.period_end.model.as_deref().unwrap_or("flat");
316        if model != "flat"
317            || config
318                .period_end
319                .month_end
320                .as_ref()
321                .is_some_and(|m| m.peak_multiplier.unwrap_or(1.0) != 1.0)
322        {
323            let dynamics = Self::convert_period_end_config(&config.period_end);
324            self.temporal_sampler.set_period_end_dynamics(dynamics);
325        }
326
327        self.temporal_patterns_config = Some(config);
328        self
329    }
330
331    /// Configure temporal patterns using a [`CountryPack`] for the holiday calendar.
332    ///
333    /// This is an alternative to [`with_temporal_patterns`] that derives the
334    /// holiday calendar from a country-pack definition rather than the built-in
335    /// region-based calendars.  All other temporal behaviour (business-day
336    /// adjustment, processing lags, period-end dynamics) is configured
337    /// identically.
338    pub fn with_country_pack_temporal(
339        mut self,
340        config: TemporalPatternsConfig,
341        seed: u64,
342        pack: &CountryPack,
343    ) -> Self {
344        // Create business day calculator using the country pack calendar
345        if config.business_days.enabled {
346            let calendar = HolidayCalendar::from_country_pack(pack, self.start_date.year());
347            self.business_day_calculator = Some(BusinessDayCalculator::new(calendar));
348        }
349
350        // Create processing lag calculator if enabled
351        if config.processing_lags.enabled {
352            let lag_config = Self::convert_processing_lag_config(&config.processing_lags);
353            self.processing_lag_calculator =
354                Some(ProcessingLagCalculator::with_config(seed, lag_config));
355        }
356
357        // Create period-end dynamics if configured
358        let model = config.period_end.model.as_deref().unwrap_or("flat");
359        if model != "flat"
360            || config
361                .period_end
362                .month_end
363                .as_ref()
364                .is_some_and(|m| m.peak_multiplier.unwrap_or(1.0) != 1.0)
365        {
366            let dynamics = Self::convert_period_end_config(&config.period_end);
367            self.temporal_sampler.set_period_end_dynamics(dynamics);
368        }
369
370        self.temporal_patterns_config = Some(config);
371        self
372    }
373
374    /// Convert schema processing lag config to core config.
375    fn convert_processing_lag_config(
376        schema: &datasynth_config::schema::ProcessingLagSchemaConfig,
377    ) -> ProcessingLagConfig {
378        let mut config = ProcessingLagConfig {
379            enabled: schema.enabled,
380            ..Default::default()
381        };
382
383        // Helper to convert lag schema to distribution
384        let convert_lag = |lag: &datasynth_config::schema::LagDistributionSchemaConfig| {
385            let mut dist = LagDistribution::log_normal(lag.mu, lag.sigma);
386            if let Some(min) = lag.min_hours {
387                dist.min_lag_hours = min;
388            }
389            if let Some(max) = lag.max_hours {
390                dist.max_lag_hours = max;
391            }
392            dist
393        };
394
395        // Apply event-specific lags
396        if let Some(ref lag) = schema.sales_order_lag {
397            config
398                .event_lags
399                .insert(EventType::SalesOrder, convert_lag(lag));
400        }
401        if let Some(ref lag) = schema.purchase_order_lag {
402            config
403                .event_lags
404                .insert(EventType::PurchaseOrder, convert_lag(lag));
405        }
406        if let Some(ref lag) = schema.goods_receipt_lag {
407            config
408                .event_lags
409                .insert(EventType::GoodsReceipt, convert_lag(lag));
410        }
411        if let Some(ref lag) = schema.invoice_receipt_lag {
412            config
413                .event_lags
414                .insert(EventType::InvoiceReceipt, convert_lag(lag));
415        }
416        if let Some(ref lag) = schema.invoice_issue_lag {
417            config
418                .event_lags
419                .insert(EventType::InvoiceIssue, convert_lag(lag));
420        }
421        if let Some(ref lag) = schema.payment_lag {
422            config
423                .event_lags
424                .insert(EventType::Payment, convert_lag(lag));
425        }
426        if let Some(ref lag) = schema.journal_entry_lag {
427            config
428                .event_lags
429                .insert(EventType::JournalEntry, convert_lag(lag));
430        }
431
432        // Apply cross-day posting config
433        if let Some(ref cross_day) = schema.cross_day_posting {
434            config.cross_day = CrossDayConfig {
435                enabled: cross_day.enabled,
436                probability_by_hour: cross_day.probability_by_hour.clone(),
437                ..Default::default()
438            };
439        }
440
441        config
442    }
443
444    /// Convert schema period-end config to core PeriodEndDynamics.
445    fn convert_period_end_config(
446        schema: &datasynth_config::schema::PeriodEndSchemaConfig,
447    ) -> PeriodEndDynamics {
448        let model_type = schema.model.as_deref().unwrap_or("exponential");
449
450        // Helper to convert period config
451        let convert_period =
452            |period: Option<&datasynth_config::schema::PeriodEndModelSchemaConfig>,
453             default_peak: f64|
454             -> PeriodEndConfig {
455                if let Some(p) = period {
456                    let model = match model_type {
457                        "flat" => PeriodEndModel::FlatMultiplier {
458                            multiplier: p.peak_multiplier.unwrap_or(default_peak),
459                        },
460                        "extended_crunch" => PeriodEndModel::ExtendedCrunch {
461                            start_day: p.start_day.unwrap_or(-10),
462                            sustained_high_days: p.sustained_high_days.unwrap_or(3),
463                            peak_multiplier: p.peak_multiplier.unwrap_or(default_peak),
464                            ramp_up_days: 3, // Default ramp-up period
465                        },
466                        _ => PeriodEndModel::ExponentialAcceleration {
467                            start_day: p.start_day.unwrap_or(-10),
468                            base_multiplier: p.base_multiplier.unwrap_or(1.0),
469                            peak_multiplier: p.peak_multiplier.unwrap_or(default_peak),
470                            decay_rate: p.decay_rate.unwrap_or(0.3),
471                        },
472                    };
473                    PeriodEndConfig {
474                        enabled: true,
475                        model,
476                        additional_multiplier: p.additional_multiplier.unwrap_or(1.0),
477                    }
478                } else {
479                    PeriodEndConfig {
480                        enabled: true,
481                        model: PeriodEndModel::ExponentialAcceleration {
482                            start_day: -10,
483                            base_multiplier: 1.0,
484                            peak_multiplier: default_peak,
485                            decay_rate: 0.3,
486                        },
487                        additional_multiplier: 1.0,
488                    }
489                }
490            };
491
492        PeriodEndDynamics::new(
493            convert_period(schema.month_end.as_ref(), 2.0),
494            convert_period(schema.quarter_end.as_ref(), 3.5),
495            convert_period(schema.year_end.as_ref(), 5.0),
496        )
497    }
498
499    /// Parse a region string into a Region enum.
500    fn parse_region(region_str: &str) -> Region {
501        match region_str.to_uppercase().as_str() {
502            "US" => Region::US,
503            "DE" => Region::DE,
504            "GB" => Region::GB,
505            "CN" => Region::CN,
506            "JP" => Region::JP,
507            "IN" => Region::IN,
508            "BR" => Region::BR,
509            "MX" => Region::MX,
510            "AU" => Region::AU,
511            "SG" => Region::SG,
512            "KR" => Region::KR,
513            "FR" => Region::FR,
514            "IT" => Region::IT,
515            "ES" => Region::ES,
516            "CA" => Region::CA,
517            _ => Region::US,
518        }
519    }
520
521    /// Set a custom company selector.
522    pub fn set_company_selector(&mut self, selector: WeightedCompanySelector) {
523        self.company_selector = selector;
524    }
525
526    /// Get the current company selector.
527    pub fn company_selector(&self) -> &WeightedCompanySelector {
528        &self.company_selector
529    }
530
531    /// Set fraud configuration.
532    pub fn set_fraud_config(&mut self, config: FraudConfig) {
533        self.fraud_config = config;
534    }
535
536    /// Set vendors from generated master data.
537    ///
538    /// This replaces the default vendor pool with actual generated vendors,
539    /// ensuring JEs reference real master data entities.
540    pub fn with_vendors(mut self, vendors: &[Vendor]) -> Self {
541        if !vendors.is_empty() {
542            self.vendor_pool = VendorPool::from_vendors(vendors.to_vec());
543            self.using_real_master_data = true;
544        }
545        self
546    }
547
548    /// Set customers from generated master data.
549    ///
550    /// This replaces the default customer pool with actual generated customers,
551    /// ensuring JEs reference real master data entities.
552    pub fn with_customers(mut self, customers: &[Customer]) -> Self {
553        if !customers.is_empty() {
554            self.customer_pool = CustomerPool::from_customers(customers.to_vec());
555            self.using_real_master_data = true;
556        }
557        self
558    }
559
560    /// Set materials from generated master data.
561    ///
562    /// This provides material references for JEs that involve inventory movements.
563    pub fn with_materials(mut self, materials: &[Material]) -> Self {
564        if !materials.is_empty() {
565            self.material_pool = Some(MaterialPool::from_materials(materials.to_vec()));
566            self.using_real_master_data = true;
567        }
568        self
569    }
570
571    /// Set all master data at once for convenience.
572    ///
573    /// This is the recommended way to configure the JE generator with
574    /// generated master data to ensure data coherence.
575    pub fn with_master_data(
576        self,
577        vendors: &[Vendor],
578        customers: &[Customer],
579        materials: &[Material],
580    ) -> Self {
581        self.with_vendors(vendors)
582            .with_customers(customers)
583            .with_materials(materials)
584    }
585
586    /// Replace the user pool with one generated from a [`CountryPack`].
587    ///
588    /// This is an alternative to the default name-culture distribution that
589    /// derives name pools and weights from the country-pack's `names` section.
590    /// The existing user pool (if any) is discarded and regenerated using
591    /// [`MultiCultureNameGenerator::from_country_pack`].
592    pub fn with_country_pack_names(mut self, pack: &CountryPack) -> Self {
593        let name_gen =
594            datasynth_core::templates::MultiCultureNameGenerator::from_country_pack(pack);
595        let config = UserGeneratorConfig {
596            // The culture distribution is embedded in the name generator
597            // itself, so we use an empty list here.
598            culture_distribution: Vec::new(),
599            email_domain: name_gen.email_domain().to_string(),
600            generate_realistic_names: true,
601        };
602        let mut user_gen = UserGenerator::with_name_generator(self.seed + 100, config, name_gen);
603        self.user_pool = Some(user_gen.generate_standard(&self.companies));
604        self
605    }
606
607    /// Check if the generator is using real master data.
608    pub fn is_using_real_master_data(&self) -> bool {
609        self.using_real_master_data
610    }
611
612    /// Determine if this transaction should be fraudulent.
613    fn determine_fraud(&mut self) -> Option<FraudType> {
614        if !self.fraud_config.enabled {
615            return None;
616        }
617
618        // Roll for fraud based on fraud rate
619        if self.rng.random::<f64>() >= self.fraud_config.fraud_rate {
620            return None;
621        }
622
623        // Select fraud type based on distribution
624        Some(self.select_fraud_type())
625    }
626
627    /// Select a fraud type based on the configured distribution.
628    fn select_fraud_type(&mut self) -> FraudType {
629        let dist = &self.fraud_config.fraud_type_distribution;
630        let roll: f64 = self.rng.random();
631
632        let mut cumulative = 0.0;
633
634        cumulative += dist.suspense_account_abuse;
635        if roll < cumulative {
636            return FraudType::SuspenseAccountAbuse;
637        }
638
639        cumulative += dist.fictitious_transaction;
640        if roll < cumulative {
641            return FraudType::FictitiousTransaction;
642        }
643
644        cumulative += dist.revenue_manipulation;
645        if roll < cumulative {
646            return FraudType::RevenueManipulation;
647        }
648
649        cumulative += dist.expense_capitalization;
650        if roll < cumulative {
651            return FraudType::ExpenseCapitalization;
652        }
653
654        cumulative += dist.split_transaction;
655        if roll < cumulative {
656            return FraudType::SplitTransaction;
657        }
658
659        cumulative += dist.timing_anomaly;
660        if roll < cumulative {
661            return FraudType::TimingAnomaly;
662        }
663
664        cumulative += dist.unauthorized_access;
665        if roll < cumulative {
666            return FraudType::UnauthorizedAccess;
667        }
668
669        // Default fallback
670        FraudType::DuplicatePayment
671    }
672
673    /// Map a fraud type to an amount pattern for suspicious amounts.
674    fn fraud_type_to_amount_pattern(&self, fraud_type: FraudType) -> FraudAmountPattern {
675        match fraud_type {
676            FraudType::SplitTransaction | FraudType::JustBelowThreshold => {
677                FraudAmountPattern::ThresholdAdjacent
678            }
679            FraudType::FictitiousTransaction
680            | FraudType::FictitiousEntry
681            | FraudType::SuspenseAccountAbuse
682            | FraudType::RoundDollarManipulation => FraudAmountPattern::ObviousRoundNumbers,
683            FraudType::RevenueManipulation
684            | FraudType::ExpenseCapitalization
685            | FraudType::ImproperCapitalization
686            | FraudType::ReserveManipulation
687            | FraudType::UnauthorizedAccess
688            | FraudType::PrematureRevenue
689            | FraudType::UnderstatedLiabilities
690            | FraudType::OverstatedAssets
691            | FraudType::ChannelStuffing => FraudAmountPattern::StatisticallyImprobable,
692            FraudType::DuplicatePayment
693            | FraudType::TimingAnomaly
694            | FraudType::SelfApproval
695            | FraudType::ExceededApprovalLimit
696            | FraudType::SegregationOfDutiesViolation
697            | FraudType::UnauthorizedApproval
698            | FraudType::CollusiveApproval
699            | FraudType::FictitiousVendor
700            | FraudType::ShellCompanyPayment
701            | FraudType::Kickback
702            | FraudType::KickbackScheme
703            | FraudType::InvoiceManipulation
704            | FraudType::AssetMisappropriation
705            | FraudType::InventoryTheft
706            | FraudType::GhostEmployee => FraudAmountPattern::Normal,
707            // Accounting Standards Fraud Types (ASC 606/IFRS 15 - Revenue)
708            FraudType::ImproperRevenueRecognition
709            | FraudType::ImproperPoAllocation
710            | FraudType::VariableConsiderationManipulation
711            | FraudType::ContractModificationMisstatement => {
712                FraudAmountPattern::StatisticallyImprobable
713            }
714            // Accounting Standards Fraud Types (ASC 842/IFRS 16 - Leases)
715            FraudType::LeaseClassificationManipulation
716            | FraudType::OffBalanceSheetLease
717            | FraudType::LeaseLiabilityUnderstatement
718            | FraudType::RouAssetMisstatement => FraudAmountPattern::StatisticallyImprobable,
719            // Accounting Standards Fraud Types (ASC 820/IFRS 13 - Fair Value)
720            FraudType::FairValueHierarchyManipulation
721            | FraudType::Level3InputManipulation
722            | FraudType::ValuationTechniqueManipulation => {
723                FraudAmountPattern::StatisticallyImprobable
724            }
725            // Accounting Standards Fraud Types (ASC 360/IAS 36 - Impairment)
726            FraudType::DelayedImpairment
727            | FraudType::ImpairmentTestAvoidance
728            | FraudType::CashFlowProjectionManipulation
729            | FraudType::ImproperImpairmentReversal => FraudAmountPattern::StatisticallyImprobable,
730            // Sourcing/Procurement Fraud
731            FraudType::BidRigging
732            | FraudType::PhantomVendorContract
733            | FraudType::ConflictOfInterestSourcing => FraudAmountPattern::Normal,
734            FraudType::SplitContractThreshold => FraudAmountPattern::ThresholdAdjacent,
735            // HR/Payroll Fraud
736            FraudType::GhostEmployeePayroll
737            | FraudType::PayrollInflation
738            | FraudType::DuplicateExpenseReport
739            | FraudType::FictitiousExpense => FraudAmountPattern::Normal,
740            FraudType::SplitExpenseToAvoidApproval => FraudAmountPattern::ThresholdAdjacent,
741            // O2C Fraud
742            FraudType::RevenueTimingManipulation => FraudAmountPattern::StatisticallyImprobable,
743            FraudType::QuotePriceOverride => FraudAmountPattern::Normal,
744        }
745    }
746
747    /// Generate a deterministic UUID using the factory.
748    #[inline]
749    fn generate_deterministic_uuid(&self) -> uuid::Uuid {
750        self.uuid_factory.next()
751    }
752
753    /// Cost center pool used for expense account enrichment.
754    const COST_CENTER_POOL: &'static [&'static str] =
755        &["CC1000", "CC2000", "CC3000", "CC4000", "CC5000"];
756
757    /// Enrich journal entry line items with account descriptions, cost centers,
758    /// profit centers, value dates, line text, and assignment fields.
759    ///
760    /// This populates the sparse optional fields that `JournalEntryLine::debit()`
761    /// and `::credit()` leave as `None`.
762    fn enrich_line_items(&self, entry: &mut JournalEntry) {
763        let posting_date = entry.header.posting_date;
764        let company_code = &entry.header.company_code;
765        let header_text = entry.header.header_text.clone();
766        let business_process = entry.header.business_process;
767
768        // Derive a deterministic index from the document_id for cost center selection
769        let doc_id_bytes = entry.header.document_id.as_bytes();
770        let mut cc_seed: usize = 0;
771        for &b in doc_id_bytes {
772            cc_seed = cc_seed.wrapping_add(b as usize);
773        }
774
775        for (i, line) in entry.lines.iter_mut().enumerate() {
776            // 1. account_description: look up from CoA
777            if line.account_description.is_none() {
778                line.account_description = self
779                    .coa
780                    .get_account(&line.gl_account)
781                    .map(|a| a.short_description.clone());
782            }
783
784            // 2. cost_center: assign to expense accounts (5xxx/6xxx)
785            if line.cost_center.is_none() {
786                let first_char = line.gl_account.chars().next().unwrap_or('0');
787                if first_char == '5' || first_char == '6' {
788                    let idx = cc_seed.wrapping_add(i) % Self::COST_CENTER_POOL.len();
789                    line.cost_center = Some(Self::COST_CENTER_POOL[idx].to_string());
790                }
791            }
792
793            // 3. profit_center: derive from company code + business process
794            if line.profit_center.is_none() {
795                let suffix = match business_process {
796                    Some(BusinessProcess::P2P) => "-P2P",
797                    Some(BusinessProcess::O2C) => "-O2C",
798                    Some(BusinessProcess::R2R) => "-R2R",
799                    Some(BusinessProcess::H2R) => "-H2R",
800                    _ => "",
801                };
802                line.profit_center = Some(format!("PC-{company_code}{suffix}"));
803            }
804
805            // 4. line_text: fall back to header_text if not already set
806            if line.line_text.is_none() {
807                line.line_text = header_text.clone();
808            }
809
810            // 5. value_date: set to posting_date for AR/AP accounts
811            if line.value_date.is_none()
812                && (line.gl_account.starts_with("1100") || line.gl_account.starts_with("2000"))
813            {
814                line.value_date = Some(posting_date);
815            }
816
817            // 6. assignment: set to vendor/customer reference for AP/AR lines
818            if line.assignment.is_none() {
819                if line.gl_account.starts_with("2000") {
820                    // AP line - use vendor reference from header
821                    if let Some(ref ht) = header_text {
822                        // Try to extract vendor ID from header text patterns like "... - V-001"
823                        if let Some(vendor_part) = ht.rsplit(" - ").next() {
824                            if vendor_part.starts_with("V-")
825                                || vendor_part.starts_with("VENDOR")
826                                || vendor_part.starts_with("Vendor")
827                            {
828                                line.assignment = Some(vendor_part.to_string());
829                            }
830                        }
831                    }
832                } else if line.gl_account.starts_with("1100") {
833                    // AR line - use customer reference from header
834                    if let Some(ref ht) = header_text {
835                        if let Some(customer_part) = ht.rsplit(" - ").next() {
836                            if customer_part.starts_with("C-")
837                                || customer_part.starts_with("CUST")
838                                || customer_part.starts_with("Customer")
839                            {
840                                line.assignment = Some(customer_part.to_string());
841                            }
842                        }
843                    }
844                }
845            }
846        }
847    }
848
849    /// Generate a single journal entry.
850    pub fn generate(&mut self) -> JournalEntry {
851        debug!(
852            count = self.count,
853            companies = self.companies.len(),
854            start_date = %self.start_date,
855            end_date = %self.end_date,
856            "Generating journal entry"
857        );
858
859        // Check if we're in a batch - if so, generate a batched entry
860        if let Some(ref state) = self.batch_state {
861            if state.remaining > 0 {
862                return self.generate_batched_entry();
863            }
864        }
865
866        self.count += 1;
867
868        // Generate deterministic document ID
869        let document_id = self.generate_deterministic_uuid();
870
871        // Sample posting date
872        let mut posting_date = self
873            .temporal_sampler
874            .sample_date(self.start_date, self.end_date);
875
876        // Adjust posting date to be a business day if business day calculator is configured
877        if let Some(ref calc) = self.business_day_calculator {
878            if !calc.is_business_day(posting_date) {
879                // Move to next business day
880                posting_date = calc.next_business_day(posting_date, false);
881                // Ensure we don't exceed end_date
882                if posting_date > self.end_date {
883                    posting_date = calc.prev_business_day(self.end_date, true);
884                }
885            }
886        }
887
888        // Select company using weighted selector
889        let company_code = self.company_selector.select(&mut self.rng).to_string();
890
891        // Sample line item specification
892        let line_spec = self.line_sampler.sample();
893
894        // Determine source type using full 4-way distribution
895        let source = self.select_source();
896        let is_automated = matches!(
897            source,
898            TransactionSource::Automated | TransactionSource::Recurring
899        );
900
901        // Select business process
902        let business_process = self.select_business_process();
903
904        // Determine if this is a fraudulent transaction
905        let fraud_type = self.determine_fraud();
906        let is_fraud = fraud_type.is_some();
907
908        // Sample time based on source
909        let time = self.temporal_sampler.sample_time(!is_automated);
910        let created_at = posting_date.and_time(time).and_utc();
911
912        // Select user from pool or generate generic
913        let (created_by, user_persona) = self.select_user(is_automated);
914
915        // Create header with deterministic UUID
916        let mut header =
917            JournalEntryHeader::with_deterministic_id(company_code, posting_date, document_id);
918        header.created_at = created_at;
919        header.source = source;
920        header.created_by = created_by;
921        header.user_persona = user_persona;
922        header.business_process = Some(business_process);
923        header.document_type = Self::document_type_for_process(business_process).to_string();
924        header.is_fraud = is_fraud;
925        header.fraud_type = fraud_type;
926
927        // Generate description context
928        let mut context =
929            DescriptionContext::with_period(posting_date.month(), posting_date.year());
930
931        // Add vendor/customer context based on business process
932        match business_process {
933            BusinessProcess::P2P => {
934                if let Some(vendor) = self.vendor_pool.random_vendor(&mut self.rng) {
935                    context.vendor_name = Some(vendor.name.clone());
936                }
937            }
938            BusinessProcess::O2C => {
939                if let Some(customer) = self.customer_pool.random_customer(&mut self.rng) {
940                    context.customer_name = Some(customer.name.clone());
941                }
942            }
943            _ => {}
944        }
945
946        // Generate header text if enabled
947        if self.template_config.descriptions.generate_header_text {
948            header.header_text = Some(self.description_generator.generate_header_text(
949                business_process,
950                &context,
951                &mut self.rng,
952            ));
953        }
954
955        // Generate reference if enabled
956        if self.template_config.references.generate_references {
957            header.reference = Some(
958                self.reference_generator
959                    .generate_for_process_year(business_process, posting_date.year()),
960            );
961        }
962
963        // Generate line items
964        let mut entry = JournalEntry::new(header);
965
966        // Generate amount - use fraud pattern if this is a fraudulent transaction
967        let base_amount = if let Some(ft) = fraud_type {
968            let pattern = self.fraud_type_to_amount_pattern(ft);
969            self.amount_sampler.sample_fraud(pattern)
970        } else {
971            self.amount_sampler.sample()
972        };
973
974        // Apply temporal drift if configured
975        let drift_adjusted_amount = {
976            let drift = self.get_drift_adjustments(posting_date);
977            if drift.amount_mean_multiplier != 1.0 {
978                // Apply drift multiplier (includes seasonal factor if enabled)
979                let multiplier = drift.amount_mean_multiplier * drift.seasonal_factor;
980                let adjusted = base_amount.to_f64().unwrap_or(1.0) * multiplier;
981                Decimal::from_f64_retain(adjusted).unwrap_or(base_amount)
982            } else {
983                base_amount
984            }
985        };
986
987        // Apply human variation to amounts for non-automated transactions
988        let total_amount = if is_automated {
989            drift_adjusted_amount // Automated systems use exact amounts
990        } else {
991            self.apply_human_variation(drift_adjusted_amount)
992        };
993
994        // Generate debit lines
995        let debit_amounts = self
996            .amount_sampler
997            .sample_summing_to(line_spec.debit_count, total_amount);
998        for (i, amount) in debit_amounts.into_iter().enumerate() {
999            let account_number = self.select_debit_account().account_number.clone();
1000            let mut line = JournalEntryLine::debit(
1001                entry.header.document_id,
1002                (i + 1) as u32,
1003                account_number.clone(),
1004                amount,
1005            );
1006
1007            // Generate line text if enabled
1008            if self.template_config.descriptions.generate_line_text {
1009                line.line_text = Some(self.description_generator.generate_line_text(
1010                    &account_number,
1011                    &context,
1012                    &mut self.rng,
1013                ));
1014            }
1015
1016            entry.add_line(line);
1017        }
1018
1019        // Generate credit lines - use the SAME amounts to ensure balance
1020        let credit_amounts = self
1021            .amount_sampler
1022            .sample_summing_to(line_spec.credit_count, total_amount);
1023        for (i, amount) in credit_amounts.into_iter().enumerate() {
1024            let account_number = self.select_credit_account().account_number.clone();
1025            let mut line = JournalEntryLine::credit(
1026                entry.header.document_id,
1027                (line_spec.debit_count + i + 1) as u32,
1028                account_number.clone(),
1029                amount,
1030            );
1031
1032            // Generate line text if enabled
1033            if self.template_config.descriptions.generate_line_text {
1034                line.line_text = Some(self.description_generator.generate_line_text(
1035                    &account_number,
1036                    &context,
1037                    &mut self.rng,
1038                ));
1039            }
1040
1041            entry.add_line(line);
1042        }
1043
1044        // Enrich line items with account descriptions, cost centers, etc.
1045        self.enrich_line_items(&mut entry);
1046
1047        // Apply persona-based errors if enabled and it's a human user
1048        if self.persona_errors_enabled && !is_automated {
1049            self.maybe_inject_persona_error(&mut entry);
1050        }
1051
1052        // Apply approval workflow if enabled and amount exceeds threshold
1053        if self.approval_enabled {
1054            self.maybe_apply_approval_workflow(&mut entry, posting_date);
1055        }
1056
1057        // Maybe start a batch of similar entries for realism
1058        self.maybe_start_batch(&entry);
1059
1060        entry
1061    }
1062
1063    /// Enable or disable persona-based error injection.
1064    ///
1065    /// When enabled, entries created by human personas have a chance
1066    /// to contain realistic human errors based on their experience level.
1067    pub fn with_persona_errors(mut self, enabled: bool) -> Self {
1068        self.persona_errors_enabled = enabled;
1069        self
1070    }
1071
1072    /// Set fraud configuration for fraud injection.
1073    ///
1074    /// When fraud is enabled in the config, transactions have a chance
1075    /// to be marked as fraudulent based on the configured fraud rate.
1076    pub fn with_fraud_config(mut self, config: FraudConfig) -> Self {
1077        self.fraud_config = config;
1078        self
1079    }
1080
    /// Check if persona errors are enabled.
    ///
    /// Returns the flag set through `with_persona_errors`.
    pub fn persona_errors_enabled(&self) -> bool {
        self.persona_errors_enabled
    }
1085
1086    /// Enable or disable batch processing behavior.
1087    ///
1088    /// When enabled (default), the generator will occasionally produce batches
1089    /// of similar entries, simulating how humans batch similar work together.
1090    pub fn with_batching(mut self, enabled: bool) -> Self {
1091        if !enabled {
1092            self.batch_state = None;
1093        }
1094        self
1095    }
1096
    /// Check if batch processing is enabled.
    ///
    /// Currently always returns `true`: the generator keeps no flag recording
    /// whether batching was disabled, so this does not reflect a prior
    /// `with_batching(false)` call.
    pub fn batching_enabled(&self) -> bool {
        // Batching is implicitly enabled when not explicitly disabled
        true
    }
1102
1103    /// Maybe start a batch based on the current entry.
1104    ///
1105    /// Humans often batch similar work: processing invoices from one vendor,
1106    /// entering expense reports for a trip, reconciling similar items.
1107    fn maybe_start_batch(&mut self, entry: &JournalEntry) {
1108        // Only start batch for non-automated, non-fraud entries
1109        if entry.header.source == TransactionSource::Automated || entry.header.is_fraud {
1110            return;
1111        }
1112
1113        // 15% chance to start a batch (most work is not batched)
1114        if self.rng.random::<f64>() > 0.15 {
1115            return;
1116        }
1117
1118        // Extract key attributes for batching
1119        let base_account = entry
1120            .lines
1121            .first()
1122            .map(|l| l.gl_account.clone())
1123            .unwrap_or_default();
1124
1125        let base_amount = entry.total_debit();
1126
1127        self.batch_state = Some(BatchState {
1128            base_account_number: base_account,
1129            base_amount,
1130            base_business_process: entry.header.business_process,
1131            base_posting_date: entry.header.posting_date,
1132            remaining: self.rng.random_range(2..7), // 2-6 more similar entries
1133        });
1134    }
1135
1136    /// Generate an entry that's part of the current batch.
1137    ///
1138    /// Batched entries have:
1139    /// - Same or very similar business process
1140    /// - Same posting date (batched work done together)
1141    /// - Similar amounts (within ±15%)
1142    /// - Same debit account (processing similar items)
1143    fn generate_batched_entry(&mut self) -> JournalEntry {
1144        use rust_decimal::Decimal;
1145
1146        // Decrement batch counter
1147        if let Some(ref mut state) = self.batch_state {
1148            state.remaining = state.remaining.saturating_sub(1);
1149        }
1150
1151        let Some(batch) = self.batch_state.clone() else {
1152            // This is a programming error - batch_state should be set before calling this method.
1153            // Clear state and fall back to generating a standard entry instead of panicking.
1154            tracing::warn!(
1155                "generate_batched_entry called without batch_state; generating standard entry"
1156            );
1157            self.batch_state = None;
1158            return self.generate();
1159        };
1160
1161        // Use the batch's posting date (work done on same day)
1162        let posting_date = batch.base_posting_date;
1163
1164        self.count += 1;
1165        let document_id = self.generate_deterministic_uuid();
1166
1167        // Select same company (batched work is usually same company)
1168        let company_code = self.company_selector.select(&mut self.rng).to_string();
1169
1170        // Use simplified line spec for batched entries (usually 2-line)
1171        let _line_spec = LineItemSpec {
1172            total_count: 2,
1173            debit_count: 1,
1174            credit_count: 1,
1175            split_type: DebitCreditSplit::Equal,
1176        };
1177
1178        // Batched entries are always manual
1179        let source = TransactionSource::Manual;
1180
1181        // Use the batch's business process
1182        let business_process = batch.base_business_process.unwrap_or(BusinessProcess::R2R);
1183
1184        // Sample time
1185        let time = self.temporal_sampler.sample_time(true);
1186        let created_at = posting_date.and_time(time).and_utc();
1187
1188        // Same user for batched work
1189        let (created_by, user_persona) = self.select_user(false);
1190
1191        // Create header
1192        let mut header =
1193            JournalEntryHeader::with_deterministic_id(company_code, posting_date, document_id);
1194        header.created_at = created_at;
1195        header.source = source;
1196        header.created_by = created_by;
1197        header.user_persona = user_persona;
1198        header.business_process = Some(business_process);
1199        header.document_type = Self::document_type_for_process(business_process).to_string();
1200
1201        // Generate similar amount (within ±15% of base)
1202        let variation = self.rng.random_range(-0.15..0.15);
1203        let varied_amount =
1204            batch.base_amount * (Decimal::ONE + Decimal::try_from(variation).unwrap_or_default());
1205        let total_amount = varied_amount.round_dp(2).max(Decimal::from(1));
1206
1207        // Create the entry
1208        let mut entry = JournalEntry::new(header);
1209
1210        // Use same debit account as batch base
1211        let debit_line = JournalEntryLine::debit(
1212            entry.header.document_id,
1213            1,
1214            batch.base_account_number.clone(),
1215            total_amount,
1216        );
1217        entry.add_line(debit_line);
1218
1219        // Select a credit account
1220        let credit_account = self.select_credit_account().account_number.clone();
1221        let credit_line =
1222            JournalEntryLine::credit(entry.header.document_id, 2, credit_account, total_amount);
1223        entry.add_line(credit_line);
1224
1225        // Enrich line items with account descriptions, cost centers, etc.
1226        self.enrich_line_items(&mut entry);
1227
1228        // Apply persona-based errors if enabled
1229        if self.persona_errors_enabled {
1230            self.maybe_inject_persona_error(&mut entry);
1231        }
1232
1233        // Apply approval workflow if enabled
1234        if self.approval_enabled {
1235            self.maybe_apply_approval_workflow(&mut entry, posting_date);
1236        }
1237
1238        // Clear batch state if no more entries remaining
1239        if batch.remaining <= 1 {
1240            self.batch_state = None;
1241        }
1242
1243        entry
1244    }
1245
1246    /// Maybe inject a persona-appropriate error based on the persona's error rate.
1247    fn maybe_inject_persona_error(&mut self, entry: &mut JournalEntry) {
1248        // Parse persona from the entry header
1249        let persona_str = &entry.header.user_persona;
1250        let persona = match persona_str.to_lowercase().as_str() {
1251            s if s.contains("junior") => UserPersona::JuniorAccountant,
1252            s if s.contains("senior") => UserPersona::SeniorAccountant,
1253            s if s.contains("controller") => UserPersona::Controller,
1254            s if s.contains("manager") => UserPersona::Manager,
1255            s if s.contains("executive") => UserPersona::Executive,
1256            _ => return, // Don't inject errors for unknown personas
1257        };
1258
1259        // Get base error rate from persona
1260        let base_error_rate = persona.error_rate();
1261
1262        // Apply stress factors based on posting date
1263        let adjusted_rate = self.apply_stress_factors(base_error_rate, entry.header.posting_date);
1264
1265        // Check if error should occur based on adjusted rate
1266        if self.rng.random::<f64>() >= adjusted_rate {
1267            return; // No error this time
1268        }
1269
1270        // Select and inject persona-appropriate error
1271        self.inject_human_error(entry, persona);
1272    }
1273
1274    /// Apply contextual stress factors to the base error rate.
1275    ///
1276    /// Stress factors increase error likelihood during:
1277    /// - Month-end (day >= 28): 1.5x more errors due to deadline pressure
1278    /// - Quarter-end (Mar, Jun, Sep, Dec): additional 25% boost
1279    /// - Year-end (December 28-31): 2.0x more errors due to audit pressure
1280    /// - Monday morning (catch-up work): 20% more errors
1281    /// - Friday afternoon (rushing to leave): 30% more errors
1282    fn apply_stress_factors(&self, base_rate: f64, posting_date: chrono::NaiveDate) -> f64 {
1283        use chrono::Datelike;
1284
1285        let mut rate = base_rate;
1286        let day = posting_date.day();
1287        let month = posting_date.month();
1288
1289        // Year-end stress (December 28-31): double the error rate
1290        if month == 12 && day >= 28 {
1291            rate *= 2.0;
1292            return rate.min(0.5); // Cap at 50% to keep it realistic
1293        }
1294
1295        // Quarter-end stress (last days of Mar, Jun, Sep, Dec)
1296        if matches!(month, 3 | 6 | 9 | 12) && day >= 28 {
1297            rate *= 1.75; // 75% more errors at quarter end
1298            return rate.min(0.4);
1299        }
1300
1301        // Month-end stress (last 3 days of month)
1302        if day >= 28 {
1303            rate *= 1.5; // 50% more errors at month end
1304        }
1305
1306        // Day-of-week stress effects
1307        let weekday = posting_date.weekday();
1308        match weekday {
1309            chrono::Weekday::Mon => {
1310                // Monday: catching up, often rushed
1311                rate *= 1.2;
1312            }
1313            chrono::Weekday::Fri => {
1314                // Friday: rushing to finish before weekend
1315                rate *= 1.3;
1316            }
1317            _ => {}
1318        }
1319
1320        // Cap at 40% to keep it realistic
1321        rate.min(0.4)
1322    }
1323
1324    /// Apply human-like variation to an amount.
1325    ///
1326    /// Humans don't enter perfectly calculated amounts - they:
1327    /// - Round amounts differently
1328    /// - Estimate instead of calculating exactly
1329    /// - Make small input variations
1330    ///
1331    /// This applies small variations (typically ±2%) to make amounts more realistic.
1332    fn apply_human_variation(&mut self, amount: rust_decimal::Decimal) -> rust_decimal::Decimal {
1333        use rust_decimal::Decimal;
1334
1335        // Automated transactions or very small amounts don't get variation
1336        if amount < Decimal::from(10) {
1337            return amount;
1338        }
1339
1340        // 70% chance of human variation being applied
1341        if self.rng.random::<f64>() > 0.70 {
1342            return amount;
1343        }
1344
1345        // Decide which type of human variation to apply
1346        let variation_type: u8 = self.rng.random_range(0..4);
1347
1348        match variation_type {
1349            0 => {
1350                // ±2% variation (common for estimated amounts)
1351                let variation_pct = self.rng.random_range(-0.02..0.02);
1352                let variation = amount * Decimal::try_from(variation_pct).unwrap_or_default();
1353                (amount + variation).round_dp(2)
1354            }
1355            1 => {
1356                // Round to nearest $10
1357                let ten = Decimal::from(10);
1358                (amount / ten).round() * ten
1359            }
1360            2 => {
1361                // Round to nearest $100 (for larger amounts)
1362                if amount >= Decimal::from(500) {
1363                    let hundred = Decimal::from(100);
1364                    (amount / hundred).round() * hundred
1365                } else {
1366                    amount
1367                }
1368            }
1369            3 => {
1370                // Slight under/over payment (±$0.01 to ±$1.00)
1371                let cents = Decimal::new(self.rng.random_range(-100..100), 2);
1372                (amount + cents).max(Decimal::ZERO).round_dp(2)
1373            }
1374            _ => amount,
1375        }
1376    }
1377
1378    /// Rebalance an entry after a one-sided amount modification.
1379    ///
1380    /// When an error modifies one line's amount, this finds a line on the opposite
1381    /// side (credit if modified was debit, or vice versa) and adjusts it by the
1382    /// same impact to maintain balance.
1383    fn rebalance_entry(entry: &mut JournalEntry, modified_was_debit: bool, impact: Decimal) {
1384        // Find a line on the opposite side to adjust
1385        let balancing_idx = entry.lines.iter().position(|l| {
1386            if modified_was_debit {
1387                l.credit_amount > Decimal::ZERO
1388            } else {
1389                l.debit_amount > Decimal::ZERO
1390            }
1391        });
1392
1393        if let Some(idx) = balancing_idx {
1394            if modified_was_debit {
1395                entry.lines[idx].credit_amount += impact;
1396            } else {
1397                entry.lines[idx].debit_amount += impact;
1398            }
1399        }
1400    }
1401
1402    /// Inject a human-like error based on the persona.
1403    ///
1404    /// All error types maintain balance - amount modifications are applied to both sides.
1405    /// Entries are marked with [HUMAN_ERROR:*] tags in header_text for ML detection.
1406    fn inject_human_error(&mut self, entry: &mut JournalEntry, persona: UserPersona) {
1407        use rust_decimal::Decimal;
1408
1409        // Different personas make different types of errors
1410        let error_type: u8 = match persona {
1411            UserPersona::JuniorAccountant => {
1412                // Junior accountants make more varied errors
1413                self.rng.random_range(0..5)
1414            }
1415            UserPersona::SeniorAccountant => {
1416                // Senior accountants mainly make transposition errors
1417                self.rng.random_range(0..3)
1418            }
1419            UserPersona::Controller | UserPersona::Manager => {
1420                // Controllers/managers mainly make rounding or cutoff errors
1421                self.rng.random_range(3..5)
1422            }
1423            _ => return,
1424        };
1425
1426        match error_type {
1427            0 => {
1428                // Transposed digits in an amount
1429                if let Some(line) = entry.lines.get_mut(0) {
1430                    let is_debit = line.debit_amount > Decimal::ZERO;
1431                    let original_amount = if is_debit {
1432                        line.debit_amount
1433                    } else {
1434                        line.credit_amount
1435                    };
1436
1437                    // Simple digit swap in the string representation
1438                    let s = original_amount.to_string();
1439                    if s.len() >= 2 {
1440                        let chars: Vec<char> = s.chars().collect();
1441                        let pos = self.rng.random_range(0..chars.len().saturating_sub(1));
1442                        if chars[pos].is_ascii_digit()
1443                            && chars.get(pos + 1).is_some_and(char::is_ascii_digit)
1444                        {
1445                            let mut new_chars = chars;
1446                            new_chars.swap(pos, pos + 1);
1447                            if let Ok(new_amount) =
1448                                new_chars.into_iter().collect::<String>().parse::<Decimal>()
1449                            {
1450                                let impact = new_amount - original_amount;
1451
1452                                // Apply to the modified line
1453                                if is_debit {
1454                                    entry.lines[0].debit_amount = new_amount;
1455                                } else {
1456                                    entry.lines[0].credit_amount = new_amount;
1457                                }
1458
1459                                // Rebalance the entry
1460                                Self::rebalance_entry(entry, is_debit, impact);
1461
1462                                entry.header.header_text = Some(
1463                                    entry.header.header_text.clone().unwrap_or_default()
1464                                        + " [HUMAN_ERROR:TRANSPOSITION]",
1465                                );
1466                            }
1467                        }
1468                    }
1469                }
1470            }
1471            1 => {
1472                // Wrong decimal place (off by factor of 10)
1473                if let Some(line) = entry.lines.get_mut(0) {
1474                    let is_debit = line.debit_amount > Decimal::ZERO;
1475                    let original_amount = if is_debit {
1476                        line.debit_amount
1477                    } else {
1478                        line.credit_amount
1479                    };
1480
1481                    let new_amount = original_amount * Decimal::new(10, 0);
1482                    let impact = new_amount - original_amount;
1483
1484                    // Apply to the modified line
1485                    if is_debit {
1486                        entry.lines[0].debit_amount = new_amount;
1487                    } else {
1488                        entry.lines[0].credit_amount = new_amount;
1489                    }
1490
1491                    // Rebalance the entry
1492                    Self::rebalance_entry(entry, is_debit, impact);
1493
1494                    entry.header.header_text = Some(
1495                        entry.header.header_text.clone().unwrap_or_default()
1496                            + " [HUMAN_ERROR:DECIMAL_SHIFT]",
1497                    );
1498                }
1499            }
1500            2 => {
1501                // Typo in description (doesn't affect balance)
1502                if let Some(ref mut text) = entry.header.header_text {
1503                    let typos = ["teh", "adn", "wiht", "taht", "recieve"];
1504                    let correct = ["the", "and", "with", "that", "receive"];
1505                    let idx = self.rng.random_range(0..typos.len());
1506                    if text.to_lowercase().contains(correct[idx]) {
1507                        *text = text.replace(correct[idx], typos[idx]);
1508                        *text = format!("{text} [HUMAN_ERROR:TYPO]");
1509                    }
1510                }
1511            }
1512            3 => {
1513                // Rounding to round number
1514                if let Some(line) = entry.lines.get_mut(0) {
1515                    let is_debit = line.debit_amount > Decimal::ZERO;
1516                    let original_amount = if is_debit {
1517                        line.debit_amount
1518                    } else {
1519                        line.credit_amount
1520                    };
1521
1522                    let new_amount =
1523                        (original_amount / Decimal::new(100, 0)).round() * Decimal::new(100, 0);
1524                    let impact = new_amount - original_amount;
1525
1526                    // Apply to the modified line
1527                    if is_debit {
1528                        entry.lines[0].debit_amount = new_amount;
1529                    } else {
1530                        entry.lines[0].credit_amount = new_amount;
1531                    }
1532
1533                    // Rebalance the entry
1534                    Self::rebalance_entry(entry, is_debit, impact);
1535
1536                    entry.header.header_text = Some(
1537                        entry.header.header_text.clone().unwrap_or_default()
1538                            + " [HUMAN_ERROR:ROUNDED]",
1539                    );
1540                }
1541            }
1542            4 => {
1543                // Late posting marker (document date much earlier than posting date)
1544                // This doesn't create an imbalance
1545                if entry.header.document_date == entry.header.posting_date {
1546                    let days_late = self.rng.random_range(5..15);
1547                    entry.header.document_date =
1548                        entry.header.posting_date - chrono::Duration::days(days_late);
1549                    entry.header.header_text = Some(
1550                        entry.header.header_text.clone().unwrap_or_default()
1551                            + " [HUMAN_ERROR:LATE_POSTING]",
1552                    );
1553                }
1554            }
1555            _ => {}
1556        }
1557    }
1558
1559    /// Apply approval workflow for high-value transactions.
1560    ///
1561    /// If the entry amount exceeds the approval threshold, simulate an
1562    /// approval workflow with appropriate approvers based on amount.
1563    fn maybe_apply_approval_workflow(
1564        &mut self,
1565        entry: &mut JournalEntry,
1566        _posting_date: NaiveDate,
1567    ) {
1568        use rust_decimal::Decimal;
1569
1570        let amount = entry.total_debit();
1571
1572        // Skip if amount is below threshold
1573        if amount <= self.approval_threshold {
1574            // Auto-approved below threshold
1575            let workflow = ApprovalWorkflow::auto_approved(
1576                entry.header.created_by.clone(),
1577                entry.header.user_persona.clone(),
1578                amount,
1579                entry.header.created_at,
1580            );
1581            entry.header.approval_workflow = Some(workflow);
1582            return;
1583        }
1584
1585        // Mark as SOX relevant for high-value transactions
1586        entry.header.sox_relevant = true;
1587
1588        // Determine required approval levels based on amount
1589        let required_levels = if amount > Decimal::new(100000, 0) {
1590            3 // Executive approval required
1591        } else if amount > Decimal::new(50000, 0) {
1592            2 // Senior management approval
1593        } else {
1594            1 // Manager approval
1595        };
1596
1597        // Create the approval workflow
1598        let mut workflow = ApprovalWorkflow::new(
1599            entry.header.created_by.clone(),
1600            entry.header.user_persona.clone(),
1601            amount,
1602        );
1603        workflow.required_levels = required_levels;
1604
1605        // Simulate submission
1606        let submit_time = entry.header.created_at;
1607        let submit_action = ApprovalAction::new(
1608            entry.header.created_by.clone(),
1609            entry.header.user_persona.clone(),
1610            self.parse_persona(&entry.header.user_persona),
1611            ApprovalActionType::Submit,
1612            0,
1613        )
1614        .with_timestamp(submit_time);
1615
1616        workflow.actions.push(submit_action);
1617        workflow.status = ApprovalStatus::Pending;
1618        workflow.submitted_at = Some(submit_time);
1619
1620        // Simulate approvals with realistic delays
1621        let mut current_time = submit_time;
1622        for level in 1..=required_levels {
1623            // Add delay for approval (1-3 business hours per level)
1624            let delay_hours = self.rng.random_range(1..4);
1625            current_time += chrono::Duration::hours(delay_hours);
1626
1627            // Skip weekends
1628            while current_time.weekday() == chrono::Weekday::Sat
1629                || current_time.weekday() == chrono::Weekday::Sun
1630            {
1631                current_time += chrono::Duration::days(1);
1632            }
1633
1634            // Generate approver based on level
1635            let (approver_id, approver_role) = self.select_approver(level);
1636
1637            let approve_action = ApprovalAction::new(
1638                approver_id.clone(),
1639                approver_role.to_string(),
1640                approver_role,
1641                ApprovalActionType::Approve,
1642                level,
1643            )
1644            .with_timestamp(current_time);
1645
1646            workflow.actions.push(approve_action);
1647            workflow.current_level = level;
1648        }
1649
1650        // Mark as approved
1651        workflow.status = ApprovalStatus::Approved;
1652        workflow.approved_at = Some(current_time);
1653
1654        entry.header.approval_workflow = Some(workflow);
1655    }
1656
1657    /// Select an approver based on the required level.
1658    fn select_approver(&mut self, level: u8) -> (String, UserPersona) {
1659        let persona = match level {
1660            1 => UserPersona::Manager,
1661            2 => UserPersona::Controller,
1662            _ => UserPersona::Executive,
1663        };
1664
1665        // Try to get from user pool first
1666        if let Some(ref pool) = self.user_pool {
1667            if let Some(user) = pool.get_random_user(persona, &mut self.rng) {
1668                return (user.user_id.clone(), persona);
1669            }
1670        }
1671
1672        // Fallback to generated approver
1673        let approver_id = match persona {
1674            UserPersona::Manager => format!("MGR{:04}", self.rng.random_range(1..100)),
1675            UserPersona::Controller => format!("CTRL{:04}", self.rng.random_range(1..20)),
1676            UserPersona::Executive => format!("EXEC{:04}", self.rng.random_range(1..10)),
1677            _ => format!("USR{:04}", self.rng.random_range(1..1000)),
1678        };
1679
1680        (approver_id, persona)
1681    }
1682
1683    /// Parse user persona from string.
1684    fn parse_persona(&self, persona_str: &str) -> UserPersona {
1685        match persona_str.to_lowercase().as_str() {
1686            s if s.contains("junior") => UserPersona::JuniorAccountant,
1687            s if s.contains("senior") => UserPersona::SeniorAccountant,
1688            s if s.contains("controller") => UserPersona::Controller,
1689            s if s.contains("manager") => UserPersona::Manager,
1690            s if s.contains("executive") => UserPersona::Executive,
1691            s if s.contains("automated") || s.contains("system") => UserPersona::AutomatedSystem,
1692            _ => UserPersona::JuniorAccountant, // Default
1693        }
1694    }
1695
1696    /// Enable or disable approval workflow.
1697    pub fn with_approval(mut self, enabled: bool) -> Self {
1698        self.approval_enabled = enabled;
1699        self
1700    }
1701
1702    /// Set the approval threshold amount.
1703    pub fn with_approval_threshold(mut self, threshold: rust_decimal::Decimal) -> Self {
1704        self.approval_threshold = threshold;
1705        self
1706    }
1707
1708    /// Set the temporal drift controller for simulating distribution changes over time.
1709    ///
1710    /// When drift is enabled, amounts and other distributions will shift based on
1711    /// the period (month) to simulate realistic temporal evolution like inflation
1712    /// or increasing fraud rates.
1713    pub fn with_drift_controller(mut self, controller: DriftController) -> Self {
1714        self.drift_controller = Some(controller);
1715        self
1716    }
1717
1718    /// Set drift configuration directly.
1719    ///
1720    /// Creates a drift controller from the config. Total periods is calculated
1721    /// from the date range.
1722    pub fn with_drift_config(mut self, config: DriftConfig, seed: u64) -> Self {
1723        if config.enabled {
1724            let total_periods = self.calculate_total_periods();
1725            self.drift_controller = Some(DriftController::new(config, seed, total_periods));
1726        }
1727        self
1728    }
1729
1730    /// Calculate total periods (months) in the date range.
1731    fn calculate_total_periods(&self) -> u32 {
1732        let start_year = self.start_date.year();
1733        let start_month = self.start_date.month();
1734        let end_year = self.end_date.year();
1735        let end_month = self.end_date.month();
1736
1737        ((end_year - start_year) * 12 + (end_month as i32 - start_month as i32) + 1).max(1) as u32
1738    }
1739
1740    /// Calculate the period number (0-indexed) for a given date.
1741    fn date_to_period(&self, date: NaiveDate) -> u32 {
1742        let start_year = self.start_date.year();
1743        let start_month = self.start_date.month() as i32;
1744        let date_year = date.year();
1745        let date_month = date.month() as i32;
1746
1747        ((date_year - start_year) * 12 + (date_month - start_month)).max(0) as u32
1748    }
1749
1750    /// Get drift adjustments for a given date.
1751    fn get_drift_adjustments(&self, date: NaiveDate) -> DriftAdjustments {
1752        if let Some(ref controller) = self.drift_controller {
1753            let period = self.date_to_period(date);
1754            controller.compute_adjustments(period)
1755        } else {
1756            DriftAdjustments::none()
1757        }
1758    }
1759
1760    /// Select a user from the pool or generate a generic user ID.
1761    #[inline]
1762    fn select_user(&mut self, is_automated: bool) -> (String, String) {
1763        if let Some(ref pool) = self.user_pool {
1764            let persona = if is_automated {
1765                UserPersona::AutomatedSystem
1766            } else {
1767                // Random distribution among human personas
1768                let roll: f64 = self.rng.random();
1769                if roll < 0.4 {
1770                    UserPersona::JuniorAccountant
1771                } else if roll < 0.7 {
1772                    UserPersona::SeniorAccountant
1773                } else if roll < 0.85 {
1774                    UserPersona::Controller
1775                } else {
1776                    UserPersona::Manager
1777                }
1778            };
1779
1780            if let Some(user) = pool.get_random_user(persona, &mut self.rng) {
1781                return (user.user_id.clone(), user.persona.to_string());
1782            }
1783        }
1784
1785        // Fallback to generic format
1786        if is_automated {
1787            (
1788                format!("BATCH{:04}", self.rng.random_range(1..=20)),
1789                "automated_system".to_string(),
1790            )
1791        } else {
1792            (
1793                format!("USER{:04}", self.rng.random_range(1..=40)),
1794                "senior_accountant".to_string(),
1795            )
1796        }
1797    }
1798
1799    /// Select transaction source based on configuration weights.
1800    #[inline]
1801    fn select_source(&mut self) -> TransactionSource {
1802        let roll: f64 = self.rng.random();
1803        let dist = &self.config.source_distribution;
1804
1805        if roll < dist.manual {
1806            TransactionSource::Manual
1807        } else if roll < dist.manual + dist.automated {
1808            TransactionSource::Automated
1809        } else if roll < dist.manual + dist.automated + dist.recurring {
1810            TransactionSource::Recurring
1811        } else {
1812            TransactionSource::Adjustment
1813        }
1814    }
1815
1816    /// Select a business process based on configuration weights.
1817    #[inline]
1818    /// Map a business process to a SAP-style document type code.
1819    ///
1820    /// - P2P → "KR" (vendor invoice)
1821    /// - O2C → "DR" (customer invoice)
1822    /// - R2R → "SA" (general journal)
1823    /// - H2R → "HR" (HR posting)
1824    /// - A2R → "AA" (asset posting)
1825    /// - others → "SA"
1826    fn document_type_for_process(process: BusinessProcess) -> &'static str {
1827        match process {
1828            BusinessProcess::P2P => "KR",
1829            BusinessProcess::O2C => "DR",
1830            BusinessProcess::R2R => "SA",
1831            BusinessProcess::H2R => "HR",
1832            BusinessProcess::A2R => "AA",
1833            _ => "SA",
1834        }
1835    }
1836
1837    fn select_business_process(&mut self) -> BusinessProcess {
1838        let roll: f64 = self.rng.random();
1839
1840        // Default weights: O2C=35%, P2P=30%, R2R=20%, H2R=10%, A2R=5%
1841        if roll < 0.35 {
1842            BusinessProcess::O2C
1843        } else if roll < 0.65 {
1844            BusinessProcess::P2P
1845        } else if roll < 0.85 {
1846            BusinessProcess::R2R
1847        } else if roll < 0.95 {
1848            BusinessProcess::H2R
1849        } else {
1850            BusinessProcess::A2R
1851        }
1852    }
1853
1854    #[inline]
1855    fn select_debit_account(&mut self) -> &GLAccount {
1856        let accounts = self.coa.get_accounts_by_type(AccountType::Asset);
1857        let expense_accounts = self.coa.get_accounts_by_type(AccountType::Expense);
1858
1859        // 60% asset, 40% expense for debits
1860        let all: Vec<_> = if self.rng.random::<f64>() < 0.6 {
1861            accounts
1862        } else {
1863            expense_accounts
1864        };
1865
1866        all.choose(&mut self.rng).copied().unwrap_or_else(|| {
1867            tracing::warn!(
1868                "Account selection returned empty list, falling back to first COA account"
1869            );
1870            &self.coa.accounts[0]
1871        })
1872    }
1873
1874    #[inline]
1875    fn select_credit_account(&mut self) -> &GLAccount {
1876        let liability_accounts = self.coa.get_accounts_by_type(AccountType::Liability);
1877        let revenue_accounts = self.coa.get_accounts_by_type(AccountType::Revenue);
1878
1879        // 60% liability, 40% revenue for credits
1880        let all: Vec<_> = if self.rng.random::<f64>() < 0.6 {
1881            liability_accounts
1882        } else {
1883            revenue_accounts
1884        };
1885
1886        all.choose(&mut self.rng).copied().unwrap_or_else(|| {
1887            tracing::warn!(
1888                "Account selection returned empty list, falling back to first COA account"
1889            );
1890            &self.coa.accounts[0]
1891        })
1892    }
1893}
1894
1895impl Generator for JournalEntryGenerator {
1896    type Item = JournalEntry;
1897    type Config = (
1898        TransactionConfig,
1899        Arc<ChartOfAccounts>,
1900        Vec<String>,
1901        NaiveDate,
1902        NaiveDate,
1903    );
1904
1905    fn new(config: Self::Config, seed: u64) -> Self {
1906        Self::new_with_params(config.0, config.1, config.2, config.3, config.4, seed)
1907    }
1908
1909    fn generate_one(&mut self) -> Self::Item {
1910        self.generate()
1911    }
1912
1913    fn reset(&mut self) {
1914        self.rng = seeded_rng(self.seed, 0);
1915        self.line_sampler.reset(self.seed + 1);
1916        self.amount_sampler.reset(self.seed + 2);
1917        self.temporal_sampler.reset(self.seed + 3);
1918        self.count = 0;
1919        self.uuid_factory.reset();
1920
1921        // Reset reference generator by recreating it
1922        let mut ref_gen = ReferenceGenerator::new(
1923            self.start_date.year(),
1924            self.companies
1925                .first()
1926                .map(std::string::String::as_str)
1927                .unwrap_or("1000"),
1928        );
1929        ref_gen.set_prefix(
1930            ReferenceType::Invoice,
1931            &self.template_config.references.invoice_prefix,
1932        );
1933        ref_gen.set_prefix(
1934            ReferenceType::PurchaseOrder,
1935            &self.template_config.references.po_prefix,
1936        );
1937        ref_gen.set_prefix(
1938            ReferenceType::SalesOrder,
1939            &self.template_config.references.so_prefix,
1940        );
1941        self.reference_generator = ref_gen;
1942    }
1943
1944    fn count(&self) -> u64 {
1945        self.count
1946    }
1947
1948    fn seed(&self) -> u64 {
1949        self.seed
1950    }
1951}
1952
1953use datasynth_core::traits::ParallelGenerator;
1954
1955impl ParallelGenerator for JournalEntryGenerator {
1956    /// Split this generator into `parts` independent sub-generators.
1957    ///
1958    /// Each sub-generator gets a deterministic seed derived from the parent seed
1959    /// and its partition index, plus a partitioned UUID factory to avoid contention.
1960    /// The results are deterministic for a given partition count.
1961    fn split(self, parts: usize) -> Vec<Self> {
1962        let parts = parts.max(1);
1963        (0..parts)
1964            .map(|i| {
1965                // Derive a unique seed per partition using a golden-ratio constant
1966                let sub_seed = self
1967                    .seed
1968                    .wrapping_add((i as u64).wrapping_mul(0x9E3779B97F4A7C15));
1969
1970                let mut gen = JournalEntryGenerator::new_with_full_config(
1971                    self.config.clone(),
1972                    Arc::clone(&self.coa),
1973                    self.companies.clone(),
1974                    self.start_date,
1975                    self.end_date,
1976                    sub_seed,
1977                    self.template_config.clone(),
1978                    self.user_pool.clone(),
1979                );
1980
1981                // Copy over configuration state
1982                gen.company_selector = self.company_selector.clone();
1983                gen.vendor_pool = self.vendor_pool.clone();
1984                gen.customer_pool = self.customer_pool.clone();
1985                gen.material_pool = self.material_pool.clone();
1986                gen.using_real_master_data = self.using_real_master_data;
1987                gen.fraud_config = self.fraud_config.clone();
1988                gen.persona_errors_enabled = self.persona_errors_enabled;
1989                gen.approval_enabled = self.approval_enabled;
1990                gen.approval_threshold = self.approval_threshold;
1991
1992                // Use partitioned UUID factory to eliminate atomic contention
1993                gen.uuid_factory = DeterministicUuidFactory::for_partition(
1994                    sub_seed,
1995                    GeneratorType::JournalEntry,
1996                    i as u8,
1997                );
1998
1999                // Copy temporal patterns if configured
2000                if let Some(ref config) = self.temporal_patterns_config {
2001                    gen.temporal_patterns_config = Some(config.clone());
2002                    // Rebuild business day calculator from the stored config
2003                    if config.business_days.enabled {
2004                        if let Some(ref bdc) = self.business_day_calculator {
2005                            gen.business_day_calculator = Some(bdc.clone());
2006                        }
2007                    }
2008                    // Rebuild processing lag calculator with partition seed
2009                    if config.processing_lags.enabled {
2010                        let lag_config =
2011                            Self::convert_processing_lag_config(&config.processing_lags);
2012                        gen.processing_lag_calculator =
2013                            Some(ProcessingLagCalculator::with_config(sub_seed, lag_config));
2014                    }
2015                }
2016
2017                // Copy drift controller if present
2018                if let Some(ref dc) = self.drift_controller {
2019                    gen.drift_controller = Some(dc.clone());
2020                }
2021
2022                gen
2023            })
2024            .collect()
2025    }
2026}
2027
2028#[cfg(test)]
2029#[allow(clippy::unwrap_used)]
2030mod tests {
2031    use super::*;
2032    use crate::ChartOfAccountsGenerator;
2033
2034    #[test]
2035    fn test_generate_balanced_entries() {
2036        let mut coa_gen =
2037            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2038        let coa = Arc::new(coa_gen.generate());
2039
2040        let mut je_gen = JournalEntryGenerator::new_with_params(
2041            TransactionConfig::default(),
2042            coa,
2043            vec!["1000".to_string()],
2044            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2045            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2046            42,
2047        );
2048
2049        let mut balanced_count = 0;
2050        for _ in 0..100 {
2051            let entry = je_gen.generate();
2052
2053            // Skip entries with human errors as they may be intentionally unbalanced
2054            let has_human_error = entry
2055                .header
2056                .header_text
2057                .as_ref()
2058                .map(|t| t.contains("[HUMAN_ERROR:"))
2059                .unwrap_or(false);
2060
2061            if !has_human_error {
2062                assert!(
2063                    entry.is_balanced(),
2064                    "Entry {:?} is not balanced",
2065                    entry.header.document_id
2066                );
2067                balanced_count += 1;
2068            }
2069            assert!(entry.line_count() >= 2, "Entry has fewer than 2 lines");
2070        }
2071
2072        // Ensure most entries are balanced (human errors are rare)
2073        assert!(
2074            balanced_count >= 80,
2075            "Expected at least 80 balanced entries, got {}",
2076            balanced_count
2077        );
2078    }
2079
2080    #[test]
2081    fn test_deterministic_generation() {
2082        let mut coa_gen =
2083            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2084        let coa = Arc::new(coa_gen.generate());
2085
2086        let mut gen1 = JournalEntryGenerator::new_with_params(
2087            TransactionConfig::default(),
2088            Arc::clone(&coa),
2089            vec!["1000".to_string()],
2090            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2091            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2092            42,
2093        );
2094
2095        let mut gen2 = JournalEntryGenerator::new_with_params(
2096            TransactionConfig::default(),
2097            coa,
2098            vec!["1000".to_string()],
2099            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2100            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2101            42,
2102        );
2103
2104        for _ in 0..50 {
2105            let e1 = gen1.generate();
2106            let e2 = gen2.generate();
2107            assert_eq!(e1.header.document_id, e2.header.document_id);
2108            assert_eq!(e1.total_debit(), e2.total_debit());
2109        }
2110    }
2111
2112    #[test]
2113    fn test_templates_generate_descriptions() {
2114        let mut coa_gen =
2115            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2116        let coa = Arc::new(coa_gen.generate());
2117
2118        // Enable all template features
2119        let template_config = TemplateConfig {
2120            names: datasynth_config::schema::NameTemplateConfig {
2121                generate_realistic_names: true,
2122                email_domain: "test.com".to_string(),
2123                culture_distribution: datasynth_config::schema::CultureDistribution::default(),
2124            },
2125            descriptions: datasynth_config::schema::DescriptionTemplateConfig {
2126                generate_header_text: true,
2127                generate_line_text: true,
2128            },
2129            references: datasynth_config::schema::ReferenceTemplateConfig {
2130                generate_references: true,
2131                invoice_prefix: "TEST-INV".to_string(),
2132                po_prefix: "TEST-PO".to_string(),
2133                so_prefix: "TEST-SO".to_string(),
2134            },
2135        };
2136
2137        let mut je_gen = JournalEntryGenerator::new_with_full_config(
2138            TransactionConfig::default(),
2139            coa,
2140            vec!["1000".to_string()],
2141            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2142            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2143            42,
2144            template_config,
2145            None,
2146        )
2147        .with_persona_errors(false); // Disable for template testing
2148
2149        for _ in 0..10 {
2150            let entry = je_gen.generate();
2151
2152            // Verify header text is populated
2153            assert!(
2154                entry.header.header_text.is_some(),
2155                "Header text should be populated"
2156            );
2157
2158            // Verify reference is populated
2159            assert!(
2160                entry.header.reference.is_some(),
2161                "Reference should be populated"
2162            );
2163
2164            // Verify business process is set
2165            assert!(
2166                entry.header.business_process.is_some(),
2167                "Business process should be set"
2168            );
2169
2170            // Verify line text is populated
2171            for line in &entry.lines {
2172                assert!(line.line_text.is_some(), "Line text should be populated");
2173            }
2174
2175            // Entry should still be balanced
2176            assert!(entry.is_balanced());
2177        }
2178    }
2179
2180    #[test]
2181    fn test_user_pool_integration() {
2182        let mut coa_gen =
2183            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2184        let coa = Arc::new(coa_gen.generate());
2185
2186        let companies = vec!["1000".to_string()];
2187
2188        // Generate user pool
2189        let mut user_gen = crate::UserGenerator::new(42);
2190        let user_pool = user_gen.generate_standard(&companies);
2191
2192        let mut je_gen = JournalEntryGenerator::new_with_full_config(
2193            TransactionConfig::default(),
2194            coa,
2195            companies,
2196            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2197            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2198            42,
2199            TemplateConfig::default(),
2200            Some(user_pool),
2201        );
2202
2203        // Generate entries and verify user IDs are from pool
2204        for _ in 0..20 {
2205            let entry = je_gen.generate();
2206
2207            // User ID should not be generic BATCH/USER format when pool is used
2208            // (though it may still fall back if random selection misses)
2209            assert!(!entry.header.created_by.is_empty());
2210        }
2211    }
2212
2213    #[test]
2214    fn test_master_data_connection() {
2215        let mut coa_gen =
2216            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2217        let coa = Arc::new(coa_gen.generate());
2218
2219        // Create test vendors
2220        let vendors = vec![
2221            Vendor::new("V-TEST-001", "Test Vendor Alpha", VendorType::Supplier),
2222            Vendor::new("V-TEST-002", "Test Vendor Beta", VendorType::Technology),
2223        ];
2224
2225        // Create test customers
2226        let customers = vec![
2227            Customer::new("C-TEST-001", "Test Customer One", CustomerType::Corporate),
2228            Customer::new(
2229                "C-TEST-002",
2230                "Test Customer Two",
2231                CustomerType::SmallBusiness,
2232            ),
2233        ];
2234
2235        // Create test materials
2236        let materials = vec![Material::new(
2237            "MAT-TEST-001",
2238            "Test Material A",
2239            MaterialType::RawMaterial,
2240        )];
2241
2242        // Create generator with master data
2243        let generator = JournalEntryGenerator::new_with_params(
2244            TransactionConfig::default(),
2245            coa,
2246            vec!["1000".to_string()],
2247            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2248            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2249            42,
2250        );
2251
2252        // Without master data
2253        assert!(!generator.is_using_real_master_data());
2254
2255        // Connect master data
2256        let generator_with_data = generator
2257            .with_vendors(&vendors)
2258            .with_customers(&customers)
2259            .with_materials(&materials);
2260
2261        // Should now be using real master data
2262        assert!(generator_with_data.is_using_real_master_data());
2263    }
2264
2265    #[test]
2266    fn test_with_master_data_convenience_method() {
2267        let mut coa_gen =
2268            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2269        let coa = Arc::new(coa_gen.generate());
2270
2271        let vendors = vec![Vendor::new("V-001", "Vendor One", VendorType::Supplier)];
2272        let customers = vec![Customer::new(
2273            "C-001",
2274            "Customer One",
2275            CustomerType::Corporate,
2276        )];
2277        let materials = vec![Material::new(
2278            "MAT-001",
2279            "Material One",
2280            MaterialType::RawMaterial,
2281        )];
2282
2283        let generator = JournalEntryGenerator::new_with_params(
2284            TransactionConfig::default(),
2285            coa,
2286            vec!["1000".to_string()],
2287            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2288            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2289            42,
2290        )
2291        .with_master_data(&vendors, &customers, &materials);
2292
2293        assert!(generator.is_using_real_master_data());
2294    }
2295
2296    #[test]
2297    fn test_stress_factors_increase_error_rate() {
2298        let mut coa_gen =
2299            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2300        let coa = Arc::new(coa_gen.generate());
2301
2302        let generator = JournalEntryGenerator::new_with_params(
2303            TransactionConfig::default(),
2304            coa,
2305            vec!["1000".to_string()],
2306            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2307            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2308            42,
2309        );
2310
2311        let base_rate = 0.1;
2312
2313        // Regular day - no stress factors
2314        let regular_day = NaiveDate::from_ymd_opt(2024, 6, 15).unwrap(); // Mid-June Wednesday
2315        let regular_rate = generator.apply_stress_factors(base_rate, regular_day);
2316        assert!(
2317            (regular_rate - base_rate).abs() < 0.01,
2318            "Regular day should have minimal stress factor adjustment"
2319        );
2320
2321        // Month end - 50% more errors
2322        let month_end = NaiveDate::from_ymd_opt(2024, 6, 29).unwrap(); // June 29 (Saturday)
2323        let month_end_rate = generator.apply_stress_factors(base_rate, month_end);
2324        assert!(
2325            month_end_rate > regular_rate,
2326            "Month end should have higher error rate than regular day"
2327        );
2328
2329        // Year end - double the error rate
2330        let year_end = NaiveDate::from_ymd_opt(2024, 12, 30).unwrap(); // December 30
2331        let year_end_rate = generator.apply_stress_factors(base_rate, year_end);
2332        assert!(
2333            year_end_rate > month_end_rate,
2334            "Year end should have highest error rate"
2335        );
2336
2337        // Friday stress
2338        let friday = NaiveDate::from_ymd_opt(2024, 6, 14).unwrap(); // Friday
2339        let friday_rate = generator.apply_stress_factors(base_rate, friday);
2340        assert!(
2341            friday_rate > regular_rate,
2342            "Friday should have higher error rate than mid-week"
2343        );
2344
2345        // Monday stress
2346        let monday = NaiveDate::from_ymd_opt(2024, 6, 17).unwrap(); // Monday
2347        let monday_rate = generator.apply_stress_factors(base_rate, monday);
2348        assert!(
2349            monday_rate > regular_rate,
2350            "Monday should have higher error rate than mid-week"
2351        );
2352    }
2353
2354    #[test]
2355    fn test_batching_produces_similar_entries() {
2356        let mut coa_gen =
2357            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2358        let coa = Arc::new(coa_gen.generate());
2359
2360        // Use seed 123 which is more likely to trigger batching
2361        let mut je_gen = JournalEntryGenerator::new_with_params(
2362            TransactionConfig::default(),
2363            coa,
2364            vec!["1000".to_string()],
2365            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2366            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2367            123,
2368        )
2369        .with_persona_errors(false); // Disable to ensure balanced entries
2370
2371        // Generate many entries - at 15% batch rate, should see some batches
2372        let entries: Vec<JournalEntry> = (0..200).map(|_| je_gen.generate()).collect();
2373
2374        // Check that all entries are balanced (batched or not)
2375        for entry in &entries {
2376            assert!(
2377                entry.is_balanced(),
2378                "All entries including batched should be balanced"
2379            );
2380        }
2381
2382        // Count entries with same-day posting dates (batch indicator)
2383        let mut date_counts: std::collections::HashMap<NaiveDate, usize> =
2384            std::collections::HashMap::new();
2385        for entry in &entries {
2386            *date_counts.entry(entry.header.posting_date).or_insert(0) += 1;
2387        }
2388
2389        // With batching, some dates should have multiple entries
2390        let dates_with_multiple = date_counts.values().filter(|&&c| c > 1).count();
2391        assert!(
2392            dates_with_multiple > 0,
2393            "With batching, should see some dates with multiple entries"
2394        );
2395    }
2396
2397    #[test]
2398    fn test_temporal_patterns_business_days() {
2399        use datasynth_config::schema::{
2400            BusinessDaySchemaConfig, CalendarSchemaConfig, TemporalPatternsConfig,
2401        };
2402
2403        let mut coa_gen =
2404            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2405        let coa = Arc::new(coa_gen.generate());
2406
2407        // Create temporal patterns config with business days enabled
2408        let temporal_config = TemporalPatternsConfig {
2409            enabled: true,
2410            business_days: BusinessDaySchemaConfig {
2411                enabled: true,
2412                ..Default::default()
2413            },
2414            calendars: CalendarSchemaConfig {
2415                regions: vec!["US".to_string()],
2416                custom_holidays: vec![],
2417            },
2418            ..Default::default()
2419        };
2420
2421        let mut je_gen = JournalEntryGenerator::new_with_params(
2422            TransactionConfig::default(),
2423            coa,
2424            vec!["1000".to_string()],
2425            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2426            NaiveDate::from_ymd_opt(2024, 3, 31).unwrap(), // Q1 2024
2427            42,
2428        )
2429        .with_temporal_patterns(temporal_config, 42)
2430        .with_persona_errors(false);
2431
2432        // Generate entries and verify none fall on weekends
2433        let entries: Vec<JournalEntry> = (0..100).map(|_| je_gen.generate()).collect();
2434
2435        for entry in &entries {
2436            let weekday = entry.header.posting_date.weekday();
2437            assert!(
2438                weekday != chrono::Weekday::Sat && weekday != chrono::Weekday::Sun,
2439                "Posting date {:?} should not be a weekend",
2440                entry.header.posting_date
2441            );
2442        }
2443    }
2444
2445    #[test]
2446    fn test_default_generation_filters_weekends() {
2447        // Verify that weekend entries are <5% even when temporal_patterns is NOT enabled.
2448        // This tests the fix where new_with_full_config always creates a default
2449        // BusinessDayCalculator with US holidays as a fallback.
2450        let mut coa_gen =
2451            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2452        let coa = Arc::new(coa_gen.generate());
2453
2454        let mut je_gen = JournalEntryGenerator::new_with_params(
2455            TransactionConfig::default(),
2456            coa,
2457            vec!["1000".to_string()],
2458            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2459            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2460            42,
2461        )
2462        .with_persona_errors(false);
2463
2464        let total = 500;
2465        let entries: Vec<JournalEntry> = (0..total).map(|_| je_gen.generate()).collect();
2466
2467        let weekend_count = entries
2468            .iter()
2469            .filter(|e| {
2470                let wd = e.header.posting_date.weekday();
2471                wd == chrono::Weekday::Sat || wd == chrono::Weekday::Sun
2472            })
2473            .count();
2474
2475        let weekend_pct = weekend_count as f64 / total as f64;
2476        assert!(
2477            weekend_pct < 0.05,
2478            "Expected weekend entries <5% of total without temporal_patterns enabled, \
2479             but got {:.1}% ({}/{})",
2480            weekend_pct * 100.0,
2481            weekend_count,
2482            total
2483        );
2484    }
2485
2486    #[test]
2487    fn test_document_type_derived_from_business_process() {
2488        let mut coa_gen =
2489            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2490        let coa = Arc::new(coa_gen.generate());
2491
2492        let mut je_gen = JournalEntryGenerator::new_with_params(
2493            TransactionConfig::default(),
2494            coa,
2495            vec!["1000".to_string()],
2496            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2497            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2498            99,
2499        )
2500        .with_persona_errors(false)
2501        .with_batching(false);
2502
2503        let total = 200;
2504        let mut doc_types = std::collections::HashSet::new();
2505        let mut sa_count = 0_usize;
2506
2507        for _ in 0..total {
2508            let entry = je_gen.generate();
2509            let dt = &entry.header.document_type;
2510            doc_types.insert(dt.clone());
2511            if dt == "SA" {
2512                sa_count += 1;
2513            }
2514        }
2515
2516        // Should have more than 3 distinct document types
2517        assert!(
2518            doc_types.len() > 3,
2519            "Expected >3 distinct document types, got {} ({:?})",
2520            doc_types.len(),
2521            doc_types,
2522        );
2523
2524        // "SA" should be less than 50% (R2R is 20% of the weight)
2525        let sa_pct = sa_count as f64 / total as f64;
2526        assert!(
2527            sa_pct < 0.50,
2528            "Expected SA <50%, got {:.1}% ({}/{})",
2529            sa_pct * 100.0,
2530            sa_count,
2531            total,
2532        );
2533    }
2534
2535    #[test]
2536    fn test_enrich_line_items_account_description() {
2537        let mut coa_gen =
2538            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2539        let coa = Arc::new(coa_gen.generate());
2540
2541        let mut je_gen = JournalEntryGenerator::new_with_params(
2542            TransactionConfig::default(),
2543            coa,
2544            vec!["1000".to_string()],
2545            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2546            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2547            42,
2548        )
2549        .with_persona_errors(false);
2550
2551        let total = 200;
2552        let entries: Vec<JournalEntry> = (0..total).map(|_| je_gen.generate()).collect();
2553
2554        // Count lines with account_description populated
2555        let total_lines: usize = entries.iter().map(|e| e.lines.len()).sum();
2556        let lines_with_desc: usize = entries
2557            .iter()
2558            .flat_map(|e| &e.lines)
2559            .filter(|l| l.account_description.is_some())
2560            .count();
2561
2562        let desc_pct = lines_with_desc as f64 / total_lines as f64;
2563        assert!(
2564            desc_pct > 0.95,
2565            "Expected >95% of lines to have account_description, got {:.1}% ({}/{})",
2566            desc_pct * 100.0,
2567            lines_with_desc,
2568            total_lines,
2569        );
2570    }
2571
2572    #[test]
2573    fn test_enrich_line_items_cost_center_for_expense_accounts() {
2574        let mut coa_gen =
2575            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2576        let coa = Arc::new(coa_gen.generate());
2577
2578        let mut je_gen = JournalEntryGenerator::new_with_params(
2579            TransactionConfig::default(),
2580            coa,
2581            vec!["1000".to_string()],
2582            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2583            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2584            42,
2585        )
2586        .with_persona_errors(false);
2587
2588        let total = 300;
2589        let entries: Vec<JournalEntry> = (0..total).map(|_| je_gen.generate()).collect();
2590
2591        // Count expense account lines (5xxx/6xxx) with cost_center populated
2592        let expense_lines: Vec<&JournalEntryLine> = entries
2593            .iter()
2594            .flat_map(|e| &e.lines)
2595            .filter(|l| {
2596                let first = l.gl_account.chars().next().unwrap_or('0');
2597                first == '5' || first == '6'
2598            })
2599            .collect();
2600
2601        if !expense_lines.is_empty() {
2602            let with_cc = expense_lines
2603                .iter()
2604                .filter(|l| l.cost_center.is_some())
2605                .count();
2606            let cc_pct = with_cc as f64 / expense_lines.len() as f64;
2607            assert!(
2608                cc_pct > 0.80,
2609                "Expected >80% of expense lines to have cost_center, got {:.1}% ({}/{})",
2610                cc_pct * 100.0,
2611                with_cc,
2612                expense_lines.len(),
2613            );
2614        }
2615    }
2616
2617    #[test]
2618    fn test_enrich_line_items_profit_center_and_line_text() {
2619        let mut coa_gen =
2620            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2621        let coa = Arc::new(coa_gen.generate());
2622
2623        let mut je_gen = JournalEntryGenerator::new_with_params(
2624            TransactionConfig::default(),
2625            coa,
2626            vec!["1000".to_string()],
2627            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2628            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2629            42,
2630        )
2631        .with_persona_errors(false);
2632
2633        let total = 100;
2634        let entries: Vec<JournalEntry> = (0..total).map(|_| je_gen.generate()).collect();
2635
2636        let total_lines: usize = entries.iter().map(|e| e.lines.len()).sum();
2637
2638        // All lines should have profit_center
2639        let with_pc = entries
2640            .iter()
2641            .flat_map(|e| &e.lines)
2642            .filter(|l| l.profit_center.is_some())
2643            .count();
2644        let pc_pct = with_pc as f64 / total_lines as f64;
2645        assert!(
2646            pc_pct > 0.95,
2647            "Expected >95% of lines to have profit_center, got {:.1}% ({}/{})",
2648            pc_pct * 100.0,
2649            with_pc,
2650            total_lines,
2651        );
2652
2653        // All lines should have line_text (either from template or header fallback)
2654        let with_text = entries
2655            .iter()
2656            .flat_map(|e| &e.lines)
2657            .filter(|l| l.line_text.is_some())
2658            .count();
2659        let text_pct = with_text as f64 / total_lines as f64;
2660        assert!(
2661            text_pct > 0.95,
2662            "Expected >95% of lines to have line_text, got {:.1}% ({}/{})",
2663            text_pct * 100.0,
2664            with_text,
2665            total_lines,
2666        );
2667    }
2668}