datasynth_generators/
je_generator.rs

1//! Journal Entry generator with statistical distributions.
2
3use chrono::{Datelike, NaiveDate};
4use rand::prelude::*;
5use rand_chacha::ChaCha8Rng;
6use rust_decimal::prelude::*;
7use rust_decimal::Decimal;
8use std::sync::Arc;
9
10use datasynth_config::schema::{FraudConfig, GeneratorConfig, TemplateConfig, TransactionConfig};
11use datasynth_core::distributions::{DriftAdjustments, DriftConfig, DriftController, *};
12use datasynth_core::models::*;
13use datasynth_core::templates::{
14    descriptions::DescriptionContext, DescriptionGenerator, ReferenceGenerator, ReferenceType,
15};
16use datasynth_core::traits::Generator;
17use datasynth_core::uuid_factory::{DeterministicUuidFactory, GeneratorType};
18
19use crate::company_selector::WeightedCompanySelector;
20use crate::user_generator::{UserGenerator, UserGeneratorConfig};
21
22/// Generator for realistic journal entries.
23pub struct JournalEntryGenerator {
24    rng: ChaCha8Rng,
25    seed: u64,
26    config: TransactionConfig,
27    coa: Arc<ChartOfAccounts>,
28    companies: Vec<String>,
29    company_selector: WeightedCompanySelector,
30    line_sampler: LineItemSampler,
31    amount_sampler: AmountSampler,
32    temporal_sampler: TemporalSampler,
33    start_date: NaiveDate,
34    end_date: NaiveDate,
35    count: u64,
36    uuid_factory: DeterministicUuidFactory,
37    // Enhanced features
38    user_pool: Option<UserPool>,
39    description_generator: DescriptionGenerator,
40    reference_generator: ReferenceGenerator,
41    template_config: TemplateConfig,
42    vendor_pool: VendorPool,
43    customer_pool: CustomerPool,
44    // Material pool for realistic material references
45    material_pool: Option<MaterialPool>,
46    // Flag indicating whether we're using real master data vs defaults
47    using_real_master_data: bool,
48    // Fraud generation
49    fraud_config: FraudConfig,
50    // Persona-based error injection
51    persona_errors_enabled: bool,
52    // Approval threshold enforcement
53    approval_enabled: bool,
54    approval_threshold: rust_decimal::Decimal,
55    // Batching behavior - humans often process similar items together
56    batch_state: Option<BatchState>,
57    // Temporal drift controller for simulating distribution changes over time
58    drift_controller: Option<DriftController>,
59}
60
61/// State for tracking batch processing behavior.
62///
63/// When humans process transactions, they often batch similar items together
64/// (e.g., processing all invoices from one vendor, entering similar expenses).
65#[derive(Clone)]
66struct BatchState {
67    /// The base entry template to vary
68    base_vendor: Option<String>,
69    base_customer: Option<String>,
70    base_account_number: String,
71    base_amount: rust_decimal::Decimal,
72    base_business_process: Option<BusinessProcess>,
73    base_posting_date: NaiveDate,
74    /// Remaining entries in this batch
75    remaining: u8,
76}
77
78impl JournalEntryGenerator {
79    /// Create a new journal entry generator.
80    pub fn new_with_params(
81        config: TransactionConfig,
82        coa: Arc<ChartOfAccounts>,
83        companies: Vec<String>,
84        start_date: NaiveDate,
85        end_date: NaiveDate,
86        seed: u64,
87    ) -> Self {
88        Self::new_with_full_config(
89            config,
90            coa,
91            companies,
92            start_date,
93            end_date,
94            seed,
95            TemplateConfig::default(),
96            None,
97        )
98    }
99
100    /// Create a new journal entry generator with full configuration.
101    #[allow(clippy::too_many_arguments)]
102    pub fn new_with_full_config(
103        config: TransactionConfig,
104        coa: Arc<ChartOfAccounts>,
105        companies: Vec<String>,
106        start_date: NaiveDate,
107        end_date: NaiveDate,
108        seed: u64,
109        template_config: TemplateConfig,
110        user_pool: Option<UserPool>,
111    ) -> Self {
112        // Initialize user pool if not provided
113        let user_pool = user_pool.or_else(|| {
114            if template_config.names.generate_realistic_names {
115                let user_gen_config = UserGeneratorConfig {
116                    culture_distribution: vec![
117                        (
118                            datasynth_core::templates::NameCulture::WesternUs,
119                            template_config.names.culture_distribution.western_us,
120                        ),
121                        (
122                            datasynth_core::templates::NameCulture::Hispanic,
123                            template_config.names.culture_distribution.hispanic,
124                        ),
125                        (
126                            datasynth_core::templates::NameCulture::German,
127                            template_config.names.culture_distribution.german,
128                        ),
129                        (
130                            datasynth_core::templates::NameCulture::French,
131                            template_config.names.culture_distribution.french,
132                        ),
133                        (
134                            datasynth_core::templates::NameCulture::Chinese,
135                            template_config.names.culture_distribution.chinese,
136                        ),
137                        (
138                            datasynth_core::templates::NameCulture::Japanese,
139                            template_config.names.culture_distribution.japanese,
140                        ),
141                        (
142                            datasynth_core::templates::NameCulture::Indian,
143                            template_config.names.culture_distribution.indian,
144                        ),
145                    ],
146                    email_domain: template_config.names.email_domain.clone(),
147                    generate_realistic_names: true,
148                };
149                let mut user_gen = UserGenerator::with_config(seed + 100, user_gen_config);
150                Some(user_gen.generate_standard(&companies))
151            } else {
152                None
153            }
154        });
155
156        // Initialize reference generator
157        let mut ref_gen = ReferenceGenerator::new(
158            start_date.year(),
159            companies.first().map(|s| s.as_str()).unwrap_or("1000"),
160        );
161        ref_gen.set_prefix(
162            ReferenceType::Invoice,
163            &template_config.references.invoice_prefix,
164        );
165        ref_gen.set_prefix(
166            ReferenceType::PurchaseOrder,
167            &template_config.references.po_prefix,
168        );
169        ref_gen.set_prefix(
170            ReferenceType::SalesOrder,
171            &template_config.references.so_prefix,
172        );
173
174        // Create weighted company selector (uniform weights for this constructor)
175        let company_selector = WeightedCompanySelector::uniform(companies.clone());
176
177        Self {
178            rng: ChaCha8Rng::seed_from_u64(seed),
179            seed,
180            config: config.clone(),
181            coa,
182            companies,
183            company_selector,
184            line_sampler: LineItemSampler::with_config(
185                seed + 1,
186                config.line_item_distribution.clone(),
187                config.even_odd_distribution.clone(),
188                config.debit_credit_distribution.clone(),
189            ),
190            amount_sampler: AmountSampler::with_config(seed + 2, config.amounts.clone()),
191            temporal_sampler: TemporalSampler::with_config(
192                seed + 3,
193                config.seasonality.clone(),
194                WorkingHoursConfig::default(),
195                Vec::new(),
196            ),
197            start_date,
198            end_date,
199            count: 0,
200            uuid_factory: DeterministicUuidFactory::new(seed, GeneratorType::JournalEntry),
201            user_pool,
202            description_generator: DescriptionGenerator::new(),
203            reference_generator: ref_gen,
204            template_config,
205            vendor_pool: VendorPool::standard(),
206            customer_pool: CustomerPool::standard(),
207            material_pool: None,
208            using_real_master_data: false,
209            fraud_config: FraudConfig::default(),
210            persona_errors_enabled: true, // Enable by default for realism
211            approval_enabled: true,       // Enable by default for realism
212            approval_threshold: rust_decimal::Decimal::new(10000, 0), // $10,000 default threshold
213            batch_state: None,
214            drift_controller: None,
215        }
216    }
217
218    /// Create from a full GeneratorConfig.
219    ///
220    /// This constructor uses the volume_weight from company configs
221    /// for weighted company selection, and fraud config from GeneratorConfig.
222    pub fn from_generator_config(
223        full_config: &GeneratorConfig,
224        coa: Arc<ChartOfAccounts>,
225        start_date: NaiveDate,
226        end_date: NaiveDate,
227        seed: u64,
228    ) -> Self {
229        let companies: Vec<String> = full_config
230            .companies
231            .iter()
232            .map(|c| c.code.clone())
233            .collect();
234
235        // Create weighted selector using volume_weight from company configs
236        let company_selector = WeightedCompanySelector::from_configs(&full_config.companies);
237
238        let mut generator = Self::new_with_full_config(
239            full_config.transactions.clone(),
240            coa,
241            companies,
242            start_date,
243            end_date,
244            seed,
245            full_config.templates.clone(),
246            None,
247        );
248
249        // Override the uniform selector with weighted selector
250        generator.company_selector = company_selector;
251
252        // Set fraud config
253        generator.fraud_config = full_config.fraud.clone();
254
255        generator
256    }
257
258    /// Set a custom company selector.
259    pub fn set_company_selector(&mut self, selector: WeightedCompanySelector) {
260        self.company_selector = selector;
261    }
262
263    /// Get the current company selector.
264    pub fn company_selector(&self) -> &WeightedCompanySelector {
265        &self.company_selector
266    }
267
268    /// Set fraud configuration.
269    pub fn set_fraud_config(&mut self, config: FraudConfig) {
270        self.fraud_config = config;
271    }
272
273    /// Set vendors from generated master data.
274    ///
275    /// This replaces the default vendor pool with actual generated vendors,
276    /// ensuring JEs reference real master data entities.
277    pub fn with_vendors(mut self, vendors: &[Vendor]) -> Self {
278        if !vendors.is_empty() {
279            self.vendor_pool = VendorPool::from_vendors(vendors.to_vec());
280            self.using_real_master_data = true;
281        }
282        self
283    }
284
285    /// Set customers from generated master data.
286    ///
287    /// This replaces the default customer pool with actual generated customers,
288    /// ensuring JEs reference real master data entities.
289    pub fn with_customers(mut self, customers: &[Customer]) -> Self {
290        if !customers.is_empty() {
291            self.customer_pool = CustomerPool::from_customers(customers.to_vec());
292            self.using_real_master_data = true;
293        }
294        self
295    }
296
297    /// Set materials from generated master data.
298    ///
299    /// This provides material references for JEs that involve inventory movements.
300    pub fn with_materials(mut self, materials: &[Material]) -> Self {
301        if !materials.is_empty() {
302            self.material_pool = Some(MaterialPool::from_materials(materials.to_vec()));
303            self.using_real_master_data = true;
304        }
305        self
306    }
307
308    /// Set all master data at once for convenience.
309    ///
310    /// This is the recommended way to configure the JE generator with
311    /// generated master data to ensure data coherence.
312    pub fn with_master_data(
313        self,
314        vendors: &[Vendor],
315        customers: &[Customer],
316        materials: &[Material],
317    ) -> Self {
318        self.with_vendors(vendors)
319            .with_customers(customers)
320            .with_materials(materials)
321    }
322
323    /// Check if the generator is using real master data.
324    pub fn is_using_real_master_data(&self) -> bool {
325        self.using_real_master_data
326    }
327
328    /// Determine if this transaction should be fraudulent.
329    fn determine_fraud(&mut self) -> Option<FraudType> {
330        if !self.fraud_config.enabled {
331            return None;
332        }
333
334        // Roll for fraud based on fraud rate
335        if self.rng.gen::<f64>() >= self.fraud_config.fraud_rate {
336            return None;
337        }
338
339        // Select fraud type based on distribution
340        Some(self.select_fraud_type())
341    }
342
343    /// Select a fraud type based on the configured distribution.
344    fn select_fraud_type(&mut self) -> FraudType {
345        let dist = &self.fraud_config.fraud_type_distribution;
346        let roll: f64 = self.rng.gen();
347
348        let mut cumulative = 0.0;
349
350        cumulative += dist.suspense_account_abuse;
351        if roll < cumulative {
352            return FraudType::SuspenseAccountAbuse;
353        }
354
355        cumulative += dist.fictitious_transaction;
356        if roll < cumulative {
357            return FraudType::FictitiousTransaction;
358        }
359
360        cumulative += dist.revenue_manipulation;
361        if roll < cumulative {
362            return FraudType::RevenueManipulation;
363        }
364
365        cumulative += dist.expense_capitalization;
366        if roll < cumulative {
367            return FraudType::ExpenseCapitalization;
368        }
369
370        cumulative += dist.split_transaction;
371        if roll < cumulative {
372            return FraudType::SplitTransaction;
373        }
374
375        cumulative += dist.timing_anomaly;
376        if roll < cumulative {
377            return FraudType::TimingAnomaly;
378        }
379
380        cumulative += dist.unauthorized_access;
381        if roll < cumulative {
382            return FraudType::UnauthorizedAccess;
383        }
384
385        // Default fallback
386        FraudType::DuplicatePayment
387    }
388
389    /// Map a fraud type to an amount pattern for suspicious amounts.
390    fn fraud_type_to_amount_pattern(&self, fraud_type: FraudType) -> FraudAmountPattern {
391        match fraud_type {
392            FraudType::SplitTransaction | FraudType::JustBelowThreshold => {
393                FraudAmountPattern::ThresholdAdjacent
394            }
395            FraudType::FictitiousTransaction
396            | FraudType::FictitiousEntry
397            | FraudType::SuspenseAccountAbuse
398            | FraudType::RoundDollarManipulation => FraudAmountPattern::ObviousRoundNumbers,
399            FraudType::RevenueManipulation
400            | FraudType::ExpenseCapitalization
401            | FraudType::ImproperCapitalization
402            | FraudType::ReserveManipulation
403            | FraudType::UnauthorizedAccess
404            | FraudType::PrematureRevenue
405            | FraudType::UnderstatedLiabilities
406            | FraudType::OverstatedAssets
407            | FraudType::ChannelStuffing => FraudAmountPattern::StatisticallyImprobable,
408            FraudType::DuplicatePayment
409            | FraudType::TimingAnomaly
410            | FraudType::SelfApproval
411            | FraudType::ExceededApprovalLimit
412            | FraudType::SegregationOfDutiesViolation
413            | FraudType::UnauthorizedApproval
414            | FraudType::CollusiveApproval
415            | FraudType::FictitiousVendor
416            | FraudType::ShellCompanyPayment
417            | FraudType::Kickback
418            | FraudType::KickbackScheme
419            | FraudType::InvoiceManipulation
420            | FraudType::AssetMisappropriation
421            | FraudType::InventoryTheft
422            | FraudType::GhostEmployee => FraudAmountPattern::Normal,
423        }
424    }
425
426    /// Generate a deterministic UUID using the factory.
427    fn generate_deterministic_uuid(&self) -> uuid::Uuid {
428        self.uuid_factory.next()
429    }
430
431    /// Generate a single journal entry.
432    pub fn generate(&mut self) -> JournalEntry {
433        // Check if we're in a batch - if so, generate a batched entry
434        if let Some(ref state) = self.batch_state {
435            if state.remaining > 0 {
436                return self.generate_batched_entry();
437            }
438        }
439
440        self.count += 1;
441
442        // Generate deterministic document ID
443        let document_id = self.generate_deterministic_uuid();
444
445        // Sample posting date
446        let posting_date = self
447            .temporal_sampler
448            .sample_date(self.start_date, self.end_date);
449
450        // Select company using weighted selector
451        let company_code = self.company_selector.select(&mut self.rng).to_string();
452
453        // Sample line item specification
454        let line_spec = self.line_sampler.sample();
455
456        // Determine source type using full 4-way distribution
457        let source = self.select_source();
458        let is_automated = matches!(
459            source,
460            TransactionSource::Automated | TransactionSource::Recurring
461        );
462
463        // Select business process
464        let business_process = self.select_business_process();
465
466        // Determine if this is a fraudulent transaction
467        let fraud_type = self.determine_fraud();
468        let is_fraud = fraud_type.is_some();
469
470        // Sample time based on source
471        let time = self.temporal_sampler.sample_time(!is_automated);
472        let created_at = posting_date.and_time(time).and_utc();
473
474        // Select user from pool or generate generic
475        let (created_by, user_persona) = self.select_user(is_automated);
476
477        // Create header with deterministic UUID
478        let mut header =
479            JournalEntryHeader::with_deterministic_id(company_code, posting_date, document_id);
480        header.created_at = created_at;
481        header.source = source;
482        header.created_by = created_by;
483        header.user_persona = user_persona;
484        header.business_process = Some(business_process);
485        header.is_fraud = is_fraud;
486        header.fraud_type = fraud_type;
487
488        // Generate description context
489        let mut context =
490            DescriptionContext::with_period(posting_date.month(), posting_date.year());
491
492        // Add vendor/customer context based on business process
493        match business_process {
494            BusinessProcess::P2P => {
495                if let Some(vendor) = self.vendor_pool.random_vendor(&mut self.rng) {
496                    context.vendor_name = Some(vendor.name.clone());
497                }
498            }
499            BusinessProcess::O2C => {
500                if let Some(customer) = self.customer_pool.random_customer(&mut self.rng) {
501                    context.customer_name = Some(customer.name.clone());
502                }
503            }
504            _ => {}
505        }
506
507        // Generate header text if enabled
508        if self.template_config.descriptions.generate_header_text {
509            header.header_text = Some(self.description_generator.generate_header_text(
510                business_process,
511                &context,
512                &mut self.rng,
513            ));
514        }
515
516        // Generate reference if enabled
517        if self.template_config.references.generate_references {
518            header.reference = Some(
519                self.reference_generator
520                    .generate_for_process_year(business_process, posting_date.year()),
521            );
522        }
523
524        // Generate line items
525        let mut entry = JournalEntry::new(header);
526
527        // Generate amount - use fraud pattern if this is a fraudulent transaction
528        let base_amount = if let Some(ft) = fraud_type {
529            let pattern = self.fraud_type_to_amount_pattern(ft);
530            self.amount_sampler.sample_fraud(pattern)
531        } else {
532            self.amount_sampler.sample()
533        };
534
535        // Apply temporal drift if configured
536        let drift_adjusted_amount = {
537            let drift = self.get_drift_adjustments(posting_date);
538            if drift.amount_mean_multiplier != 1.0 {
539                // Apply drift multiplier (includes seasonal factor if enabled)
540                let multiplier = drift.amount_mean_multiplier * drift.seasonal_factor;
541                let adjusted = base_amount.to_f64().unwrap_or(1.0) * multiplier;
542                Decimal::from_f64_retain(adjusted).unwrap_or(base_amount)
543            } else {
544                base_amount
545            }
546        };
547
548        // Apply human variation to amounts for non-automated transactions
549        let total_amount = if is_automated {
550            drift_adjusted_amount // Automated systems use exact amounts
551        } else {
552            self.apply_human_variation(drift_adjusted_amount)
553        };
554
555        // Generate debit lines
556        let debit_amounts = self
557            .amount_sampler
558            .sample_summing_to(line_spec.debit_count, total_amount);
559        for (i, amount) in debit_amounts.into_iter().enumerate() {
560            let account_number = self.select_debit_account().account_number.clone();
561            let mut line = JournalEntryLine::debit(
562                entry.header.document_id,
563                (i + 1) as u32,
564                account_number.clone(),
565                amount,
566            );
567
568            // Generate line text if enabled
569            if self.template_config.descriptions.generate_line_text {
570                line.line_text = Some(self.description_generator.generate_line_text(
571                    &account_number,
572                    &context,
573                    &mut self.rng,
574                ));
575            }
576
577            entry.add_line(line);
578        }
579
580        // Generate credit lines - use the SAME amounts to ensure balance
581        let credit_amounts = self
582            .amount_sampler
583            .sample_summing_to(line_spec.credit_count, total_amount);
584        for (i, amount) in credit_amounts.into_iter().enumerate() {
585            let account_number = self.select_credit_account().account_number.clone();
586            let mut line = JournalEntryLine::credit(
587                entry.header.document_id,
588                (line_spec.debit_count + i + 1) as u32,
589                account_number.clone(),
590                amount,
591            );
592
593            // Generate line text if enabled
594            if self.template_config.descriptions.generate_line_text {
595                line.line_text = Some(self.description_generator.generate_line_text(
596                    &account_number,
597                    &context,
598                    &mut self.rng,
599                ));
600            }
601
602            entry.add_line(line);
603        }
604
605        // Apply persona-based errors if enabled and it's a human user
606        if self.persona_errors_enabled && !is_automated {
607            self.maybe_inject_persona_error(&mut entry);
608        }
609
610        // Apply approval workflow if enabled and amount exceeds threshold
611        if self.approval_enabled {
612            self.maybe_apply_approval_workflow(&mut entry, posting_date);
613        }
614
615        // Maybe start a batch of similar entries for realism
616        self.maybe_start_batch(&entry);
617
618        entry
619    }
620
621    /// Enable or disable persona-based error injection.
622    ///
623    /// When enabled, entries created by human personas have a chance
624    /// to contain realistic human errors based on their experience level.
625    pub fn with_persona_errors(mut self, enabled: bool) -> Self {
626        self.persona_errors_enabled = enabled;
627        self
628    }
629
630    /// Set fraud configuration for fraud injection.
631    ///
632    /// When fraud is enabled in the config, transactions have a chance
633    /// to be marked as fraudulent based on the configured fraud rate.
634    pub fn with_fraud_config(mut self, config: FraudConfig) -> Self {
635        self.fraud_config = config;
636        self
637    }
638
639    /// Check if persona errors are enabled.
640    pub fn persona_errors_enabled(&self) -> bool {
641        self.persona_errors_enabled
642    }
643
644    /// Enable or disable batch processing behavior.
645    ///
646    /// When enabled (default), the generator will occasionally produce batches
647    /// of similar entries, simulating how humans batch similar work together.
648    pub fn with_batching(mut self, enabled: bool) -> Self {
649        if !enabled {
650            self.batch_state = None;
651        }
652        self
653    }
654
655    /// Check if batch processing is enabled.
656    pub fn batching_enabled(&self) -> bool {
657        // Batching is implicitly enabled when not explicitly disabled
658        true
659    }
660
661    /// Maybe start a batch based on the current entry.
662    ///
663    /// Humans often batch similar work: processing invoices from one vendor,
664    /// entering expense reports for a trip, reconciling similar items.
665    fn maybe_start_batch(&mut self, entry: &JournalEntry) {
666        // Only start batch for non-automated, non-fraud entries
667        if entry.header.source == TransactionSource::Automated || entry.header.is_fraud {
668            return;
669        }
670
671        // 15% chance to start a batch (most work is not batched)
672        if self.rng.gen::<f64>() > 0.15 {
673            return;
674        }
675
676        // Extract key attributes for batching
677        let base_account = entry
678            .lines
679            .first()
680            .map(|l| l.gl_account.clone())
681            .unwrap_or_default();
682
683        let base_amount = entry.total_debit();
684
685        self.batch_state = Some(BatchState {
686            base_vendor: None, // Would need vendor from context
687            base_customer: None,
688            base_account_number: base_account,
689            base_amount,
690            base_business_process: entry.header.business_process,
691            base_posting_date: entry.header.posting_date,
692            remaining: self.rng.gen_range(2..7), // 2-6 more similar entries
693        });
694    }
695
696    /// Generate an entry that's part of the current batch.
697    ///
698    /// Batched entries have:
699    /// - Same or very similar business process
700    /// - Same posting date (batched work done together)
701    /// - Similar amounts (within ±15%)
702    /// - Same debit account (processing similar items)
703    fn generate_batched_entry(&mut self) -> JournalEntry {
704        use rust_decimal::Decimal;
705
706        // Decrement batch counter
707        if let Some(ref mut state) = self.batch_state {
708            state.remaining = state.remaining.saturating_sub(1);
709        }
710
711        let batch = self.batch_state.clone().unwrap();
712
713        // Use the batch's posting date (work done on same day)
714        let posting_date = batch.base_posting_date;
715
716        self.count += 1;
717        let document_id = self.generate_deterministic_uuid();
718
719        // Select same company (batched work is usually same company)
720        let company_code = self.company_selector.select(&mut self.rng).to_string();
721
722        // Use simplified line spec for batched entries (usually 2-line)
723        let _line_spec = LineItemSpec {
724            total_count: 2,
725            debit_count: 1,
726            credit_count: 1,
727            split_type: DebitCreditSplit::Equal,
728        };
729
730        // Batched entries are always manual
731        let source = TransactionSource::Manual;
732
733        // Use the batch's business process
734        let business_process = batch.base_business_process.unwrap_or(BusinessProcess::R2R);
735
736        // Sample time
737        let time = self.temporal_sampler.sample_time(true);
738        let created_at = posting_date.and_time(time).and_utc();
739
740        // Same user for batched work
741        let (created_by, user_persona) = self.select_user(false);
742
743        // Create header
744        let mut header =
745            JournalEntryHeader::with_deterministic_id(company_code, posting_date, document_id);
746        header.created_at = created_at;
747        header.source = source;
748        header.created_by = created_by;
749        header.user_persona = user_persona;
750        header.business_process = Some(business_process);
751
752        // Generate similar amount (within ±15% of base)
753        let variation = self.rng.gen_range(-0.15..0.15);
754        let varied_amount =
755            batch.base_amount * (Decimal::ONE + Decimal::try_from(variation).unwrap_or_default());
756        let total_amount = varied_amount.round_dp(2).max(Decimal::from(1));
757
758        // Create the entry
759        let mut entry = JournalEntry::new(header);
760
761        // Use same debit account as batch base
762        let debit_line = JournalEntryLine::debit(
763            entry.header.document_id,
764            1,
765            batch.base_account_number.clone(),
766            total_amount,
767        );
768        entry.add_line(debit_line);
769
770        // Select a credit account
771        let credit_account = self.select_credit_account().account_number.clone();
772        let credit_line =
773            JournalEntryLine::credit(entry.header.document_id, 2, credit_account, total_amount);
774        entry.add_line(credit_line);
775
776        // Apply persona-based errors if enabled
777        if self.persona_errors_enabled {
778            self.maybe_inject_persona_error(&mut entry);
779        }
780
781        // Apply approval workflow if enabled
782        if self.approval_enabled {
783            self.maybe_apply_approval_workflow(&mut entry, posting_date);
784        }
785
786        // Clear batch state if no more entries remaining
787        if batch.remaining <= 1 {
788            self.batch_state = None;
789        }
790
791        entry
792    }
793
794    /// Maybe inject a persona-appropriate error based on the persona's error rate.
795    fn maybe_inject_persona_error(&mut self, entry: &mut JournalEntry) {
796        // Parse persona from the entry header
797        let persona_str = &entry.header.user_persona;
798        let persona = match persona_str.to_lowercase().as_str() {
799            s if s.contains("junior") => UserPersona::JuniorAccountant,
800            s if s.contains("senior") => UserPersona::SeniorAccountant,
801            s if s.contains("controller") => UserPersona::Controller,
802            s if s.contains("manager") => UserPersona::Manager,
803            s if s.contains("executive") => UserPersona::Executive,
804            _ => return, // Don't inject errors for unknown personas
805        };
806
807        // Get base error rate from persona
808        let base_error_rate = persona.error_rate();
809
810        // Apply stress factors based on posting date
811        let adjusted_rate = self.apply_stress_factors(base_error_rate, entry.header.posting_date);
812
813        // Check if error should occur based on adjusted rate
814        if self.rng.gen::<f64>() >= adjusted_rate {
815            return; // No error this time
816        }
817
818        // Select and inject persona-appropriate error
819        self.inject_human_error(entry, persona);
820    }
821
822    /// Apply contextual stress factors to the base error rate.
823    ///
824    /// Stress factors increase error likelihood during:
825    /// - Month-end (day >= 28): 1.5x more errors due to deadline pressure
826    /// - Quarter-end (Mar, Jun, Sep, Dec): additional 25% boost
827    /// - Year-end (December 28-31): 2.0x more errors due to audit pressure
828    /// - Monday morning (catch-up work): 20% more errors
829    /// - Friday afternoon (rushing to leave): 30% more errors
830    fn apply_stress_factors(&self, base_rate: f64, posting_date: chrono::NaiveDate) -> f64 {
831        use chrono::Datelike;
832
833        let mut rate = base_rate;
834        let day = posting_date.day();
835        let month = posting_date.month();
836
837        // Year-end stress (December 28-31): double the error rate
838        if month == 12 && day >= 28 {
839            rate *= 2.0;
840            return rate.min(0.5); // Cap at 50% to keep it realistic
841        }
842
843        // Quarter-end stress (last days of Mar, Jun, Sep, Dec)
844        if matches!(month, 3 | 6 | 9 | 12) && day >= 28 {
845            rate *= 1.75; // 75% more errors at quarter end
846            return rate.min(0.4);
847        }
848
849        // Month-end stress (last 3 days of month)
850        if day >= 28 {
851            rate *= 1.5; // 50% more errors at month end
852        }
853
854        // Day-of-week stress effects
855        let weekday = posting_date.weekday();
856        match weekday {
857            chrono::Weekday::Mon => {
858                // Monday: catching up, often rushed
859                rate *= 1.2;
860            }
861            chrono::Weekday::Fri => {
862                // Friday: rushing to finish before weekend
863                rate *= 1.3;
864            }
865            _ => {}
866        }
867
868        // Cap at 40% to keep it realistic
869        rate.min(0.4)
870    }
871
872    /// Apply human-like variation to an amount.
873    ///
874    /// Humans don't enter perfectly calculated amounts - they:
875    /// - Round amounts differently
876    /// - Estimate instead of calculating exactly
877    /// - Make small input variations
878    ///
879    /// This applies small variations (typically ±2%) to make amounts more realistic.
880    fn apply_human_variation(&mut self, amount: rust_decimal::Decimal) -> rust_decimal::Decimal {
881        use rust_decimal::Decimal;
882
883        // Automated transactions or very small amounts don't get variation
884        if amount < Decimal::from(10) {
885            return amount;
886        }
887
888        // 70% chance of human variation being applied
889        if self.rng.gen::<f64>() > 0.70 {
890            return amount;
891        }
892
893        // Decide which type of human variation to apply
894        let variation_type: u8 = self.rng.gen_range(0..4);
895
896        match variation_type {
897            0 => {
898                // ±2% variation (common for estimated amounts)
899                let variation_pct = self.rng.gen_range(-0.02..0.02);
900                let variation = amount * Decimal::try_from(variation_pct).unwrap_or_default();
901                (amount + variation).round_dp(2)
902            }
903            1 => {
904                // Round to nearest $10
905                let ten = Decimal::from(10);
906                (amount / ten).round() * ten
907            }
908            2 => {
909                // Round to nearest $100 (for larger amounts)
910                if amount >= Decimal::from(500) {
911                    let hundred = Decimal::from(100);
912                    (amount / hundred).round() * hundred
913                } else {
914                    amount
915                }
916            }
917            3 => {
918                // Slight under/over payment (±$0.01 to ±$1.00)
919                let cents = Decimal::new(self.rng.gen_range(-100..100), 2);
920                (amount + cents).max(Decimal::ZERO).round_dp(2)
921            }
922            _ => amount,
923        }
924    }
925
926    /// Rebalance an entry after a one-sided amount modification.
927    ///
928    /// When an error modifies one line's amount, this finds a line on the opposite
929    /// side (credit if modified was debit, or vice versa) and adjusts it by the
930    /// same impact to maintain balance.
931    fn rebalance_entry(entry: &mut JournalEntry, modified_was_debit: bool, impact: Decimal) {
932        // Find a line on the opposite side to adjust
933        let balancing_idx = entry.lines.iter().position(|l| {
934            if modified_was_debit {
935                l.credit_amount > Decimal::ZERO
936            } else {
937                l.debit_amount > Decimal::ZERO
938            }
939        });
940
941        if let Some(idx) = balancing_idx {
942            if modified_was_debit {
943                entry.lines[idx].credit_amount += impact;
944            } else {
945                entry.lines[idx].debit_amount += impact;
946            }
947        }
948    }
949
950    /// Inject a human-like error based on the persona.
951    ///
952    /// All error types maintain balance - amount modifications are applied to both sides.
953    /// Entries are marked with [HUMAN_ERROR:*] tags in header_text for ML detection.
954    fn inject_human_error(&mut self, entry: &mut JournalEntry, persona: UserPersona) {
955        use rust_decimal::Decimal;
956
957        // Different personas make different types of errors
958        let error_type: u8 = match persona {
959            UserPersona::JuniorAccountant => {
960                // Junior accountants make more varied errors
961                self.rng.gen_range(0..5)
962            }
963            UserPersona::SeniorAccountant => {
964                // Senior accountants mainly make transposition errors
965                self.rng.gen_range(0..3)
966            }
967            UserPersona::Controller | UserPersona::Manager => {
968                // Controllers/managers mainly make rounding or cutoff errors
969                self.rng.gen_range(3..5)
970            }
971            _ => return,
972        };
973
974        match error_type {
975            0 => {
976                // Transposed digits in an amount
977                if let Some(line) = entry.lines.get_mut(0) {
978                    let is_debit = line.debit_amount > Decimal::ZERO;
979                    let original_amount = if is_debit {
980                        line.debit_amount
981                    } else {
982                        line.credit_amount
983                    };
984
985                    // Simple digit swap in the string representation
986                    let s = original_amount.to_string();
987                    if s.len() >= 2 {
988                        let chars: Vec<char> = s.chars().collect();
989                        let pos = self.rng.gen_range(0..chars.len().saturating_sub(1));
990                        if chars[pos].is_ascii_digit()
991                            && chars.get(pos + 1).is_some_and(|c| c.is_ascii_digit())
992                        {
993                            let mut new_chars = chars;
994                            new_chars.swap(pos, pos + 1);
995                            if let Ok(new_amount) =
996                                new_chars.into_iter().collect::<String>().parse::<Decimal>()
997                            {
998                                let impact = new_amount - original_amount;
999
1000                                // Apply to the modified line
1001                                if is_debit {
1002                                    entry.lines[0].debit_amount = new_amount;
1003                                } else {
1004                                    entry.lines[0].credit_amount = new_amount;
1005                                }
1006
1007                                // Rebalance the entry
1008                                Self::rebalance_entry(entry, is_debit, impact);
1009
1010                                entry.header.header_text = Some(
1011                                    entry.header.header_text.clone().unwrap_or_default()
1012                                        + " [HUMAN_ERROR:TRANSPOSITION]",
1013                                );
1014                            }
1015                        }
1016                    }
1017                }
1018            }
1019            1 => {
1020                // Wrong decimal place (off by factor of 10)
1021                if let Some(line) = entry.lines.get_mut(0) {
1022                    let is_debit = line.debit_amount > Decimal::ZERO;
1023                    let original_amount = if is_debit {
1024                        line.debit_amount
1025                    } else {
1026                        line.credit_amount
1027                    };
1028
1029                    let new_amount = original_amount * Decimal::new(10, 0);
1030                    let impact = new_amount - original_amount;
1031
1032                    // Apply to the modified line
1033                    if is_debit {
1034                        entry.lines[0].debit_amount = new_amount;
1035                    } else {
1036                        entry.lines[0].credit_amount = new_amount;
1037                    }
1038
1039                    // Rebalance the entry
1040                    Self::rebalance_entry(entry, is_debit, impact);
1041
1042                    entry.header.header_text = Some(
1043                        entry.header.header_text.clone().unwrap_or_default()
1044                            + " [HUMAN_ERROR:DECIMAL_SHIFT]",
1045                    );
1046                }
1047            }
1048            2 => {
1049                // Typo in description (doesn't affect balance)
1050                if let Some(ref mut text) = entry.header.header_text {
1051                    let typos = ["teh", "adn", "wiht", "taht", "recieve"];
1052                    let correct = ["the", "and", "with", "that", "receive"];
1053                    let idx = self.rng.gen_range(0..typos.len());
1054                    if text.to_lowercase().contains(correct[idx]) {
1055                        *text = text.replace(correct[idx], typos[idx]);
1056                        *text = format!("{} [HUMAN_ERROR:TYPO]", text);
1057                    }
1058                }
1059            }
1060            3 => {
1061                // Rounding to round number
1062                if let Some(line) = entry.lines.get_mut(0) {
1063                    let is_debit = line.debit_amount > Decimal::ZERO;
1064                    let original_amount = if is_debit {
1065                        line.debit_amount
1066                    } else {
1067                        line.credit_amount
1068                    };
1069
1070                    let new_amount =
1071                        (original_amount / Decimal::new(100, 0)).round() * Decimal::new(100, 0);
1072                    let impact = new_amount - original_amount;
1073
1074                    // Apply to the modified line
1075                    if is_debit {
1076                        entry.lines[0].debit_amount = new_amount;
1077                    } else {
1078                        entry.lines[0].credit_amount = new_amount;
1079                    }
1080
1081                    // Rebalance the entry
1082                    Self::rebalance_entry(entry, is_debit, impact);
1083
1084                    entry.header.header_text = Some(
1085                        entry.header.header_text.clone().unwrap_or_default()
1086                            + " [HUMAN_ERROR:ROUNDED]",
1087                    );
1088                }
1089            }
1090            4 => {
1091                // Late posting marker (document date much earlier than posting date)
1092                // This doesn't create an imbalance
1093                if entry.header.document_date == entry.header.posting_date {
1094                    let days_late = self.rng.gen_range(5..15);
1095                    entry.header.document_date =
1096                        entry.header.posting_date - chrono::Duration::days(days_late);
1097                    entry.header.header_text = Some(
1098                        entry.header.header_text.clone().unwrap_or_default()
1099                            + " [HUMAN_ERROR:LATE_POSTING]",
1100                    );
1101                }
1102            }
1103            _ => {}
1104        }
1105    }
1106
1107    /// Apply approval workflow for high-value transactions.
1108    ///
1109    /// If the entry amount exceeds the approval threshold, simulate an
1110    /// approval workflow with appropriate approvers based on amount.
1111    fn maybe_apply_approval_workflow(
1112        &mut self,
1113        entry: &mut JournalEntry,
1114        _posting_date: NaiveDate,
1115    ) {
1116        use rust_decimal::Decimal;
1117
1118        let amount = entry.total_debit();
1119
1120        // Skip if amount is below threshold
1121        if amount <= self.approval_threshold {
1122            // Auto-approved below threshold
1123            let workflow = ApprovalWorkflow::auto_approved(
1124                entry.header.created_by.clone(),
1125                entry.header.user_persona.clone(),
1126                amount,
1127                entry.header.created_at,
1128            );
1129            entry.header.approval_workflow = Some(workflow);
1130            return;
1131        }
1132
1133        // Mark as SOX relevant for high-value transactions
1134        entry.header.sox_relevant = true;
1135
1136        // Determine required approval levels based on amount
1137        let required_levels = if amount > Decimal::new(100000, 0) {
1138            3 // Executive approval required
1139        } else if amount > Decimal::new(50000, 0) {
1140            2 // Senior management approval
1141        } else {
1142            1 // Manager approval
1143        };
1144
1145        // Create the approval workflow
1146        let mut workflow = ApprovalWorkflow::new(
1147            entry.header.created_by.clone(),
1148            entry.header.user_persona.clone(),
1149            amount,
1150        );
1151        workflow.required_levels = required_levels;
1152
1153        // Simulate submission
1154        let submit_time = entry.header.created_at;
1155        let submit_action = ApprovalAction::new(
1156            entry.header.created_by.clone(),
1157            entry.header.user_persona.clone(),
1158            self.parse_persona(&entry.header.user_persona),
1159            ApprovalActionType::Submit,
1160            0,
1161        )
1162        .with_timestamp(submit_time);
1163
1164        workflow.actions.push(submit_action);
1165        workflow.status = ApprovalStatus::Pending;
1166        workflow.submitted_at = Some(submit_time);
1167
1168        // Simulate approvals with realistic delays
1169        let mut current_time = submit_time;
1170        for level in 1..=required_levels {
1171            // Add delay for approval (1-3 business hours per level)
1172            let delay_hours = self.rng.gen_range(1..4);
1173            current_time += chrono::Duration::hours(delay_hours);
1174
1175            // Skip weekends
1176            while current_time.weekday() == chrono::Weekday::Sat
1177                || current_time.weekday() == chrono::Weekday::Sun
1178            {
1179                current_time += chrono::Duration::days(1);
1180            }
1181
1182            // Generate approver based on level
1183            let (approver_id, approver_role) = self.select_approver(level);
1184
1185            let approve_action = ApprovalAction::new(
1186                approver_id.clone(),
1187                format!("{:?}", approver_role),
1188                approver_role,
1189                ApprovalActionType::Approve,
1190                level,
1191            )
1192            .with_timestamp(current_time);
1193
1194            workflow.actions.push(approve_action);
1195            workflow.current_level = level;
1196        }
1197
1198        // Mark as approved
1199        workflow.status = ApprovalStatus::Approved;
1200        workflow.approved_at = Some(current_time);
1201
1202        entry.header.approval_workflow = Some(workflow);
1203    }
1204
1205    /// Select an approver based on the required level.
1206    fn select_approver(&mut self, level: u8) -> (String, UserPersona) {
1207        let persona = match level {
1208            1 => UserPersona::Manager,
1209            2 => UserPersona::Controller,
1210            _ => UserPersona::Executive,
1211        };
1212
1213        // Try to get from user pool first
1214        if let Some(ref pool) = self.user_pool {
1215            if let Some(user) = pool.get_random_user(persona, &mut self.rng) {
1216                return (user.user_id.clone(), persona);
1217            }
1218        }
1219
1220        // Fallback to generated approver
1221        let approver_id = match persona {
1222            UserPersona::Manager => format!("MGR{:04}", self.rng.gen_range(1..100)),
1223            UserPersona::Controller => format!("CTRL{:04}", self.rng.gen_range(1..20)),
1224            UserPersona::Executive => format!("EXEC{:04}", self.rng.gen_range(1..10)),
1225            _ => format!("USR{:04}", self.rng.gen_range(1..1000)),
1226        };
1227
1228        (approver_id, persona)
1229    }
1230
1231    /// Parse user persona from string.
1232    fn parse_persona(&self, persona_str: &str) -> UserPersona {
1233        match persona_str.to_lowercase().as_str() {
1234            s if s.contains("junior") => UserPersona::JuniorAccountant,
1235            s if s.contains("senior") => UserPersona::SeniorAccountant,
1236            s if s.contains("controller") => UserPersona::Controller,
1237            s if s.contains("manager") => UserPersona::Manager,
1238            s if s.contains("executive") => UserPersona::Executive,
1239            s if s.contains("automated") || s.contains("system") => UserPersona::AutomatedSystem,
1240            _ => UserPersona::JuniorAccountant, // Default
1241        }
1242    }
1243
1244    /// Enable or disable approval workflow.
1245    pub fn with_approval(mut self, enabled: bool) -> Self {
1246        self.approval_enabled = enabled;
1247        self
1248    }
1249
1250    /// Set the approval threshold amount.
1251    pub fn with_approval_threshold(mut self, threshold: rust_decimal::Decimal) -> Self {
1252        self.approval_threshold = threshold;
1253        self
1254    }
1255
1256    /// Set the temporal drift controller for simulating distribution changes over time.
1257    ///
1258    /// When drift is enabled, amounts and other distributions will shift based on
1259    /// the period (month) to simulate realistic temporal evolution like inflation
1260    /// or increasing fraud rates.
1261    pub fn with_drift_controller(mut self, controller: DriftController) -> Self {
1262        self.drift_controller = Some(controller);
1263        self
1264    }
1265
1266    /// Set drift configuration directly.
1267    ///
1268    /// Creates a drift controller from the config. Total periods is calculated
1269    /// from the date range.
1270    pub fn with_drift_config(mut self, config: DriftConfig, seed: u64) -> Self {
1271        if config.enabled {
1272            let total_periods = self.calculate_total_periods();
1273            self.drift_controller = Some(DriftController::new(config, seed, total_periods));
1274        }
1275        self
1276    }
1277
1278    /// Calculate total periods (months) in the date range.
1279    fn calculate_total_periods(&self) -> u32 {
1280        let start_year = self.start_date.year();
1281        let start_month = self.start_date.month();
1282        let end_year = self.end_date.year();
1283        let end_month = self.end_date.month();
1284
1285        ((end_year - start_year) * 12 + (end_month as i32 - start_month as i32) + 1).max(1) as u32
1286    }
1287
1288    /// Calculate the period number (0-indexed) for a given date.
1289    fn date_to_period(&self, date: NaiveDate) -> u32 {
1290        let start_year = self.start_date.year();
1291        let start_month = self.start_date.month() as i32;
1292        let date_year = date.year();
1293        let date_month = date.month() as i32;
1294
1295        ((date_year - start_year) * 12 + (date_month - start_month)).max(0) as u32
1296    }
1297
1298    /// Get drift adjustments for a given date.
1299    fn get_drift_adjustments(&self, date: NaiveDate) -> DriftAdjustments {
1300        if let Some(ref controller) = self.drift_controller {
1301            let period = self.date_to_period(date);
1302            controller.compute_adjustments(period)
1303        } else {
1304            DriftAdjustments::none()
1305        }
1306    }
1307
1308    /// Select a user from the pool or generate a generic user ID.
1309    fn select_user(&mut self, is_automated: bool) -> (String, String) {
1310        if let Some(ref pool) = self.user_pool {
1311            let persona = if is_automated {
1312                UserPersona::AutomatedSystem
1313            } else {
1314                // Random distribution among human personas
1315                let roll: f64 = self.rng.gen();
1316                if roll < 0.4 {
1317                    UserPersona::JuniorAccountant
1318                } else if roll < 0.7 {
1319                    UserPersona::SeniorAccountant
1320                } else if roll < 0.85 {
1321                    UserPersona::Controller
1322                } else {
1323                    UserPersona::Manager
1324                }
1325            };
1326
1327            if let Some(user) = pool.get_random_user(persona, &mut self.rng) {
1328                return (
1329                    user.user_id.clone(),
1330                    format!("{:?}", user.persona).to_lowercase(),
1331                );
1332            }
1333        }
1334
1335        // Fallback to generic format
1336        if is_automated {
1337            (
1338                format!("BATCH{:04}", self.rng.gen_range(1..=20)),
1339                "automated_system".to_string(),
1340            )
1341        } else {
1342            (
1343                format!("USER{:04}", self.rng.gen_range(1..=40)),
1344                "senior_accountant".to_string(),
1345            )
1346        }
1347    }
1348
1349    /// Select transaction source based on configuration weights.
1350    fn select_source(&mut self) -> TransactionSource {
1351        let roll: f64 = self.rng.gen();
1352        let dist = &self.config.source_distribution;
1353
1354        if roll < dist.manual {
1355            TransactionSource::Manual
1356        } else if roll < dist.manual + dist.automated {
1357            TransactionSource::Automated
1358        } else if roll < dist.manual + dist.automated + dist.recurring {
1359            TransactionSource::Recurring
1360        } else {
1361            TransactionSource::Adjustment
1362        }
1363    }
1364
1365    /// Select a business process based on configuration weights.
1366    fn select_business_process(&mut self) -> BusinessProcess {
1367        let roll: f64 = self.rng.gen();
1368
1369        // Default weights: O2C=35%, P2P=30%, R2R=20%, H2R=10%, A2R=5%
1370        if roll < 0.35 {
1371            BusinessProcess::O2C
1372        } else if roll < 0.65 {
1373            BusinessProcess::P2P
1374        } else if roll < 0.85 {
1375            BusinessProcess::R2R
1376        } else if roll < 0.95 {
1377            BusinessProcess::H2R
1378        } else {
1379            BusinessProcess::A2R
1380        }
1381    }
1382
1383    fn select_debit_account(&mut self) -> &GLAccount {
1384        let accounts = self.coa.get_accounts_by_type(AccountType::Asset);
1385        let expense_accounts = self.coa.get_accounts_by_type(AccountType::Expense);
1386
1387        // 60% asset, 40% expense for debits
1388        let all: Vec<_> = if self.rng.gen::<f64>() < 0.6 {
1389            accounts
1390        } else {
1391            expense_accounts
1392        };
1393
1394        all.choose(&mut self.rng)
1395            .copied()
1396            .unwrap_or_else(|| &self.coa.accounts[0])
1397    }
1398
1399    fn select_credit_account(&mut self) -> &GLAccount {
1400        let liability_accounts = self.coa.get_accounts_by_type(AccountType::Liability);
1401        let revenue_accounts = self.coa.get_accounts_by_type(AccountType::Revenue);
1402
1403        // 60% liability, 40% revenue for credits
1404        let all: Vec<_> = if self.rng.gen::<f64>() < 0.6 {
1405            liability_accounts
1406        } else {
1407            revenue_accounts
1408        };
1409
1410        all.choose(&mut self.rng)
1411            .copied()
1412            .unwrap_or_else(|| &self.coa.accounts[0])
1413    }
1414}
1415
1416impl Generator for JournalEntryGenerator {
1417    type Item = JournalEntry;
1418    type Config = (
1419        TransactionConfig,
1420        Arc<ChartOfAccounts>,
1421        Vec<String>,
1422        NaiveDate,
1423        NaiveDate,
1424    );
1425
1426    fn new(config: Self::Config, seed: u64) -> Self {
1427        Self::new_with_params(config.0, config.1, config.2, config.3, config.4, seed)
1428    }
1429
1430    fn generate_one(&mut self) -> Self::Item {
1431        self.generate()
1432    }
1433
1434    fn reset(&mut self) {
1435        self.rng = ChaCha8Rng::seed_from_u64(self.seed);
1436        self.line_sampler.reset(self.seed + 1);
1437        self.amount_sampler.reset(self.seed + 2);
1438        self.temporal_sampler.reset(self.seed + 3);
1439        self.count = 0;
1440        self.uuid_factory.reset();
1441
1442        // Reset reference generator by recreating it
1443        let mut ref_gen = ReferenceGenerator::new(
1444            self.start_date.year(),
1445            self.companies.first().map(|s| s.as_str()).unwrap_or("1000"),
1446        );
1447        ref_gen.set_prefix(
1448            ReferenceType::Invoice,
1449            &self.template_config.references.invoice_prefix,
1450        );
1451        ref_gen.set_prefix(
1452            ReferenceType::PurchaseOrder,
1453            &self.template_config.references.po_prefix,
1454        );
1455        ref_gen.set_prefix(
1456            ReferenceType::SalesOrder,
1457            &self.template_config.references.so_prefix,
1458        );
1459        self.reference_generator = ref_gen;
1460    }
1461
1462    fn count(&self) -> u64 {
1463        self.count
1464    }
1465
1466    fn seed(&self) -> u64 {
1467        self.seed
1468    }
1469}
1470
1471#[cfg(test)]
1472mod tests {
1473    use super::*;
1474    use crate::ChartOfAccountsGenerator;
1475
1476    #[test]
1477    fn test_generate_balanced_entries() {
1478        let mut coa_gen =
1479            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
1480        let coa = Arc::new(coa_gen.generate());
1481
1482        let mut je_gen = JournalEntryGenerator::new_with_params(
1483            TransactionConfig::default(),
1484            coa,
1485            vec!["1000".to_string()],
1486            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
1487            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
1488            42,
1489        );
1490
1491        let mut balanced_count = 0;
1492        for _ in 0..100 {
1493            let entry = je_gen.generate();
1494
1495            // Skip entries with human errors as they may be intentionally unbalanced
1496            let has_human_error = entry
1497                .header
1498                .header_text
1499                .as_ref()
1500                .map(|t| t.contains("[HUMAN_ERROR:"))
1501                .unwrap_or(false);
1502
1503            if !has_human_error {
1504                assert!(
1505                    entry.is_balanced(),
1506                    "Entry {:?} is not balanced",
1507                    entry.header.document_id
1508                );
1509                balanced_count += 1;
1510            }
1511            assert!(entry.line_count() >= 2, "Entry has fewer than 2 lines");
1512        }
1513
1514        // Ensure most entries are balanced (human errors are rare)
1515        assert!(
1516            balanced_count >= 80,
1517            "Expected at least 80 balanced entries, got {}",
1518            balanced_count
1519        );
1520    }
1521
1522    #[test]
1523    fn test_deterministic_generation() {
1524        let mut coa_gen =
1525            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
1526        let coa = Arc::new(coa_gen.generate());
1527
1528        let mut gen1 = JournalEntryGenerator::new_with_params(
1529            TransactionConfig::default(),
1530            Arc::clone(&coa),
1531            vec!["1000".to_string()],
1532            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
1533            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
1534            42,
1535        );
1536
1537        let mut gen2 = JournalEntryGenerator::new_with_params(
1538            TransactionConfig::default(),
1539            coa,
1540            vec!["1000".to_string()],
1541            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
1542            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
1543            42,
1544        );
1545
1546        for _ in 0..50 {
1547            let e1 = gen1.generate();
1548            let e2 = gen2.generate();
1549            assert_eq!(e1.header.document_id, e2.header.document_id);
1550            assert_eq!(e1.total_debit(), e2.total_debit());
1551        }
1552    }
1553
1554    #[test]
1555    fn test_templates_generate_descriptions() {
1556        let mut coa_gen =
1557            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
1558        let coa = Arc::new(coa_gen.generate());
1559
1560        // Enable all template features
1561        let template_config = TemplateConfig {
1562            names: datasynth_config::schema::NameTemplateConfig {
1563                generate_realistic_names: true,
1564                email_domain: "test.com".to_string(),
1565                culture_distribution: datasynth_config::schema::CultureDistribution::default(),
1566            },
1567            descriptions: datasynth_config::schema::DescriptionTemplateConfig {
1568                generate_header_text: true,
1569                generate_line_text: true,
1570            },
1571            references: datasynth_config::schema::ReferenceTemplateConfig {
1572                generate_references: true,
1573                invoice_prefix: "TEST-INV".to_string(),
1574                po_prefix: "TEST-PO".to_string(),
1575                so_prefix: "TEST-SO".to_string(),
1576            },
1577        };
1578
1579        let mut je_gen = JournalEntryGenerator::new_with_full_config(
1580            TransactionConfig::default(),
1581            coa,
1582            vec!["1000".to_string()],
1583            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
1584            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
1585            42,
1586            template_config,
1587            None,
1588        )
1589        .with_persona_errors(false); // Disable for template testing
1590
1591        for _ in 0..10 {
1592            let entry = je_gen.generate();
1593
1594            // Verify header text is populated
1595            assert!(
1596                entry.header.header_text.is_some(),
1597                "Header text should be populated"
1598            );
1599
1600            // Verify reference is populated
1601            assert!(
1602                entry.header.reference.is_some(),
1603                "Reference should be populated"
1604            );
1605
1606            // Verify business process is set
1607            assert!(
1608                entry.header.business_process.is_some(),
1609                "Business process should be set"
1610            );
1611
1612            // Verify line text is populated
1613            for line in &entry.lines {
1614                assert!(line.line_text.is_some(), "Line text should be populated");
1615            }
1616
1617            // Entry should still be balanced
1618            assert!(entry.is_balanced());
1619        }
1620    }
1621
1622    #[test]
1623    fn test_user_pool_integration() {
1624        let mut coa_gen =
1625            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
1626        let coa = Arc::new(coa_gen.generate());
1627
1628        let companies = vec!["1000".to_string()];
1629
1630        // Generate user pool
1631        let mut user_gen = crate::UserGenerator::new(42);
1632        let user_pool = user_gen.generate_standard(&companies);
1633
1634        let mut je_gen = JournalEntryGenerator::new_with_full_config(
1635            TransactionConfig::default(),
1636            coa,
1637            companies,
1638            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
1639            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
1640            42,
1641            TemplateConfig::default(),
1642            Some(user_pool),
1643        );
1644
1645        // Generate entries and verify user IDs are from pool
1646        for _ in 0..20 {
1647            let entry = je_gen.generate();
1648
1649            // User ID should not be generic BATCH/USER format when pool is used
1650            // (though it may still fall back if random selection misses)
1651            assert!(!entry.header.created_by.is_empty());
1652        }
1653    }
1654
1655    #[test]
1656    fn test_master_data_connection() {
1657        let mut coa_gen =
1658            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
1659        let coa = Arc::new(coa_gen.generate());
1660
1661        // Create test vendors
1662        let vendors = vec![
1663            Vendor::new("V-TEST-001", "Test Vendor Alpha", VendorType::Supplier),
1664            Vendor::new("V-TEST-002", "Test Vendor Beta", VendorType::Technology),
1665        ];
1666
1667        // Create test customers
1668        let customers = vec![
1669            Customer::new("C-TEST-001", "Test Customer One", CustomerType::Corporate),
1670            Customer::new(
1671                "C-TEST-002",
1672                "Test Customer Two",
1673                CustomerType::SmallBusiness,
1674            ),
1675        ];
1676
1677        // Create test materials
1678        let materials = vec![Material::new(
1679            "MAT-TEST-001",
1680            "Test Material A",
1681            MaterialType::RawMaterial,
1682        )];
1683
1684        // Create generator with master data
1685        let generator = JournalEntryGenerator::new_with_params(
1686            TransactionConfig::default(),
1687            coa,
1688            vec!["1000".to_string()],
1689            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
1690            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
1691            42,
1692        );
1693
1694        // Without master data
1695        assert!(!generator.is_using_real_master_data());
1696
1697        // Connect master data
1698        let generator_with_data = generator
1699            .with_vendors(&vendors)
1700            .with_customers(&customers)
1701            .with_materials(&materials);
1702
1703        // Should now be using real master data
1704        assert!(generator_with_data.is_using_real_master_data());
1705    }
1706
1707    #[test]
1708    fn test_with_master_data_convenience_method() {
1709        let mut coa_gen =
1710            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
1711        let coa = Arc::new(coa_gen.generate());
1712
1713        let vendors = vec![Vendor::new("V-001", "Vendor One", VendorType::Supplier)];
1714        let customers = vec![Customer::new(
1715            "C-001",
1716            "Customer One",
1717            CustomerType::Corporate,
1718        )];
1719        let materials = vec![Material::new(
1720            "MAT-001",
1721            "Material One",
1722            MaterialType::RawMaterial,
1723        )];
1724
1725        let generator = JournalEntryGenerator::new_with_params(
1726            TransactionConfig::default(),
1727            coa,
1728            vec!["1000".to_string()],
1729            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
1730            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
1731            42,
1732        )
1733        .with_master_data(&vendors, &customers, &materials);
1734
1735        assert!(generator.is_using_real_master_data());
1736    }
1737
1738    #[test]
1739    fn test_stress_factors_increase_error_rate() {
1740        let mut coa_gen =
1741            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
1742        let coa = Arc::new(coa_gen.generate());
1743
1744        let generator = JournalEntryGenerator::new_with_params(
1745            TransactionConfig::default(),
1746            coa,
1747            vec!["1000".to_string()],
1748            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
1749            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
1750            42,
1751        );
1752
1753        let base_rate = 0.1;
1754
1755        // Regular day - no stress factors
1756        let regular_day = NaiveDate::from_ymd_opt(2024, 6, 15).unwrap(); // Mid-June Wednesday
1757        let regular_rate = generator.apply_stress_factors(base_rate, regular_day);
1758        assert!(
1759            (regular_rate - base_rate).abs() < 0.01,
1760            "Regular day should have minimal stress factor adjustment"
1761        );
1762
1763        // Month end - 50% more errors
1764        let month_end = NaiveDate::from_ymd_opt(2024, 6, 29).unwrap(); // June 29 (Saturday)
1765        let month_end_rate = generator.apply_stress_factors(base_rate, month_end);
1766        assert!(
1767            month_end_rate > regular_rate,
1768            "Month end should have higher error rate than regular day"
1769        );
1770
1771        // Year end - double the error rate
1772        let year_end = NaiveDate::from_ymd_opt(2024, 12, 30).unwrap(); // December 30
1773        let year_end_rate = generator.apply_stress_factors(base_rate, year_end);
1774        assert!(
1775            year_end_rate > month_end_rate,
1776            "Year end should have highest error rate"
1777        );
1778
1779        // Friday stress
1780        let friday = NaiveDate::from_ymd_opt(2024, 6, 14).unwrap(); // Friday
1781        let friday_rate = generator.apply_stress_factors(base_rate, friday);
1782        assert!(
1783            friday_rate > regular_rate,
1784            "Friday should have higher error rate than mid-week"
1785        );
1786
1787        // Monday stress
1788        let monday = NaiveDate::from_ymd_opt(2024, 6, 17).unwrap(); // Monday
1789        let monday_rate = generator.apply_stress_factors(base_rate, monday);
1790        assert!(
1791            monday_rate > regular_rate,
1792            "Monday should have higher error rate than mid-week"
1793        );
1794    }
1795
1796    #[test]
1797    fn test_batching_produces_similar_entries() {
1798        let mut coa_gen =
1799            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
1800        let coa = Arc::new(coa_gen.generate());
1801
1802        // Use seed 123 which is more likely to trigger batching
1803        let mut je_gen = JournalEntryGenerator::new_with_params(
1804            TransactionConfig::default(),
1805            coa,
1806            vec!["1000".to_string()],
1807            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
1808            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
1809            123,
1810        )
1811        .with_persona_errors(false); // Disable to ensure balanced entries
1812
1813        // Generate many entries - at 15% batch rate, should see some batches
1814        let entries: Vec<JournalEntry> = (0..200).map(|_| je_gen.generate()).collect();
1815
1816        // Check that all entries are balanced (batched or not)
1817        for entry in &entries {
1818            assert!(
1819                entry.is_balanced(),
1820                "All entries including batched should be balanced"
1821            );
1822        }
1823
1824        // Count entries with same-day posting dates (batch indicator)
1825        let mut date_counts: std::collections::HashMap<NaiveDate, usize> =
1826            std::collections::HashMap::new();
1827        for entry in &entries {
1828            *date_counts.entry(entry.header.posting_date).or_insert(0) += 1;
1829        }
1830
1831        // With batching, some dates should have multiple entries
1832        let dates_with_multiple = date_counts.values().filter(|&&c| c > 1).count();
1833        assert!(
1834            dates_with_multiple > 0,
1835            "With batching, should see some dates with multiple entries"
1836        );
1837    }
1838}