datasynth_generators/
je_generator.rs

//! Journal entry generator with statistical distributions.
2
3use chrono::{Datelike, NaiveDate};
4use rand::prelude::*;
5use rand_chacha::ChaCha8Rng;
6use rust_decimal::Decimal;
7use std::sync::Arc;
8
9use datasynth_config::schema::{FraudConfig, GeneratorConfig, TemplateConfig, TransactionConfig};
10use datasynth_core::distributions::*;
11use datasynth_core::models::*;
12use datasynth_core::templates::{
13    descriptions::DescriptionContext, DescriptionGenerator, ReferenceGenerator, ReferenceType,
14};
15use datasynth_core::traits::Generator;
16use datasynth_core::uuid_factory::{DeterministicUuidFactory, GeneratorType};
17
18use crate::company_selector::WeightedCompanySelector;
19use crate::user_generator::{UserGenerator, UserGeneratorConfig};
20
/// Generator for realistic journal entries.
///
/// Bundles the seeded random source, the statistical samplers, the
/// master-data pools and the feature switches that `generate` consults when
/// building one `JournalEntry` per call.
pub struct JournalEntryGenerator {
    /// Primary random source, seeded from `seed` at construction.
    rng: ChaCha8Rng,
    /// Seed supplied at construction; the samplers are seeded from offsets of it.
    seed: u64,
    /// Transaction configuration the samplers were built from.
    config: TransactionConfig,
    /// Shared chart of accounts.
    coa: Arc<ChartOfAccounts>,
    /// Company codes this generator was constructed with.
    companies: Vec<String>,
    /// Picks the company code for each entry (uniform or volume-weighted).
    company_selector: WeightedCompanySelector,
    /// Samples the line-item specification (debit/credit counts) per entry.
    line_sampler: LineItemSampler,
    /// Samples entry totals and splits a total across lines.
    amount_sampler: AmountSampler,
    /// Samples posting dates and creation times.
    temporal_sampler: TemporalSampler,
    /// First date passed to the temporal sampler when sampling posting dates.
    start_date: NaiveDate,
    /// Last date passed to the temporal sampler when sampling posting dates.
    end_date: NaiveDate,
    /// Number of entries generated so far (incremented once per entry).
    count: u64,
    /// Source of deterministic document ids.
    uuid_factory: DeterministicUuidFactory,
    // --- Enhanced features ---
    /// Optional user pool; built by the constructor when realistic names are
    /// enabled and no pool is supplied, otherwise `None`.
    user_pool: Option<UserPool>,
    /// Produces header and line texts.
    description_generator: DescriptionGenerator,
    /// Produces document references; prefixes are taken from `template_config`.
    reference_generator: ReferenceGenerator,
    /// Template settings (names, descriptions, references).
    template_config: TemplateConfig,
    /// Vendor pool; starts as `VendorPool::standard()` and is replaced by `with_vendors`.
    vendor_pool: VendorPool,
    /// Customer pool; starts as `CustomerPool::standard()` and is replaced by `with_customers`.
    customer_pool: CustomerPool,
    /// Material pool for realistic material references; `None` until `with_materials` is called
    /// with a non-empty slice.
    material_pool: Option<MaterialPool>,
    /// Flag indicating whether we're using real master data vs defaults.
    using_real_master_data: bool,
    /// Fraud generation settings (enable switch, rate, type distribution).
    fraud_config: FraudConfig,
    /// Persona-based error injection switch (on by default).
    persona_errors_enabled: bool,
    /// Approval threshold enforcement switch (on by default).
    approval_enabled: bool,
    /// Approval threshold; the constructor sets it to 10,000.
    approval_threshold: rust_decimal::Decimal,
    /// Batching behavior - humans often process similar items together.
    /// `Some` while a batch of similar entries is being emitted.
    batch_state: Option<BatchState>,
}
57
/// State for tracking batch processing behavior.
///
/// When humans process transactions, they often batch similar items together
/// (e.g., processing all invoices from one vendor, entering similar expenses).
/// A `BatchState` is a snapshot of the entry that started the batch plus a
/// countdown of how many follow-up entries are still to be produced.
#[derive(Clone)]
struct BatchState {
    /// Vendor of the base entry. NOTE(review): the only visible constructor
    /// always stores `None` here.
    base_vendor: Option<String>,
    /// Customer of the base entry. NOTE(review): the only visible constructor
    /// always stores `None` here.
    base_customer: Option<String>,
    /// GL account of the base entry's first line; reused as the debit account
    /// of every batched entry.
    base_account_number: String,
    /// Total debit of the base entry; batched amounts vary around this value.
    base_amount: rust_decimal::Decimal,
    /// Business process of the base entry, if it had one.
    base_business_process: Option<BusinessProcess>,
    /// Posting date of the base entry; batched entries reuse it.
    base_posting_date: NaiveDate,
    /// Remaining entries in this batch
    remaining: u8,
}
74
75impl JournalEntryGenerator {
76    /// Create a new journal entry generator.
77    pub fn new_with_params(
78        config: TransactionConfig,
79        coa: Arc<ChartOfAccounts>,
80        companies: Vec<String>,
81        start_date: NaiveDate,
82        end_date: NaiveDate,
83        seed: u64,
84    ) -> Self {
85        Self::new_with_full_config(
86            config,
87            coa,
88            companies,
89            start_date,
90            end_date,
91            seed,
92            TemplateConfig::default(),
93            None,
94        )
95    }
96
97    /// Create a new journal entry generator with full configuration.
98    #[allow(clippy::too_many_arguments)]
99    pub fn new_with_full_config(
100        config: TransactionConfig,
101        coa: Arc<ChartOfAccounts>,
102        companies: Vec<String>,
103        start_date: NaiveDate,
104        end_date: NaiveDate,
105        seed: u64,
106        template_config: TemplateConfig,
107        user_pool: Option<UserPool>,
108    ) -> Self {
109        // Initialize user pool if not provided
110        let user_pool = user_pool.or_else(|| {
111            if template_config.names.generate_realistic_names {
112                let user_gen_config = UserGeneratorConfig {
113                    culture_distribution: vec![
114                        (
115                            datasynth_core::templates::NameCulture::WesternUs,
116                            template_config.names.culture_distribution.western_us,
117                        ),
118                        (
119                            datasynth_core::templates::NameCulture::Hispanic,
120                            template_config.names.culture_distribution.hispanic,
121                        ),
122                        (
123                            datasynth_core::templates::NameCulture::German,
124                            template_config.names.culture_distribution.german,
125                        ),
126                        (
127                            datasynth_core::templates::NameCulture::French,
128                            template_config.names.culture_distribution.french,
129                        ),
130                        (
131                            datasynth_core::templates::NameCulture::Chinese,
132                            template_config.names.culture_distribution.chinese,
133                        ),
134                        (
135                            datasynth_core::templates::NameCulture::Japanese,
136                            template_config.names.culture_distribution.japanese,
137                        ),
138                        (
139                            datasynth_core::templates::NameCulture::Indian,
140                            template_config.names.culture_distribution.indian,
141                        ),
142                    ],
143                    email_domain: template_config.names.email_domain.clone(),
144                    generate_realistic_names: true,
145                };
146                let mut user_gen = UserGenerator::with_config(seed + 100, user_gen_config);
147                Some(user_gen.generate_standard(&companies))
148            } else {
149                None
150            }
151        });
152
153        // Initialize reference generator
154        let mut ref_gen = ReferenceGenerator::new(
155            start_date.year(),
156            companies.first().map(|s| s.as_str()).unwrap_or("1000"),
157        );
158        ref_gen.set_prefix(
159            ReferenceType::Invoice,
160            &template_config.references.invoice_prefix,
161        );
162        ref_gen.set_prefix(
163            ReferenceType::PurchaseOrder,
164            &template_config.references.po_prefix,
165        );
166        ref_gen.set_prefix(
167            ReferenceType::SalesOrder,
168            &template_config.references.so_prefix,
169        );
170
171        // Create weighted company selector (uniform weights for this constructor)
172        let company_selector = WeightedCompanySelector::uniform(companies.clone());
173
174        Self {
175            rng: ChaCha8Rng::seed_from_u64(seed),
176            seed,
177            config: config.clone(),
178            coa,
179            companies,
180            company_selector,
181            line_sampler: LineItemSampler::with_config(
182                seed + 1,
183                config.line_item_distribution.clone(),
184                config.even_odd_distribution.clone(),
185                config.debit_credit_distribution.clone(),
186            ),
187            amount_sampler: AmountSampler::with_config(seed + 2, config.amounts.clone()),
188            temporal_sampler: TemporalSampler::with_config(
189                seed + 3,
190                config.seasonality.clone(),
191                WorkingHoursConfig::default(),
192                Vec::new(),
193            ),
194            start_date,
195            end_date,
196            count: 0,
197            uuid_factory: DeterministicUuidFactory::new(seed, GeneratorType::JournalEntry),
198            user_pool,
199            description_generator: DescriptionGenerator::new(),
200            reference_generator: ref_gen,
201            template_config,
202            vendor_pool: VendorPool::standard(),
203            customer_pool: CustomerPool::standard(),
204            material_pool: None,
205            using_real_master_data: false,
206            fraud_config: FraudConfig::default(),
207            persona_errors_enabled: true, // Enable by default for realism
208            approval_enabled: true,       // Enable by default for realism
209            approval_threshold: rust_decimal::Decimal::new(10000, 0), // $10,000 default threshold
210            batch_state: None,
211        }
212    }
213
214    /// Create from a full GeneratorConfig.
215    ///
216    /// This constructor uses the volume_weight from company configs
217    /// for weighted company selection, and fraud config from GeneratorConfig.
218    pub fn from_generator_config(
219        full_config: &GeneratorConfig,
220        coa: Arc<ChartOfAccounts>,
221        start_date: NaiveDate,
222        end_date: NaiveDate,
223        seed: u64,
224    ) -> Self {
225        let companies: Vec<String> = full_config
226            .companies
227            .iter()
228            .map(|c| c.code.clone())
229            .collect();
230
231        // Create weighted selector using volume_weight from company configs
232        let company_selector = WeightedCompanySelector::from_configs(&full_config.companies);
233
234        let mut generator = Self::new_with_full_config(
235            full_config.transactions.clone(),
236            coa,
237            companies,
238            start_date,
239            end_date,
240            seed,
241            full_config.templates.clone(),
242            None,
243        );
244
245        // Override the uniform selector with weighted selector
246        generator.company_selector = company_selector;
247
248        // Set fraud config
249        generator.fraud_config = full_config.fraud.clone();
250
251        generator
252    }
253
    /// Set a custom company selector.
    ///
    /// Replaces the selector currently used to pick the company code of each
    /// generated entry.
    pub fn set_company_selector(&mut self, selector: WeightedCompanySelector) {
        self.company_selector = selector;
    }
258
    /// Get the current company selector.
    ///
    /// Returns a shared reference; use `set_company_selector` to replace it.
    pub fn company_selector(&self) -> &WeightedCompanySelector {
        &self.company_selector
    }
263
    /// Set fraud configuration.
    ///
    /// In-place counterpart of `with_fraud_config`: overwrites the stored
    /// fraud settings on an existing generator.
    pub fn set_fraud_config(&mut self, config: FraudConfig) {
        self.fraud_config = config;
    }
268
269    /// Set vendors from generated master data.
270    ///
271    /// This replaces the default vendor pool with actual generated vendors,
272    /// ensuring JEs reference real master data entities.
273    pub fn with_vendors(mut self, vendors: &[Vendor]) -> Self {
274        if !vendors.is_empty() {
275            self.vendor_pool = VendorPool::from_vendors(vendors.to_vec());
276            self.using_real_master_data = true;
277        }
278        self
279    }
280
281    /// Set customers from generated master data.
282    ///
283    /// This replaces the default customer pool with actual generated customers,
284    /// ensuring JEs reference real master data entities.
285    pub fn with_customers(mut self, customers: &[Customer]) -> Self {
286        if !customers.is_empty() {
287            self.customer_pool = CustomerPool::from_customers(customers.to_vec());
288            self.using_real_master_data = true;
289        }
290        self
291    }
292
293    /// Set materials from generated master data.
294    ///
295    /// This provides material references for JEs that involve inventory movements.
296    pub fn with_materials(mut self, materials: &[Material]) -> Self {
297        if !materials.is_empty() {
298            self.material_pool = Some(MaterialPool::from_materials(materials.to_vec()));
299            self.using_real_master_data = true;
300        }
301        self
302    }
303
304    /// Set all master data at once for convenience.
305    ///
306    /// This is the recommended way to configure the JE generator with
307    /// generated master data to ensure data coherence.
308    pub fn with_master_data(
309        self,
310        vendors: &[Vendor],
311        customers: &[Customer],
312        materials: &[Material],
313    ) -> Self {
314        self.with_vendors(vendors)
315            .with_customers(customers)
316            .with_materials(materials)
317    }
318
    /// Check if the generator is using real master data.
    ///
    /// Returns `true` once any of `with_vendors`, `with_customers` or
    /// `with_materials` has been called with a non-empty slice.
    pub fn is_using_real_master_data(&self) -> bool {
        self.using_real_master_data
    }
323
324    /// Determine if this transaction should be fraudulent.
325    fn determine_fraud(&mut self) -> Option<FraudType> {
326        if !self.fraud_config.enabled {
327            return None;
328        }
329
330        // Roll for fraud based on fraud rate
331        if self.rng.gen::<f64>() >= self.fraud_config.fraud_rate {
332            return None;
333        }
334
335        // Select fraud type based on distribution
336        Some(self.select_fraud_type())
337    }
338
339    /// Select a fraud type based on the configured distribution.
340    fn select_fraud_type(&mut self) -> FraudType {
341        let dist = &self.fraud_config.fraud_type_distribution;
342        let roll: f64 = self.rng.gen();
343
344        let mut cumulative = 0.0;
345
346        cumulative += dist.suspense_account_abuse;
347        if roll < cumulative {
348            return FraudType::SuspenseAccountAbuse;
349        }
350
351        cumulative += dist.fictitious_transaction;
352        if roll < cumulative {
353            return FraudType::FictitiousTransaction;
354        }
355
356        cumulative += dist.revenue_manipulation;
357        if roll < cumulative {
358            return FraudType::RevenueManipulation;
359        }
360
361        cumulative += dist.expense_capitalization;
362        if roll < cumulative {
363            return FraudType::ExpenseCapitalization;
364        }
365
366        cumulative += dist.split_transaction;
367        if roll < cumulative {
368            return FraudType::SplitTransaction;
369        }
370
371        cumulative += dist.timing_anomaly;
372        if roll < cumulative {
373            return FraudType::TimingAnomaly;
374        }
375
376        cumulative += dist.unauthorized_access;
377        if roll < cumulative {
378            return FraudType::UnauthorizedAccess;
379        }
380
381        // Default fallback
382        FraudType::DuplicatePayment
383    }
384
385    /// Map a fraud type to an amount pattern for suspicious amounts.
386    fn fraud_type_to_amount_pattern(&self, fraud_type: FraudType) -> FraudAmountPattern {
387        match fraud_type {
388            FraudType::SplitTransaction | FraudType::JustBelowThreshold => {
389                FraudAmountPattern::ThresholdAdjacent
390            }
391            FraudType::FictitiousTransaction
392            | FraudType::FictitiousEntry
393            | FraudType::SuspenseAccountAbuse
394            | FraudType::RoundDollarManipulation => FraudAmountPattern::ObviousRoundNumbers,
395            FraudType::RevenueManipulation
396            | FraudType::ExpenseCapitalization
397            | FraudType::ImproperCapitalization
398            | FraudType::ReserveManipulation
399            | FraudType::UnauthorizedAccess
400            | FraudType::PrematureRevenue
401            | FraudType::UnderstatedLiabilities
402            | FraudType::OverstatedAssets
403            | FraudType::ChannelStuffing => FraudAmountPattern::StatisticallyImprobable,
404            FraudType::DuplicatePayment
405            | FraudType::TimingAnomaly
406            | FraudType::SelfApproval
407            | FraudType::ExceededApprovalLimit
408            | FraudType::SegregationOfDutiesViolation
409            | FraudType::UnauthorizedApproval
410            | FraudType::CollusiveApproval
411            | FraudType::FictitiousVendor
412            | FraudType::ShellCompanyPayment
413            | FraudType::Kickback
414            | FraudType::KickbackScheme
415            | FraudType::InvoiceManipulation
416            | FraudType::AssetMisappropriation
417            | FraudType::InventoryTheft
418            | FraudType::GhostEmployee => FraudAmountPattern::Normal,
419        }
420    }
421
    /// Generate a deterministic UUID using the factory.
    ///
    /// Thin wrapper over `uuid_factory.next()`. It takes `&self`, so the
    /// factory presumably advances its own internal counter — TODO confirm
    /// against `DeterministicUuidFactory`.
    fn generate_deterministic_uuid(&self) -> uuid::Uuid {
        self.uuid_factory.next()
    }
426
    /// Generate a single journal entry.
    ///
    /// If a batch is in progress (`batch_state` holds `remaining > 0`), the
    /// call is delegated to `generate_batched_entry`. Otherwise a fresh entry
    /// is built in this order: document id, posting date, company, line
    /// specification, source, business process, fraud decision, creation time,
    /// user, header texts/reference, amount, debit lines, credit lines. Persona
    /// errors and the approval workflow are then applied when enabled, and the
    /// entry may start a new batch for subsequent calls.
    ///
    /// The order of the sampling calls determines the random streams consumed,
    /// so reordering statements changes the generated data for a given seed.
    pub fn generate(&mut self) -> JournalEntry {
        // Check if we're in a batch - if so, generate a batched entry
        if let Some(ref state) = self.batch_state {
            if state.remaining > 0 {
                return self.generate_batched_entry();
            }
        }

        self.count += 1;

        // Generate deterministic document ID
        let document_id = self.generate_deterministic_uuid();

        // Sample posting date
        let posting_date = self
            .temporal_sampler
            .sample_date(self.start_date, self.end_date);

        // Select company using weighted selector
        let company_code = self.company_selector.select(&mut self.rng).to_string();

        // Sample line item specification
        let line_spec = self.line_sampler.sample();

        // Determine source type using full 4-way distribution
        let source = self.select_source();
        // Automated and recurring entries are both treated as system-generated.
        let is_automated = matches!(
            source,
            TransactionSource::Automated | TransactionSource::Recurring
        );

        // Select business process
        let business_process = self.select_business_process();

        // Determine if this is a fraudulent transaction
        let fraud_type = self.determine_fraud();
        let is_fraud = fraud_type.is_some();

        // Sample time based on source (the flag passed is "entered by a human")
        let time = self.temporal_sampler.sample_time(!is_automated);
        let created_at = posting_date.and_time(time).and_utc();

        // Select user from pool or generate generic
        let (created_by, user_persona) = self.select_user(is_automated);

        // Create header with deterministic UUID
        let mut header =
            JournalEntryHeader::with_deterministic_id(company_code, posting_date, document_id);
        header.created_at = created_at;
        header.source = source;
        header.created_by = created_by;
        header.user_persona = user_persona;
        header.business_process = Some(business_process);
        header.is_fraud = is_fraud;
        header.fraud_type = fraud_type;

        // Generate description context
        let mut context =
            DescriptionContext::with_period(posting_date.month(), posting_date.year());

        // Add vendor/customer context based on business process:
        // P2P entries get a vendor name, O2C entries a customer name.
        match business_process {
            BusinessProcess::P2P => {
                if let Some(vendor) = self.vendor_pool.random_vendor(&mut self.rng) {
                    context.vendor_name = Some(vendor.name.clone());
                }
            }
            BusinessProcess::O2C => {
                if let Some(customer) = self.customer_pool.random_customer(&mut self.rng) {
                    context.customer_name = Some(customer.name.clone());
                }
            }
            _ => {}
        }

        // Generate header text if enabled
        if self.template_config.descriptions.generate_header_text {
            header.header_text = Some(self.description_generator.generate_header_text(
                business_process,
                &context,
                &mut self.rng,
            ));
        }

        // Generate reference if enabled
        if self.template_config.references.generate_references {
            header.reference = Some(
                self.reference_generator
                    .generate_for_process_year(business_process, posting_date.year()),
            );
        }

        // Generate line items
        let mut entry = JournalEntry::new(header);

        // Generate amount - use fraud pattern if this is a fraudulent transaction
        let base_amount = if let Some(ft) = fraud_type {
            let pattern = self.fraud_type_to_amount_pattern(ft);
            self.amount_sampler.sample_fraud(pattern)
        } else {
            self.amount_sampler.sample()
        };

        // Apply human variation to amounts for non-automated transactions
        let total_amount = if is_automated {
            base_amount // Automated systems use exact amounts
        } else {
            self.apply_human_variation(base_amount)
        };

        // Generate debit lines: split the total across `debit_count` lines,
        // numbered from 1.
        let debit_amounts = self
            .amount_sampler
            .sample_summing_to(line_spec.debit_count, total_amount);
        for (i, amount) in debit_amounts.into_iter().enumerate() {
            let account_number = self.select_debit_account().account_number.clone();
            let mut line = JournalEntryLine::debit(
                entry.header.document_id,
                (i + 1) as u32,
                account_number.clone(),
                amount,
            );

            // Generate line text if enabled
            if self.template_config.descriptions.generate_line_text {
                line.line_text = Some(self.description_generator.generate_line_text(
                    &account_number,
                    &context,
                    &mut self.rng,
                ));
            }

            entry.add_line(line);
        }

        // Generate credit lines - a second, independent split of the SAME
        // total, so debits and credits balance even though the individual line
        // amounts differ. Line numbers continue after the debit lines.
        let credit_amounts = self
            .amount_sampler
            .sample_summing_to(line_spec.credit_count, total_amount);
        for (i, amount) in credit_amounts.into_iter().enumerate() {
            let account_number = self.select_credit_account().account_number.clone();
            let mut line = JournalEntryLine::credit(
                entry.header.document_id,
                (line_spec.debit_count + i + 1) as u32,
                account_number.clone(),
                amount,
            );

            // Generate line text if enabled
            if self.template_config.descriptions.generate_line_text {
                line.line_text = Some(self.description_generator.generate_line_text(
                    &account_number,
                    &context,
                    &mut self.rng,
                ));
            }

            entry.add_line(line);
        }

        // Apply persona-based errors if enabled and it's a human user
        if self.persona_errors_enabled && !is_automated {
            self.maybe_inject_persona_error(&mut entry);
        }

        // Apply approval workflow if enabled; the threshold check itself is
        // left to `maybe_apply_approval_workflow`.
        if self.approval_enabled {
            self.maybe_apply_approval_workflow(&mut entry, posting_date);
        }

        // Maybe start a batch of similar entries for realism
        self.maybe_start_batch(&entry);

        entry
    }
603
604    /// Enable or disable persona-based error injection.
605    ///
606    /// When enabled, entries created by human personas have a chance
607    /// to contain realistic human errors based on their experience level.
608    pub fn with_persona_errors(mut self, enabled: bool) -> Self {
609        self.persona_errors_enabled = enabled;
610        self
611    }
612
613    /// Set fraud configuration for fraud injection.
614    ///
615    /// When fraud is enabled in the config, transactions have a chance
616    /// to be marked as fraudulent based on the configured fraud rate.
617    pub fn with_fraud_config(mut self, config: FraudConfig) -> Self {
618        self.fraud_config = config;
619        self
620    }
621
    /// Check if persona errors are enabled.
    ///
    /// Reflects the value last set through `with_persona_errors` (the
    /// constructor enables it by default).
    pub fn persona_errors_enabled(&self) -> bool {
        self.persona_errors_enabled
    }
626
627    /// Enable or disable batch processing behavior.
628    ///
629    /// When enabled (default), the generator will occasionally produce batches
630    /// of similar entries, simulating how humans batch similar work together.
631    pub fn with_batching(mut self, enabled: bool) -> Self {
632        if !enabled {
633            self.batch_state = None;
634        }
635        self
636    }
637
    /// Check if batch processing is enabled.
    ///
    /// NOTE(review): this currently always returns `true`; there is no stored
    /// switch, so `with_batching(false)` is not reflected here.
    pub fn batching_enabled(&self) -> bool {
        // Batching is implicitly enabled when not explicitly disabled
        true
    }
643
644    /// Maybe start a batch based on the current entry.
645    ///
646    /// Humans often batch similar work: processing invoices from one vendor,
647    /// entering expense reports for a trip, reconciling similar items.
648    fn maybe_start_batch(&mut self, entry: &JournalEntry) {
649        // Only start batch for non-automated, non-fraud entries
650        if entry.header.source == TransactionSource::Automated || entry.header.is_fraud {
651            return;
652        }
653
654        // 15% chance to start a batch (most work is not batched)
655        if self.rng.gen::<f64>() > 0.15 {
656            return;
657        }
658
659        // Extract key attributes for batching
660        let base_account = entry
661            .lines
662            .first()
663            .map(|l| l.gl_account.clone())
664            .unwrap_or_default();
665
666        let base_amount = entry.total_debit();
667
668        self.batch_state = Some(BatchState {
669            base_vendor: None, // Would need vendor from context
670            base_customer: None,
671            base_account_number: base_account,
672            base_amount,
673            base_business_process: entry.header.business_process,
674            base_posting_date: entry.header.posting_date,
675            remaining: self.rng.gen_range(2..7), // 2-6 more similar entries
676        });
677    }
678
679    /// Generate an entry that's part of the current batch.
680    ///
681    /// Batched entries have:
682    /// - Same or very similar business process
683    /// - Same posting date (batched work done together)
684    /// - Similar amounts (within ±15%)
685    /// - Same debit account (processing similar items)
686    fn generate_batched_entry(&mut self) -> JournalEntry {
687        use rust_decimal::Decimal;
688
689        // Decrement batch counter
690        if let Some(ref mut state) = self.batch_state {
691            state.remaining = state.remaining.saturating_sub(1);
692        }
693
694        let batch = self.batch_state.clone().unwrap();
695
696        // Use the batch's posting date (work done on same day)
697        let posting_date = batch.base_posting_date;
698
699        self.count += 1;
700        let document_id = self.generate_deterministic_uuid();
701
702        // Select same company (batched work is usually same company)
703        let company_code = self.company_selector.select(&mut self.rng).to_string();
704
705        // Use simplified line spec for batched entries (usually 2-line)
706        let _line_spec = LineItemSpec {
707            total_count: 2,
708            debit_count: 1,
709            credit_count: 1,
710            split_type: DebitCreditSplit::Equal,
711        };
712
713        // Batched entries are always manual
714        let source = TransactionSource::Manual;
715
716        // Use the batch's business process
717        let business_process = batch.base_business_process.unwrap_or(BusinessProcess::R2R);
718
719        // Sample time
720        let time = self.temporal_sampler.sample_time(true);
721        let created_at = posting_date.and_time(time).and_utc();
722
723        // Same user for batched work
724        let (created_by, user_persona) = self.select_user(false);
725
726        // Create header
727        let mut header =
728            JournalEntryHeader::with_deterministic_id(company_code, posting_date, document_id);
729        header.created_at = created_at;
730        header.source = source;
731        header.created_by = created_by;
732        header.user_persona = user_persona;
733        header.business_process = Some(business_process);
734
735        // Generate similar amount (within ±15% of base)
736        let variation = self.rng.gen_range(-0.15..0.15);
737        let varied_amount =
738            batch.base_amount * (Decimal::ONE + Decimal::try_from(variation).unwrap_or_default());
739        let total_amount = varied_amount.round_dp(2).max(Decimal::from(1));
740
741        // Create the entry
742        let mut entry = JournalEntry::new(header);
743
744        // Use same debit account as batch base
745        let debit_line = JournalEntryLine::debit(
746            entry.header.document_id,
747            1,
748            batch.base_account_number.clone(),
749            total_amount,
750        );
751        entry.add_line(debit_line);
752
753        // Select a credit account
754        let credit_account = self.select_credit_account().account_number.clone();
755        let credit_line =
756            JournalEntryLine::credit(entry.header.document_id, 2, credit_account, total_amount);
757        entry.add_line(credit_line);
758
759        // Apply persona-based errors if enabled
760        if self.persona_errors_enabled {
761            self.maybe_inject_persona_error(&mut entry);
762        }
763
764        // Apply approval workflow if enabled
765        if self.approval_enabled {
766            self.maybe_apply_approval_workflow(&mut entry, posting_date);
767        }
768
769        // Clear batch state if no more entries remaining
770        if batch.remaining <= 1 {
771            self.batch_state = None;
772        }
773
774        entry
775    }
776
777    /// Maybe inject a persona-appropriate error based on the persona's error rate.
778    fn maybe_inject_persona_error(&mut self, entry: &mut JournalEntry) {
779        // Parse persona from the entry header
780        let persona_str = &entry.header.user_persona;
781        let persona = match persona_str.to_lowercase().as_str() {
782            s if s.contains("junior") => UserPersona::JuniorAccountant,
783            s if s.contains("senior") => UserPersona::SeniorAccountant,
784            s if s.contains("controller") => UserPersona::Controller,
785            s if s.contains("manager") => UserPersona::Manager,
786            s if s.contains("executive") => UserPersona::Executive,
787            _ => return, // Don't inject errors for unknown personas
788        };
789
790        // Get base error rate from persona
791        let base_error_rate = persona.error_rate();
792
793        // Apply stress factors based on posting date
794        let adjusted_rate = self.apply_stress_factors(base_error_rate, entry.header.posting_date);
795
796        // Check if error should occur based on adjusted rate
797        if self.rng.gen::<f64>() >= adjusted_rate {
798            return; // No error this time
799        }
800
801        // Select and inject persona-appropriate error
802        self.inject_human_error(entry, persona);
803    }
804
805    /// Apply contextual stress factors to the base error rate.
806    ///
807    /// Stress factors increase error likelihood during:
808    /// - Month-end (day >= 28): 1.5x more errors due to deadline pressure
809    /// - Quarter-end (Mar, Jun, Sep, Dec): additional 25% boost
810    /// - Year-end (December 28-31): 2.0x more errors due to audit pressure
811    /// - Monday morning (catch-up work): 20% more errors
812    /// - Friday afternoon (rushing to leave): 30% more errors
813    fn apply_stress_factors(&self, base_rate: f64, posting_date: chrono::NaiveDate) -> f64 {
814        use chrono::Datelike;
815
816        let mut rate = base_rate;
817        let day = posting_date.day();
818        let month = posting_date.month();
819
820        // Year-end stress (December 28-31): double the error rate
821        if month == 12 && day >= 28 {
822            rate *= 2.0;
823            return rate.min(0.5); // Cap at 50% to keep it realistic
824        }
825
826        // Quarter-end stress (last days of Mar, Jun, Sep, Dec)
827        if matches!(month, 3 | 6 | 9 | 12) && day >= 28 {
828            rate *= 1.75; // 75% more errors at quarter end
829            return rate.min(0.4);
830        }
831
832        // Month-end stress (last 3 days of month)
833        if day >= 28 {
834            rate *= 1.5; // 50% more errors at month end
835        }
836
837        // Day-of-week stress effects
838        let weekday = posting_date.weekday();
839        match weekday {
840            chrono::Weekday::Mon => {
841                // Monday: catching up, often rushed
842                rate *= 1.2;
843            }
844            chrono::Weekday::Fri => {
845                // Friday: rushing to finish before weekend
846                rate *= 1.3;
847            }
848            _ => {}
849        }
850
851        // Cap at 40% to keep it realistic
852        rate.min(0.4)
853    }
854
855    /// Apply human-like variation to an amount.
856    ///
857    /// Humans don't enter perfectly calculated amounts - they:
858    /// - Round amounts differently
859    /// - Estimate instead of calculating exactly
860    /// - Make small input variations
861    ///
862    /// This applies small variations (typically ±2%) to make amounts more realistic.
863    fn apply_human_variation(&mut self, amount: rust_decimal::Decimal) -> rust_decimal::Decimal {
864        use rust_decimal::Decimal;
865
866        // Automated transactions or very small amounts don't get variation
867        if amount < Decimal::from(10) {
868            return amount;
869        }
870
871        // 70% chance of human variation being applied
872        if self.rng.gen::<f64>() > 0.70 {
873            return amount;
874        }
875
876        // Decide which type of human variation to apply
877        let variation_type: u8 = self.rng.gen_range(0..4);
878
879        match variation_type {
880            0 => {
881                // ±2% variation (common for estimated amounts)
882                let variation_pct = self.rng.gen_range(-0.02..0.02);
883                let variation = amount * Decimal::try_from(variation_pct).unwrap_or_default();
884                (amount + variation).round_dp(2)
885            }
886            1 => {
887                // Round to nearest $10
888                let ten = Decimal::from(10);
889                (amount / ten).round() * ten
890            }
891            2 => {
892                // Round to nearest $100 (for larger amounts)
893                if amount >= Decimal::from(500) {
894                    let hundred = Decimal::from(100);
895                    (amount / hundred).round() * hundred
896                } else {
897                    amount
898                }
899            }
900            3 => {
901                // Slight under/over payment (±$0.01 to ±$1.00)
902                let cents = Decimal::new(self.rng.gen_range(-100..100), 2);
903                (amount + cents).max(Decimal::ZERO).round_dp(2)
904            }
905            _ => amount,
906        }
907    }
908
909    /// Rebalance an entry after a one-sided amount modification.
910    ///
911    /// When an error modifies one line's amount, this finds a line on the opposite
912    /// side (credit if modified was debit, or vice versa) and adjusts it by the
913    /// same impact to maintain balance.
914    fn rebalance_entry(entry: &mut JournalEntry, modified_was_debit: bool, impact: Decimal) {
915        // Find a line on the opposite side to adjust
916        let balancing_idx = entry.lines.iter().position(|l| {
917            if modified_was_debit {
918                l.credit_amount > Decimal::ZERO
919            } else {
920                l.debit_amount > Decimal::ZERO
921            }
922        });
923
924        if let Some(idx) = balancing_idx {
925            if modified_was_debit {
926                entry.lines[idx].credit_amount += impact;
927            } else {
928                entry.lines[idx].debit_amount += impact;
929            }
930        }
931    }
932
933    /// Inject a human-like error based on the persona.
934    ///
935    /// All error types maintain balance - amount modifications are applied to both sides.
936    /// Entries are marked with [HUMAN_ERROR:*] tags in header_text for ML detection.
937    fn inject_human_error(&mut self, entry: &mut JournalEntry, persona: UserPersona) {
938        use rust_decimal::Decimal;
939
940        // Different personas make different types of errors
941        let error_type: u8 = match persona {
942            UserPersona::JuniorAccountant => {
943                // Junior accountants make more varied errors
944                self.rng.gen_range(0..5)
945            }
946            UserPersona::SeniorAccountant => {
947                // Senior accountants mainly make transposition errors
948                self.rng.gen_range(0..3)
949            }
950            UserPersona::Controller | UserPersona::Manager => {
951                // Controllers/managers mainly make rounding or cutoff errors
952                self.rng.gen_range(3..5)
953            }
954            _ => return,
955        };
956
957        match error_type {
958            0 => {
959                // Transposed digits in an amount
960                if let Some(line) = entry.lines.get_mut(0) {
961                    let is_debit = line.debit_amount > Decimal::ZERO;
962                    let original_amount = if is_debit {
963                        line.debit_amount
964                    } else {
965                        line.credit_amount
966                    };
967
968                    // Simple digit swap in the string representation
969                    let s = original_amount.to_string();
970                    if s.len() >= 2 {
971                        let chars: Vec<char> = s.chars().collect();
972                        let pos = self.rng.gen_range(0..chars.len().saturating_sub(1));
973                        if chars[pos].is_ascii_digit()
974                            && chars.get(pos + 1).is_some_and(|c| c.is_ascii_digit())
975                        {
976                            let mut new_chars = chars;
977                            new_chars.swap(pos, pos + 1);
978                            if let Ok(new_amount) =
979                                new_chars.into_iter().collect::<String>().parse::<Decimal>()
980                            {
981                                let impact = new_amount - original_amount;
982
983                                // Apply to the modified line
984                                if is_debit {
985                                    entry.lines[0].debit_amount = new_amount;
986                                } else {
987                                    entry.lines[0].credit_amount = new_amount;
988                                }
989
990                                // Rebalance the entry
991                                Self::rebalance_entry(entry, is_debit, impact);
992
993                                entry.header.header_text = Some(
994                                    entry.header.header_text.clone().unwrap_or_default()
995                                        + " [HUMAN_ERROR:TRANSPOSITION]",
996                                );
997                            }
998                        }
999                    }
1000                }
1001            }
1002            1 => {
1003                // Wrong decimal place (off by factor of 10)
1004                if let Some(line) = entry.lines.get_mut(0) {
1005                    let is_debit = line.debit_amount > Decimal::ZERO;
1006                    let original_amount = if is_debit {
1007                        line.debit_amount
1008                    } else {
1009                        line.credit_amount
1010                    };
1011
1012                    let new_amount = original_amount * Decimal::new(10, 0);
1013                    let impact = new_amount - original_amount;
1014
1015                    // Apply to the modified line
1016                    if is_debit {
1017                        entry.lines[0].debit_amount = new_amount;
1018                    } else {
1019                        entry.lines[0].credit_amount = new_amount;
1020                    }
1021
1022                    // Rebalance the entry
1023                    Self::rebalance_entry(entry, is_debit, impact);
1024
1025                    entry.header.header_text = Some(
1026                        entry.header.header_text.clone().unwrap_or_default()
1027                            + " [HUMAN_ERROR:DECIMAL_SHIFT]",
1028                    );
1029                }
1030            }
1031            2 => {
1032                // Typo in description (doesn't affect balance)
1033                if let Some(ref mut text) = entry.header.header_text {
1034                    let typos = ["teh", "adn", "wiht", "taht", "recieve"];
1035                    let correct = ["the", "and", "with", "that", "receive"];
1036                    let idx = self.rng.gen_range(0..typos.len());
1037                    if text.to_lowercase().contains(correct[idx]) {
1038                        *text = text.replace(correct[idx], typos[idx]);
1039                        *text = format!("{} [HUMAN_ERROR:TYPO]", text);
1040                    }
1041                }
1042            }
1043            3 => {
1044                // Rounding to round number
1045                if let Some(line) = entry.lines.get_mut(0) {
1046                    let is_debit = line.debit_amount > Decimal::ZERO;
1047                    let original_amount = if is_debit {
1048                        line.debit_amount
1049                    } else {
1050                        line.credit_amount
1051                    };
1052
1053                    let new_amount =
1054                        (original_amount / Decimal::new(100, 0)).round() * Decimal::new(100, 0);
1055                    let impact = new_amount - original_amount;
1056
1057                    // Apply to the modified line
1058                    if is_debit {
1059                        entry.lines[0].debit_amount = new_amount;
1060                    } else {
1061                        entry.lines[0].credit_amount = new_amount;
1062                    }
1063
1064                    // Rebalance the entry
1065                    Self::rebalance_entry(entry, is_debit, impact);
1066
1067                    entry.header.header_text = Some(
1068                        entry.header.header_text.clone().unwrap_or_default()
1069                            + " [HUMAN_ERROR:ROUNDED]",
1070                    );
1071                }
1072            }
1073            4 => {
1074                // Late posting marker (document date much earlier than posting date)
1075                // This doesn't create an imbalance
1076                if entry.header.document_date == entry.header.posting_date {
1077                    let days_late = self.rng.gen_range(5..15);
1078                    entry.header.document_date =
1079                        entry.header.posting_date - chrono::Duration::days(days_late);
1080                    entry.header.header_text = Some(
1081                        entry.header.header_text.clone().unwrap_or_default()
1082                            + " [HUMAN_ERROR:LATE_POSTING]",
1083                    );
1084                }
1085            }
1086            _ => {}
1087        }
1088    }
1089
1090    /// Apply approval workflow for high-value transactions.
1091    ///
1092    /// If the entry amount exceeds the approval threshold, simulate an
1093    /// approval workflow with appropriate approvers based on amount.
1094    fn maybe_apply_approval_workflow(
1095        &mut self,
1096        entry: &mut JournalEntry,
1097        _posting_date: NaiveDate,
1098    ) {
1099        use rust_decimal::Decimal;
1100
1101        let amount = entry.total_debit();
1102
1103        // Skip if amount is below threshold
1104        if amount <= self.approval_threshold {
1105            // Auto-approved below threshold
1106            let workflow = ApprovalWorkflow::auto_approved(
1107                entry.header.created_by.clone(),
1108                entry.header.user_persona.clone(),
1109                amount,
1110                entry.header.created_at,
1111            );
1112            entry.header.approval_workflow = Some(workflow);
1113            return;
1114        }
1115
1116        // Mark as SOX relevant for high-value transactions
1117        entry.header.sox_relevant = true;
1118
1119        // Determine required approval levels based on amount
1120        let required_levels = if amount > Decimal::new(100000, 0) {
1121            3 // Executive approval required
1122        } else if amount > Decimal::new(50000, 0) {
1123            2 // Senior management approval
1124        } else {
1125            1 // Manager approval
1126        };
1127
1128        // Create the approval workflow
1129        let mut workflow = ApprovalWorkflow::new(
1130            entry.header.created_by.clone(),
1131            entry.header.user_persona.clone(),
1132            amount,
1133        );
1134        workflow.required_levels = required_levels;
1135
1136        // Simulate submission
1137        let submit_time = entry.header.created_at;
1138        let submit_action = ApprovalAction::new(
1139            entry.header.created_by.clone(),
1140            entry.header.user_persona.clone(),
1141            self.parse_persona(&entry.header.user_persona),
1142            ApprovalActionType::Submit,
1143            0,
1144        )
1145        .with_timestamp(submit_time);
1146
1147        workflow.actions.push(submit_action);
1148        workflow.status = ApprovalStatus::Pending;
1149        workflow.submitted_at = Some(submit_time);
1150
1151        // Simulate approvals with realistic delays
1152        let mut current_time = submit_time;
1153        for level in 1..=required_levels {
1154            // Add delay for approval (1-3 business hours per level)
1155            let delay_hours = self.rng.gen_range(1..4);
1156            current_time += chrono::Duration::hours(delay_hours);
1157
1158            // Skip weekends
1159            while current_time.weekday() == chrono::Weekday::Sat
1160                || current_time.weekday() == chrono::Weekday::Sun
1161            {
1162                current_time += chrono::Duration::days(1);
1163            }
1164
1165            // Generate approver based on level
1166            let (approver_id, approver_role) = self.select_approver(level);
1167
1168            let approve_action = ApprovalAction::new(
1169                approver_id.clone(),
1170                format!("{:?}", approver_role),
1171                approver_role,
1172                ApprovalActionType::Approve,
1173                level,
1174            )
1175            .with_timestamp(current_time);
1176
1177            workflow.actions.push(approve_action);
1178            workflow.current_level = level;
1179        }
1180
1181        // Mark as approved
1182        workflow.status = ApprovalStatus::Approved;
1183        workflow.approved_at = Some(current_time);
1184
1185        entry.header.approval_workflow = Some(workflow);
1186    }
1187
1188    /// Select an approver based on the required level.
1189    fn select_approver(&mut self, level: u8) -> (String, UserPersona) {
1190        let persona = match level {
1191            1 => UserPersona::Manager,
1192            2 => UserPersona::Controller,
1193            _ => UserPersona::Executive,
1194        };
1195
1196        // Try to get from user pool first
1197        if let Some(ref pool) = self.user_pool {
1198            if let Some(user) = pool.get_random_user(persona, &mut self.rng) {
1199                return (user.user_id.clone(), persona);
1200            }
1201        }
1202
1203        // Fallback to generated approver
1204        let approver_id = match persona {
1205            UserPersona::Manager => format!("MGR{:04}", self.rng.gen_range(1..100)),
1206            UserPersona::Controller => format!("CTRL{:04}", self.rng.gen_range(1..20)),
1207            UserPersona::Executive => format!("EXEC{:04}", self.rng.gen_range(1..10)),
1208            _ => format!("USR{:04}", self.rng.gen_range(1..1000)),
1209        };
1210
1211        (approver_id, persona)
1212    }
1213
1214    /// Parse user persona from string.
1215    fn parse_persona(&self, persona_str: &str) -> UserPersona {
1216        match persona_str.to_lowercase().as_str() {
1217            s if s.contains("junior") => UserPersona::JuniorAccountant,
1218            s if s.contains("senior") => UserPersona::SeniorAccountant,
1219            s if s.contains("controller") => UserPersona::Controller,
1220            s if s.contains("manager") => UserPersona::Manager,
1221            s if s.contains("executive") => UserPersona::Executive,
1222            s if s.contains("automated") || s.contains("system") => UserPersona::AutomatedSystem,
1223            _ => UserPersona::JuniorAccountant, // Default
1224        }
1225    }
1226
1227    /// Enable or disable approval workflow.
1228    pub fn with_approval(mut self, enabled: bool) -> Self {
1229        self.approval_enabled = enabled;
1230        self
1231    }
1232
1233    /// Set the approval threshold amount.
1234    pub fn with_approval_threshold(mut self, threshold: rust_decimal::Decimal) -> Self {
1235        self.approval_threshold = threshold;
1236        self
1237    }
1238
1239    /// Select a user from the pool or generate a generic user ID.
1240    fn select_user(&mut self, is_automated: bool) -> (String, String) {
1241        if let Some(ref pool) = self.user_pool {
1242            let persona = if is_automated {
1243                UserPersona::AutomatedSystem
1244            } else {
1245                // Random distribution among human personas
1246                let roll: f64 = self.rng.gen();
1247                if roll < 0.4 {
1248                    UserPersona::JuniorAccountant
1249                } else if roll < 0.7 {
1250                    UserPersona::SeniorAccountant
1251                } else if roll < 0.85 {
1252                    UserPersona::Controller
1253                } else {
1254                    UserPersona::Manager
1255                }
1256            };
1257
1258            if let Some(user) = pool.get_random_user(persona, &mut self.rng) {
1259                return (
1260                    user.user_id.clone(),
1261                    format!("{:?}", user.persona).to_lowercase(),
1262                );
1263            }
1264        }
1265
1266        // Fallback to generic format
1267        if is_automated {
1268            (
1269                format!("BATCH{:04}", self.rng.gen_range(1..=20)),
1270                "automated_system".to_string(),
1271            )
1272        } else {
1273            (
1274                format!("USER{:04}", self.rng.gen_range(1..=40)),
1275                "senior_accountant".to_string(),
1276            )
1277        }
1278    }
1279
1280    /// Select transaction source based on configuration weights.
1281    fn select_source(&mut self) -> TransactionSource {
1282        let roll: f64 = self.rng.gen();
1283        let dist = &self.config.source_distribution;
1284
1285        if roll < dist.manual {
1286            TransactionSource::Manual
1287        } else if roll < dist.manual + dist.automated {
1288            TransactionSource::Automated
1289        } else if roll < dist.manual + dist.automated + dist.recurring {
1290            TransactionSource::Recurring
1291        } else {
1292            TransactionSource::Adjustment
1293        }
1294    }
1295
1296    /// Select a business process based on configuration weights.
1297    fn select_business_process(&mut self) -> BusinessProcess {
1298        let roll: f64 = self.rng.gen();
1299
1300        // Default weights: O2C=35%, P2P=30%, R2R=20%, H2R=10%, A2R=5%
1301        if roll < 0.35 {
1302            BusinessProcess::O2C
1303        } else if roll < 0.65 {
1304            BusinessProcess::P2P
1305        } else if roll < 0.85 {
1306            BusinessProcess::R2R
1307        } else if roll < 0.95 {
1308            BusinessProcess::H2R
1309        } else {
1310            BusinessProcess::A2R
1311        }
1312    }
1313
1314    fn select_debit_account(&mut self) -> &GLAccount {
1315        let accounts = self.coa.get_accounts_by_type(AccountType::Asset);
1316        let expense_accounts = self.coa.get_accounts_by_type(AccountType::Expense);
1317
1318        // 60% asset, 40% expense for debits
1319        let all: Vec<_> = if self.rng.gen::<f64>() < 0.6 {
1320            accounts
1321        } else {
1322            expense_accounts
1323        };
1324
1325        all.choose(&mut self.rng)
1326            .copied()
1327            .unwrap_or_else(|| &self.coa.accounts[0])
1328    }
1329
1330    fn select_credit_account(&mut self) -> &GLAccount {
1331        let liability_accounts = self.coa.get_accounts_by_type(AccountType::Liability);
1332        let revenue_accounts = self.coa.get_accounts_by_type(AccountType::Revenue);
1333
1334        // 60% liability, 40% revenue for credits
1335        let all: Vec<_> = if self.rng.gen::<f64>() < 0.6 {
1336            liability_accounts
1337        } else {
1338            revenue_accounts
1339        };
1340
1341        all.choose(&mut self.rng)
1342            .copied()
1343            .unwrap_or_else(|| &self.coa.accounts[0])
1344    }
1345}
1346
1347impl Generator for JournalEntryGenerator {
1348    type Item = JournalEntry;
1349    type Config = (
1350        TransactionConfig,
1351        Arc<ChartOfAccounts>,
1352        Vec<String>,
1353        NaiveDate,
1354        NaiveDate,
1355    );
1356
1357    fn new(config: Self::Config, seed: u64) -> Self {
1358        Self::new_with_params(config.0, config.1, config.2, config.3, config.4, seed)
1359    }
1360
1361    fn generate_one(&mut self) -> Self::Item {
1362        self.generate()
1363    }
1364
1365    fn reset(&mut self) {
1366        self.rng = ChaCha8Rng::seed_from_u64(self.seed);
1367        self.line_sampler.reset(self.seed + 1);
1368        self.amount_sampler.reset(self.seed + 2);
1369        self.temporal_sampler.reset(self.seed + 3);
1370        self.count = 0;
1371        self.uuid_factory.reset();
1372
1373        // Reset reference generator by recreating it
1374        let mut ref_gen = ReferenceGenerator::new(
1375            self.start_date.year(),
1376            self.companies.first().map(|s| s.as_str()).unwrap_or("1000"),
1377        );
1378        ref_gen.set_prefix(
1379            ReferenceType::Invoice,
1380            &self.template_config.references.invoice_prefix,
1381        );
1382        ref_gen.set_prefix(
1383            ReferenceType::PurchaseOrder,
1384            &self.template_config.references.po_prefix,
1385        );
1386        ref_gen.set_prefix(
1387            ReferenceType::SalesOrder,
1388            &self.template_config.references.so_prefix,
1389        );
1390        self.reference_generator = ref_gen;
1391    }
1392
1393    fn count(&self) -> u64 {
1394        self.count
1395    }
1396
1397    fn seed(&self) -> u64 {
1398        self.seed
1399    }
1400}
1401
1402#[cfg(test)]
1403mod tests {
1404    use super::*;
1405    use crate::ChartOfAccountsGenerator;
1406
1407    #[test]
1408    fn test_generate_balanced_entries() {
1409        let mut coa_gen =
1410            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
1411        let coa = Arc::new(coa_gen.generate());
1412
1413        let mut je_gen = JournalEntryGenerator::new_with_params(
1414            TransactionConfig::default(),
1415            coa,
1416            vec!["1000".to_string()],
1417            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
1418            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
1419            42,
1420        );
1421
1422        let mut balanced_count = 0;
1423        for _ in 0..100 {
1424            let entry = je_gen.generate();
1425
1426            // Skip entries with human errors as they may be intentionally unbalanced
1427            let has_human_error = entry
1428                .header
1429                .header_text
1430                .as_ref()
1431                .map(|t| t.contains("[HUMAN_ERROR:"))
1432                .unwrap_or(false);
1433
1434            if !has_human_error {
1435                assert!(
1436                    entry.is_balanced(),
1437                    "Entry {:?} is not balanced",
1438                    entry.header.document_id
1439                );
1440                balanced_count += 1;
1441            }
1442            assert!(entry.line_count() >= 2, "Entry has fewer than 2 lines");
1443        }
1444
1445        // Ensure most entries are balanced (human errors are rare)
1446        assert!(
1447            balanced_count >= 80,
1448            "Expected at least 80 balanced entries, got {}",
1449            balanced_count
1450        );
1451    }
1452
1453    #[test]
1454    fn test_deterministic_generation() {
1455        let mut coa_gen =
1456            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
1457        let coa = Arc::new(coa_gen.generate());
1458
1459        let mut gen1 = JournalEntryGenerator::new_with_params(
1460            TransactionConfig::default(),
1461            Arc::clone(&coa),
1462            vec!["1000".to_string()],
1463            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
1464            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
1465            42,
1466        );
1467
1468        let mut gen2 = JournalEntryGenerator::new_with_params(
1469            TransactionConfig::default(),
1470            coa,
1471            vec!["1000".to_string()],
1472            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
1473            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
1474            42,
1475        );
1476
1477        for _ in 0..50 {
1478            let e1 = gen1.generate();
1479            let e2 = gen2.generate();
1480            assert_eq!(e1.header.document_id, e2.header.document_id);
1481            assert_eq!(e1.total_debit(), e2.total_debit());
1482        }
1483    }
1484
1485    #[test]
1486    fn test_templates_generate_descriptions() {
1487        let mut coa_gen =
1488            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
1489        let coa = Arc::new(coa_gen.generate());
1490
1491        // Enable all template features
1492        let template_config = TemplateConfig {
1493            names: datasynth_config::schema::NameTemplateConfig {
1494                generate_realistic_names: true,
1495                email_domain: "test.com".to_string(),
1496                culture_distribution: datasynth_config::schema::CultureDistribution::default(),
1497            },
1498            descriptions: datasynth_config::schema::DescriptionTemplateConfig {
1499                generate_header_text: true,
1500                generate_line_text: true,
1501            },
1502            references: datasynth_config::schema::ReferenceTemplateConfig {
1503                generate_references: true,
1504                invoice_prefix: "TEST-INV".to_string(),
1505                po_prefix: "TEST-PO".to_string(),
1506                so_prefix: "TEST-SO".to_string(),
1507            },
1508        };
1509
1510        let mut je_gen = JournalEntryGenerator::new_with_full_config(
1511            TransactionConfig::default(),
1512            coa,
1513            vec!["1000".to_string()],
1514            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
1515            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
1516            42,
1517            template_config,
1518            None,
1519        )
1520        .with_persona_errors(false); // Disable for template testing
1521
1522        for _ in 0..10 {
1523            let entry = je_gen.generate();
1524
1525            // Verify header text is populated
1526            assert!(
1527                entry.header.header_text.is_some(),
1528                "Header text should be populated"
1529            );
1530
1531            // Verify reference is populated
1532            assert!(
1533                entry.header.reference.is_some(),
1534                "Reference should be populated"
1535            );
1536
1537            // Verify business process is set
1538            assert!(
1539                entry.header.business_process.is_some(),
1540                "Business process should be set"
1541            );
1542
1543            // Verify line text is populated
1544            for line in &entry.lines {
1545                assert!(line.line_text.is_some(), "Line text should be populated");
1546            }
1547
1548            // Entry should still be balanced
1549            assert!(entry.is_balanced());
1550        }
1551    }
1552
1553    #[test]
1554    fn test_user_pool_integration() {
1555        let mut coa_gen =
1556            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
1557        let coa = Arc::new(coa_gen.generate());
1558
1559        let companies = vec!["1000".to_string()];
1560
1561        // Generate user pool
1562        let mut user_gen = crate::UserGenerator::new(42);
1563        let user_pool = user_gen.generate_standard(&companies);
1564
1565        let mut je_gen = JournalEntryGenerator::new_with_full_config(
1566            TransactionConfig::default(),
1567            coa,
1568            companies,
1569            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
1570            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
1571            42,
1572            TemplateConfig::default(),
1573            Some(user_pool),
1574        );
1575
1576        // Generate entries and verify user IDs are from pool
1577        for _ in 0..20 {
1578            let entry = je_gen.generate();
1579
1580            // User ID should not be generic BATCH/USER format when pool is used
1581            // (though it may still fall back if random selection misses)
1582            assert!(!entry.header.created_by.is_empty());
1583        }
1584    }
1585
1586    #[test]
1587    fn test_master_data_connection() {
1588        let mut coa_gen =
1589            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
1590        let coa = Arc::new(coa_gen.generate());
1591
1592        // Create test vendors
1593        let vendors = vec![
1594            Vendor::new("V-TEST-001", "Test Vendor Alpha", VendorType::Supplier),
1595            Vendor::new("V-TEST-002", "Test Vendor Beta", VendorType::Technology),
1596        ];
1597
1598        // Create test customers
1599        let customers = vec![
1600            Customer::new("C-TEST-001", "Test Customer One", CustomerType::Corporate),
1601            Customer::new(
1602                "C-TEST-002",
1603                "Test Customer Two",
1604                CustomerType::SmallBusiness,
1605            ),
1606        ];
1607
1608        // Create test materials
1609        let materials = vec![Material::new(
1610            "MAT-TEST-001",
1611            "Test Material A",
1612            MaterialType::RawMaterial,
1613        )];
1614
1615        // Create generator with master data
1616        let generator = JournalEntryGenerator::new_with_params(
1617            TransactionConfig::default(),
1618            coa,
1619            vec!["1000".to_string()],
1620            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
1621            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
1622            42,
1623        );
1624
1625        // Without master data
1626        assert!(!generator.is_using_real_master_data());
1627
1628        // Connect master data
1629        let generator_with_data = generator
1630            .with_vendors(&vendors)
1631            .with_customers(&customers)
1632            .with_materials(&materials);
1633
1634        // Should now be using real master data
1635        assert!(generator_with_data.is_using_real_master_data());
1636    }
1637
1638    #[test]
1639    fn test_with_master_data_convenience_method() {
1640        let mut coa_gen =
1641            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
1642        let coa = Arc::new(coa_gen.generate());
1643
1644        let vendors = vec![Vendor::new("V-001", "Vendor One", VendorType::Supplier)];
1645        let customers = vec![Customer::new(
1646            "C-001",
1647            "Customer One",
1648            CustomerType::Corporate,
1649        )];
1650        let materials = vec![Material::new(
1651            "MAT-001",
1652            "Material One",
1653            MaterialType::RawMaterial,
1654        )];
1655
1656        let generator = JournalEntryGenerator::new_with_params(
1657            TransactionConfig::default(),
1658            coa,
1659            vec!["1000".to_string()],
1660            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
1661            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
1662            42,
1663        )
1664        .with_master_data(&vendors, &customers, &materials);
1665
1666        assert!(generator.is_using_real_master_data());
1667    }
1668
1669    #[test]
1670    fn test_stress_factors_increase_error_rate() {
1671        let mut coa_gen =
1672            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
1673        let coa = Arc::new(coa_gen.generate());
1674
1675        let generator = JournalEntryGenerator::new_with_params(
1676            TransactionConfig::default(),
1677            coa,
1678            vec!["1000".to_string()],
1679            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
1680            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
1681            42,
1682        );
1683
1684        let base_rate = 0.1;
1685
1686        // Regular day - no stress factors
1687        let regular_day = NaiveDate::from_ymd_opt(2024, 6, 15).unwrap(); // Mid-June Wednesday
1688        let regular_rate = generator.apply_stress_factors(base_rate, regular_day);
1689        assert!(
1690            (regular_rate - base_rate).abs() < 0.01,
1691            "Regular day should have minimal stress factor adjustment"
1692        );
1693
1694        // Month end - 50% more errors
1695        let month_end = NaiveDate::from_ymd_opt(2024, 6, 29).unwrap(); // June 29 (Saturday)
1696        let month_end_rate = generator.apply_stress_factors(base_rate, month_end);
1697        assert!(
1698            month_end_rate > regular_rate,
1699            "Month end should have higher error rate than regular day"
1700        );
1701
1702        // Year end - double the error rate
1703        let year_end = NaiveDate::from_ymd_opt(2024, 12, 30).unwrap(); // December 30
1704        let year_end_rate = generator.apply_stress_factors(base_rate, year_end);
1705        assert!(
1706            year_end_rate > month_end_rate,
1707            "Year end should have highest error rate"
1708        );
1709
1710        // Friday stress
1711        let friday = NaiveDate::from_ymd_opt(2024, 6, 14).unwrap(); // Friday
1712        let friday_rate = generator.apply_stress_factors(base_rate, friday);
1713        assert!(
1714            friday_rate > regular_rate,
1715            "Friday should have higher error rate than mid-week"
1716        );
1717
1718        // Monday stress
1719        let monday = NaiveDate::from_ymd_opt(2024, 6, 17).unwrap(); // Monday
1720        let monday_rate = generator.apply_stress_factors(base_rate, monday);
1721        assert!(
1722            monday_rate > regular_rate,
1723            "Monday should have higher error rate than mid-week"
1724        );
1725    }
1726
1727    #[test]
1728    fn test_batching_produces_similar_entries() {
1729        let mut coa_gen =
1730            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
1731        let coa = Arc::new(coa_gen.generate());
1732
1733        // Use seed 123 which is more likely to trigger batching
1734        let mut je_gen = JournalEntryGenerator::new_with_params(
1735            TransactionConfig::default(),
1736            coa,
1737            vec!["1000".to_string()],
1738            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
1739            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
1740            123,
1741        )
1742        .with_persona_errors(false); // Disable to ensure balanced entries
1743
1744        // Generate many entries - at 15% batch rate, should see some batches
1745        let entries: Vec<JournalEntry> = (0..200).map(|_| je_gen.generate()).collect();
1746
1747        // Check that all entries are balanced (batched or not)
1748        for entry in &entries {
1749            assert!(
1750                entry.is_balanced(),
1751                "All entries including batched should be balanced"
1752            );
1753        }
1754
1755        // Count entries with same-day posting dates (batch indicator)
1756        let mut date_counts: std::collections::HashMap<NaiveDate, usize> =
1757            std::collections::HashMap::new();
1758        for entry in &entries {
1759            *date_counts.entry(entry.header.posting_date).or_insert(0) += 1;
1760        }
1761
1762        // With batching, some dates should have multiple entries
1763        let dates_with_multiple = date_counts.values().filter(|&&c| c > 1).count();
1764        assert!(
1765            dates_with_multiple > 0,
1766            "With batching, should see some dates with multiple entries"
1767        );
1768    }
1769}