Skip to main content

datasynth_generators/
je_generator.rs

1//! Journal Entry generator with statistical distributions.
2
3use chrono::{Datelike, NaiveDate};
4use rand::prelude::*;
5use rand_chacha::ChaCha8Rng;
6use rust_decimal::prelude::*;
7use rust_decimal::Decimal;
8use std::sync::Arc;
9
10use datasynth_config::schema::{FraudConfig, GeneratorConfig, TemplateConfig, TransactionConfig};
11use datasynth_core::distributions::{DriftAdjustments, DriftConfig, DriftController, *};
12use datasynth_core::models::*;
13use datasynth_core::templates::{
14    descriptions::DescriptionContext, DescriptionGenerator, ReferenceGenerator, ReferenceType,
15};
16use datasynth_core::traits::Generator;
17use datasynth_core::uuid_factory::{DeterministicUuidFactory, GeneratorType};
18
19use crate::company_selector::WeightedCompanySelector;
20use crate::user_generator::{UserGenerator, UserGeneratorConfig};
21
/// Generator for realistic journal entries.
///
/// Bundles the seeded RNG, the statistical samplers, the master-data pools and
/// the feature switches that `generate` reads while building each entry.
pub struct JournalEntryGenerator {
    /// Main RNG; the constructor seeds it from `seed`.
    rng: ChaCha8Rng,
    /// Base seed supplied by the caller; the constructor also derives the
    /// sampler and user-generator seeds from it.
    seed: u64,
    /// Transaction configuration the samplers are built from.
    config: TransactionConfig,
    /// Shared chart of accounts.
    coa: Arc<ChartOfAccounts>,
    /// Company codes known to this generator.
    companies: Vec<String>,
    /// Chooses the company code of each entry (uniform unless replaced).
    company_selector: WeightedCompanySelector,
    /// Samples the debit/credit line layout of an entry.
    line_sampler: LineItemSampler,
    /// Samples entry amounts and splits a total across lines.
    amount_sampler: AmountSampler,
    /// Samples posting dates and times.
    temporal_sampler: TemporalSampler,
    /// First date passed to the posting-date sampler.
    start_date: NaiveDate,
    /// Last date passed to the posting-date sampler.
    end_date: NaiveDate,
    /// Number of entries generated so far.
    count: u64,
    /// Source of deterministic document IDs.
    uuid_factory: DeterministicUuidFactory,
    // Enhanced features
    /// Optional pool of users; built by the constructor when realistic names
    /// are enabled and the caller supplied none.
    user_pool: Option<UserPool>,
    /// Produces header and line texts.
    description_generator: DescriptionGenerator,
    /// Produces document references.
    reference_generator: ReferenceGenerator,
    /// Template switches (names, descriptions, references).
    template_config: TemplateConfig,
    /// Vendors used for P2P description context.
    vendor_pool: VendorPool,
    /// Customers used for O2C description context.
    customer_pool: CustomerPool,
    // Material pool for realistic material references
    /// Set only through `with_materials`; `None` by default.
    material_pool: Option<MaterialPool>,
    // Flag indicating whether we're using real master data vs defaults
    /// Becomes `true` once any non-empty master-data slice has been supplied.
    using_real_master_data: bool,
    // Fraud generation
    /// Fraud switch, rate and type distribution.
    fraud_config: FraudConfig,
    // Persona-based error injection
    /// Defaults to `true` in the constructor.
    persona_errors_enabled: bool,
    // Approval threshold enforcement
    /// Defaults to `true` in the constructor.
    approval_enabled: bool,
    /// Defaults to 10,000 in the constructor.
    approval_threshold: rust_decimal::Decimal,
    // Batching behavior - humans often process similar items together
    /// Template and countdown of the batch in progress, if any.
    batch_state: Option<BatchState>,
    // Temporal drift controller for simulating distribution changes over time
    /// `None` by default.
    drift_controller: Option<DriftController>,
}
60
/// State for tracking batch processing behavior.
///
/// When humans process transactions, they often batch similar items together
/// (e.g., processing all invoices from one vendor, entering similar expenses).
/// A value of this type is the template that follow-up entries of a batch are
/// derived from, plus the countdown of entries still to produce.
#[derive(Clone)]
struct BatchState {
    /// The base entry template to vary
    /// (vendor of the base entry; the visible code always stores `None`).
    base_vendor: Option<String>,
    /// Customer of the base entry; the visible code always stores `None`.
    base_customer: Option<String>,
    /// GL account of the base entry's first line, reused as the debit account.
    base_account_number: String,
    /// Total debit of the base entry; batched amounts vary around it.
    base_amount: rust_decimal::Decimal,
    /// Business process copied from the base entry's header.
    base_business_process: Option<BusinessProcess>,
    /// Posting date copied from the base entry's header.
    base_posting_date: NaiveDate,
    /// Remaining entries in this batch
    remaining: u8,
}
77
78impl JournalEntryGenerator {
79    /// Create a new journal entry generator.
80    pub fn new_with_params(
81        config: TransactionConfig,
82        coa: Arc<ChartOfAccounts>,
83        companies: Vec<String>,
84        start_date: NaiveDate,
85        end_date: NaiveDate,
86        seed: u64,
87    ) -> Self {
88        Self::new_with_full_config(
89            config,
90            coa,
91            companies,
92            start_date,
93            end_date,
94            seed,
95            TemplateConfig::default(),
96            None,
97        )
98    }
99
100    /// Create a new journal entry generator with full configuration.
101    #[allow(clippy::too_many_arguments)]
102    pub fn new_with_full_config(
103        config: TransactionConfig,
104        coa: Arc<ChartOfAccounts>,
105        companies: Vec<String>,
106        start_date: NaiveDate,
107        end_date: NaiveDate,
108        seed: u64,
109        template_config: TemplateConfig,
110        user_pool: Option<UserPool>,
111    ) -> Self {
112        // Initialize user pool if not provided
113        let user_pool = user_pool.or_else(|| {
114            if template_config.names.generate_realistic_names {
115                let user_gen_config = UserGeneratorConfig {
116                    culture_distribution: vec![
117                        (
118                            datasynth_core::templates::NameCulture::WesternUs,
119                            template_config.names.culture_distribution.western_us,
120                        ),
121                        (
122                            datasynth_core::templates::NameCulture::Hispanic,
123                            template_config.names.culture_distribution.hispanic,
124                        ),
125                        (
126                            datasynth_core::templates::NameCulture::German,
127                            template_config.names.culture_distribution.german,
128                        ),
129                        (
130                            datasynth_core::templates::NameCulture::French,
131                            template_config.names.culture_distribution.french,
132                        ),
133                        (
134                            datasynth_core::templates::NameCulture::Chinese,
135                            template_config.names.culture_distribution.chinese,
136                        ),
137                        (
138                            datasynth_core::templates::NameCulture::Japanese,
139                            template_config.names.culture_distribution.japanese,
140                        ),
141                        (
142                            datasynth_core::templates::NameCulture::Indian,
143                            template_config.names.culture_distribution.indian,
144                        ),
145                    ],
146                    email_domain: template_config.names.email_domain.clone(),
147                    generate_realistic_names: true,
148                };
149                let mut user_gen = UserGenerator::with_config(seed + 100, user_gen_config);
150                Some(user_gen.generate_standard(&companies))
151            } else {
152                None
153            }
154        });
155
156        // Initialize reference generator
157        let mut ref_gen = ReferenceGenerator::new(
158            start_date.year(),
159            companies.first().map(|s| s.as_str()).unwrap_or("1000"),
160        );
161        ref_gen.set_prefix(
162            ReferenceType::Invoice,
163            &template_config.references.invoice_prefix,
164        );
165        ref_gen.set_prefix(
166            ReferenceType::PurchaseOrder,
167            &template_config.references.po_prefix,
168        );
169        ref_gen.set_prefix(
170            ReferenceType::SalesOrder,
171            &template_config.references.so_prefix,
172        );
173
174        // Create weighted company selector (uniform weights for this constructor)
175        let company_selector = WeightedCompanySelector::uniform(companies.clone());
176
177        Self {
178            rng: ChaCha8Rng::seed_from_u64(seed),
179            seed,
180            config: config.clone(),
181            coa,
182            companies,
183            company_selector,
184            line_sampler: LineItemSampler::with_config(
185                seed + 1,
186                config.line_item_distribution.clone(),
187                config.even_odd_distribution.clone(),
188                config.debit_credit_distribution.clone(),
189            ),
190            amount_sampler: AmountSampler::with_config(seed + 2, config.amounts.clone()),
191            temporal_sampler: TemporalSampler::with_config(
192                seed + 3,
193                config.seasonality.clone(),
194                WorkingHoursConfig::default(),
195                Vec::new(),
196            ),
197            start_date,
198            end_date,
199            count: 0,
200            uuid_factory: DeterministicUuidFactory::new(seed, GeneratorType::JournalEntry),
201            user_pool,
202            description_generator: DescriptionGenerator::new(),
203            reference_generator: ref_gen,
204            template_config,
205            vendor_pool: VendorPool::standard(),
206            customer_pool: CustomerPool::standard(),
207            material_pool: None,
208            using_real_master_data: false,
209            fraud_config: FraudConfig::default(),
210            persona_errors_enabled: true, // Enable by default for realism
211            approval_enabled: true,       // Enable by default for realism
212            approval_threshold: rust_decimal::Decimal::new(10000, 0), // $10,000 default threshold
213            batch_state: None,
214            drift_controller: None,
215        }
216    }
217
218    /// Create from a full GeneratorConfig.
219    ///
220    /// This constructor uses the volume_weight from company configs
221    /// for weighted company selection, and fraud config from GeneratorConfig.
222    pub fn from_generator_config(
223        full_config: &GeneratorConfig,
224        coa: Arc<ChartOfAccounts>,
225        start_date: NaiveDate,
226        end_date: NaiveDate,
227        seed: u64,
228    ) -> Self {
229        let companies: Vec<String> = full_config
230            .companies
231            .iter()
232            .map(|c| c.code.clone())
233            .collect();
234
235        // Create weighted selector using volume_weight from company configs
236        let company_selector = WeightedCompanySelector::from_configs(&full_config.companies);
237
238        let mut generator = Self::new_with_full_config(
239            full_config.transactions.clone(),
240            coa,
241            companies,
242            start_date,
243            end_date,
244            seed,
245            full_config.templates.clone(),
246            None,
247        );
248
249        // Override the uniform selector with weighted selector
250        generator.company_selector = company_selector;
251
252        // Set fraud config
253        generator.fraud_config = full_config.fraud.clone();
254
255        generator
256    }
257
    /// Set a custom company selector.
    ///
    /// Replaces the selector used to choose the company code of subsequently
    /// generated entries.
    pub fn set_company_selector(&mut self, selector: WeightedCompanySelector) {
        self.company_selector = selector;
    }
262
    /// Get the current company selector.
    ///
    /// Returns a shared reference; the selector itself is not cloned.
    pub fn company_selector(&self) -> &WeightedCompanySelector {
        &self.company_selector
    }
267
    /// Set fraud configuration.
    ///
    /// In-place counterpart of `with_fraud_config`: overwrites the stored
    /// fraud settings on an existing generator.
    pub fn set_fraud_config(&mut self, config: FraudConfig) {
        self.fraud_config = config;
    }
272
273    /// Set vendors from generated master data.
274    ///
275    /// This replaces the default vendor pool with actual generated vendors,
276    /// ensuring JEs reference real master data entities.
277    pub fn with_vendors(mut self, vendors: &[Vendor]) -> Self {
278        if !vendors.is_empty() {
279            self.vendor_pool = VendorPool::from_vendors(vendors.to_vec());
280            self.using_real_master_data = true;
281        }
282        self
283    }
284
285    /// Set customers from generated master data.
286    ///
287    /// This replaces the default customer pool with actual generated customers,
288    /// ensuring JEs reference real master data entities.
289    pub fn with_customers(mut self, customers: &[Customer]) -> Self {
290        if !customers.is_empty() {
291            self.customer_pool = CustomerPool::from_customers(customers.to_vec());
292            self.using_real_master_data = true;
293        }
294        self
295    }
296
297    /// Set materials from generated master data.
298    ///
299    /// This provides material references for JEs that involve inventory movements.
300    pub fn with_materials(mut self, materials: &[Material]) -> Self {
301        if !materials.is_empty() {
302            self.material_pool = Some(MaterialPool::from_materials(materials.to_vec()));
303            self.using_real_master_data = true;
304        }
305        self
306    }
307
308    /// Set all master data at once for convenience.
309    ///
310    /// This is the recommended way to configure the JE generator with
311    /// generated master data to ensure data coherence.
312    pub fn with_master_data(
313        self,
314        vendors: &[Vendor],
315        customers: &[Customer],
316        materials: &[Material],
317    ) -> Self {
318        self.with_vendors(vendors)
319            .with_customers(customers)
320            .with_materials(materials)
321    }
322
    /// Check if the generator is using real master data.
    ///
    /// `true` once any of `with_vendors`, `with_customers` or `with_materials`
    /// has been called with a non-empty slice.
    pub fn is_using_real_master_data(&self) -> bool {
        self.using_real_master_data
    }
327
328    /// Determine if this transaction should be fraudulent.
329    fn determine_fraud(&mut self) -> Option<FraudType> {
330        if !self.fraud_config.enabled {
331            return None;
332        }
333
334        // Roll for fraud based on fraud rate
335        if self.rng.gen::<f64>() >= self.fraud_config.fraud_rate {
336            return None;
337        }
338
339        // Select fraud type based on distribution
340        Some(self.select_fraud_type())
341    }
342
343    /// Select a fraud type based on the configured distribution.
344    fn select_fraud_type(&mut self) -> FraudType {
345        let dist = &self.fraud_config.fraud_type_distribution;
346        let roll: f64 = self.rng.gen();
347
348        let mut cumulative = 0.0;
349
350        cumulative += dist.suspense_account_abuse;
351        if roll < cumulative {
352            return FraudType::SuspenseAccountAbuse;
353        }
354
355        cumulative += dist.fictitious_transaction;
356        if roll < cumulative {
357            return FraudType::FictitiousTransaction;
358        }
359
360        cumulative += dist.revenue_manipulation;
361        if roll < cumulative {
362            return FraudType::RevenueManipulation;
363        }
364
365        cumulative += dist.expense_capitalization;
366        if roll < cumulative {
367            return FraudType::ExpenseCapitalization;
368        }
369
370        cumulative += dist.split_transaction;
371        if roll < cumulative {
372            return FraudType::SplitTransaction;
373        }
374
375        cumulative += dist.timing_anomaly;
376        if roll < cumulative {
377            return FraudType::TimingAnomaly;
378        }
379
380        cumulative += dist.unauthorized_access;
381        if roll < cumulative {
382            return FraudType::UnauthorizedAccess;
383        }
384
385        // Default fallback
386        FraudType::DuplicatePayment
387    }
388
389    /// Map a fraud type to an amount pattern for suspicious amounts.
390    fn fraud_type_to_amount_pattern(&self, fraud_type: FraudType) -> FraudAmountPattern {
391        match fraud_type {
392            FraudType::SplitTransaction | FraudType::JustBelowThreshold => {
393                FraudAmountPattern::ThresholdAdjacent
394            }
395            FraudType::FictitiousTransaction
396            | FraudType::FictitiousEntry
397            | FraudType::SuspenseAccountAbuse
398            | FraudType::RoundDollarManipulation => FraudAmountPattern::ObviousRoundNumbers,
399            FraudType::RevenueManipulation
400            | FraudType::ExpenseCapitalization
401            | FraudType::ImproperCapitalization
402            | FraudType::ReserveManipulation
403            | FraudType::UnauthorizedAccess
404            | FraudType::PrematureRevenue
405            | FraudType::UnderstatedLiabilities
406            | FraudType::OverstatedAssets
407            | FraudType::ChannelStuffing => FraudAmountPattern::StatisticallyImprobable,
408            FraudType::DuplicatePayment
409            | FraudType::TimingAnomaly
410            | FraudType::SelfApproval
411            | FraudType::ExceededApprovalLimit
412            | FraudType::SegregationOfDutiesViolation
413            | FraudType::UnauthorizedApproval
414            | FraudType::CollusiveApproval
415            | FraudType::FictitiousVendor
416            | FraudType::ShellCompanyPayment
417            | FraudType::Kickback
418            | FraudType::KickbackScheme
419            | FraudType::InvoiceManipulation
420            | FraudType::AssetMisappropriation
421            | FraudType::InventoryTheft
422            | FraudType::GhostEmployee => FraudAmountPattern::Normal,
423            // Accounting Standards Fraud Types (ASC 606/IFRS 15 - Revenue)
424            FraudType::ImproperRevenueRecognition
425            | FraudType::ImproperPoAllocation
426            | FraudType::VariableConsiderationManipulation
427            | FraudType::ContractModificationMisstatement => {
428                FraudAmountPattern::StatisticallyImprobable
429            }
430            // Accounting Standards Fraud Types (ASC 842/IFRS 16 - Leases)
431            FraudType::LeaseClassificationManipulation
432            | FraudType::OffBalanceSheetLease
433            | FraudType::LeaseLiabilityUnderstatement
434            | FraudType::RouAssetMisstatement => FraudAmountPattern::StatisticallyImprobable,
435            // Accounting Standards Fraud Types (ASC 820/IFRS 13 - Fair Value)
436            FraudType::FairValueHierarchyManipulation
437            | FraudType::Level3InputManipulation
438            | FraudType::ValuationTechniqueManipulation => {
439                FraudAmountPattern::StatisticallyImprobable
440            }
441            // Accounting Standards Fraud Types (ASC 360/IAS 36 - Impairment)
442            FraudType::DelayedImpairment
443            | FraudType::ImpairmentTestAvoidance
444            | FraudType::CashFlowProjectionManipulation
445            | FraudType::ImproperImpairmentReversal => FraudAmountPattern::StatisticallyImprobable,
446        }
447    }
448
    /// Generate a deterministic UUID using the factory.
    ///
    /// Thin wrapper over `uuid_factory.next()`; used for the document ID of
    /// every generated entry.
    fn generate_deterministic_uuid(&self) -> uuid::Uuid {
        self.uuid_factory.next()
    }
453
    /// Generate a single journal entry.
    ///
    /// If a batch is in progress (`batch_state` has entries remaining) the call
    /// is delegated to `generate_batched_entry`. Otherwise a fresh entry is
    /// built: document ID, posting date, company, line layout, source, business
    /// process and fraud decision are drawn first, then the header, the
    /// description/reference texts, the amount and finally the debit and credit
    /// lines. Persona errors and the approval workflow are applied afterwards,
    /// and the finished entry may seed a new batch.
    ///
    /// The order of the sampling calls determines the generated data for a
    /// given seed, so statements here should not be reordered casually.
    pub fn generate(&mut self) -> JournalEntry {
        // Check if we're in a batch - if so, generate a batched entry
        if let Some(ref state) = self.batch_state {
            if state.remaining > 0 {
                return self.generate_batched_entry();
            }
        }

        self.count += 1;

        // Generate deterministic document ID
        let document_id = self.generate_deterministic_uuid();

        // Sample posting date
        let posting_date = self
            .temporal_sampler
            .sample_date(self.start_date, self.end_date);

        // Select company using weighted selector
        let company_code = self.company_selector.select(&mut self.rng).to_string();

        // Sample line item specification
        let line_spec = self.line_sampler.sample();

        // Determine source type using full 4-way distribution.
        // Automated and Recurring sources are both treated as system-generated below.
        let source = self.select_source();
        let is_automated = matches!(
            source,
            TransactionSource::Automated | TransactionSource::Recurring
        );

        // Select business process
        let business_process = self.select_business_process();

        // Determine if this is a fraudulent transaction
        let fraud_type = self.determine_fraud();
        let is_fraud = fraud_type.is_some();

        // Sample time based on source (the flag passed is "is human-entered")
        let time = self.temporal_sampler.sample_time(!is_automated);
        let created_at = posting_date.and_time(time).and_utc();

        // Select user from pool or generate generic
        let (created_by, user_persona) = self.select_user(is_automated);

        // Create header with deterministic UUID
        let mut header =
            JournalEntryHeader::with_deterministic_id(company_code, posting_date, document_id);
        header.created_at = created_at;
        header.source = source;
        header.created_by = created_by;
        header.user_persona = user_persona;
        header.business_process = Some(business_process);
        header.is_fraud = is_fraud;
        header.fraud_type = fraud_type;

        // Generate description context
        let mut context =
            DescriptionContext::with_period(posting_date.month(), posting_date.year());

        // Add vendor/customer context based on business process
        match business_process {
            BusinessProcess::P2P => {
                if let Some(vendor) = self.vendor_pool.random_vendor(&mut self.rng) {
                    context.vendor_name = Some(vendor.name.clone());
                }
            }
            BusinessProcess::O2C => {
                if let Some(customer) = self.customer_pool.random_customer(&mut self.rng) {
                    context.customer_name = Some(customer.name.clone());
                }
            }
            _ => {}
        }

        // Generate header text if enabled
        if self.template_config.descriptions.generate_header_text {
            header.header_text = Some(self.description_generator.generate_header_text(
                business_process,
                &context,
                &mut self.rng,
            ));
        }

        // Generate reference if enabled
        if self.template_config.references.generate_references {
            header.reference = Some(
                self.reference_generator
                    .generate_for_process_year(business_process, posting_date.year()),
            );
        }

        // Generate line items
        let mut entry = JournalEntry::new(header);

        // Generate amount - use fraud pattern if this is a fraudulent transaction
        let base_amount = if let Some(ft) = fraud_type {
            let pattern = self.fraud_type_to_amount_pattern(ft);
            self.amount_sampler.sample_fraud(pattern)
        } else {
            self.amount_sampler.sample()
        };

        // Apply temporal drift if configured.
        // NOTE(review): the seasonal factor is only applied when
        // `amount_mean_multiplier` differs from 1.0; at exactly 1.0 it is skipped.
        // Confirm that this is intended.
        let drift_adjusted_amount = {
            let drift = self.get_drift_adjustments(posting_date);
            if drift.amount_mean_multiplier != 1.0 {
                // Apply drift multiplier (includes seasonal factor if enabled)
                let multiplier = drift.amount_mean_multiplier * drift.seasonal_factor;
                let adjusted = base_amount.to_f64().unwrap_or(1.0) * multiplier;
                // Fall back to the unadjusted amount if the product cannot be
                // represented as a Decimal.
                Decimal::from_f64_retain(adjusted).unwrap_or(base_amount)
            } else {
                base_amount
            }
        };

        // Apply human variation to amounts for non-automated transactions
        let total_amount = if is_automated {
            drift_adjusted_amount // Automated systems use exact amounts
        } else {
            self.apply_human_variation(drift_adjusted_amount)
        };

        // Generate debit lines (line numbers start at 1)
        let debit_amounts = self
            .amount_sampler
            .sample_summing_to(line_spec.debit_count, total_amount);
        for (i, amount) in debit_amounts.into_iter().enumerate() {
            let account_number = self.select_debit_account().account_number.clone();
            let mut line = JournalEntryLine::debit(
                entry.header.document_id,
                (i + 1) as u32,
                account_number.clone(),
                amount,
            );

            // Generate line text if enabled
            if self.template_config.descriptions.generate_line_text {
                line.line_text = Some(self.description_generator.generate_line_text(
                    &account_number,
                    &context,
                    &mut self.rng,
                ));
            }

            entry.add_line(line);
        }

        // Generate credit lines - split the SAME total independently so that
        // debits and credits target the same sum (the individual line amounts
        // are sampled separately from the debit side). Line numbers continue
        // after the debit lines.
        let credit_amounts = self
            .amount_sampler
            .sample_summing_to(line_spec.credit_count, total_amount);
        for (i, amount) in credit_amounts.into_iter().enumerate() {
            let account_number = self.select_credit_account().account_number.clone();
            let mut line = JournalEntryLine::credit(
                entry.header.document_id,
                (line_spec.debit_count + i + 1) as u32,
                account_number.clone(),
                amount,
            );

            // Generate line text if enabled
            if self.template_config.descriptions.generate_line_text {
                line.line_text = Some(self.description_generator.generate_line_text(
                    &account_number,
                    &context,
                    &mut self.rng,
                ));
            }

            entry.add_line(line);
        }

        // Apply persona-based errors if enabled and it's a human user
        if self.persona_errors_enabled && !is_automated {
            self.maybe_inject_persona_error(&mut entry);
        }

        // Apply approval workflow if enabled; the callee decides whether the
        // entry actually needs approval
        if self.approval_enabled {
            self.maybe_apply_approval_workflow(&mut entry, posting_date);
        }

        // Maybe start a batch of similar entries for realism
        self.maybe_start_batch(&entry);

        entry
    }
643
    /// Enable or disable persona-based error injection.
    ///
    /// When enabled, entries created by human personas have a chance
    /// to contain realistic human errors based on their experience level.
    /// Builder-style: consumes the generator and returns it with the flag set.
    pub fn with_persona_errors(mut self, enabled: bool) -> Self {
        self.persona_errors_enabled = enabled;
        self
    }
652
653    /// Set fraud configuration for fraud injection.
654    ///
655    /// When fraud is enabled in the config, transactions have a chance
656    /// to be marked as fraudulent based on the configured fraud rate.
657    pub fn with_fraud_config(mut self, config: FraudConfig) -> Self {
658        self.fraud_config = config;
659        self
660    }
661
    /// Check if persona errors are enabled.
    ///
    /// Reflects the constructor default (`true`) or the last value passed to
    /// `with_persona_errors`.
    pub fn persona_errors_enabled(&self) -> bool {
        self.persona_errors_enabled
    }
666
    /// Builder-style switch for batch processing behavior.
    ///
    /// Passing `false` discards the batch currently in progress, if any.
    /// Passing `true` changes nothing.
    ///
    /// NOTE(review): this method stores no flag, and `maybe_start_batch` checks
    /// none, so `with_batching(false)` does not prevent later entries from
    /// starting new batches. Permanently disabling batching would need a
    /// dedicated field on the generator.
    pub fn with_batching(mut self, enabled: bool) -> Self {
        if !enabled {
            // Only the in-flight batch is dropped; see the note above.
            self.batch_state = None;
        }
        self
    }
677
    /// Check if batch processing is enabled.
    ///
    /// Currently returns `true` unconditionally: the generator keeps no
    /// enabled/disabled flag for batching.
    pub fn batching_enabled(&self) -> bool {
        // Batching is implicitly enabled; there is no state to consult.
        true
    }
683
684    /// Maybe start a batch based on the current entry.
685    ///
686    /// Humans often batch similar work: processing invoices from one vendor,
687    /// entering expense reports for a trip, reconciling similar items.
688    fn maybe_start_batch(&mut self, entry: &JournalEntry) {
689        // Only start batch for non-automated, non-fraud entries
690        if entry.header.source == TransactionSource::Automated || entry.header.is_fraud {
691            return;
692        }
693
694        // 15% chance to start a batch (most work is not batched)
695        if self.rng.gen::<f64>() > 0.15 {
696            return;
697        }
698
699        // Extract key attributes for batching
700        let base_account = entry
701            .lines
702            .first()
703            .map(|l| l.gl_account.clone())
704            .unwrap_or_default();
705
706        let base_amount = entry.total_debit();
707
708        self.batch_state = Some(BatchState {
709            base_vendor: None, // Would need vendor from context
710            base_customer: None,
711            base_account_number: base_account,
712            base_amount,
713            base_business_process: entry.header.business_process,
714            base_posting_date: entry.header.posting_date,
715            remaining: self.rng.gen_range(2..7), // 2-6 more similar entries
716        });
717    }
718
719    /// Generate an entry that's part of the current batch.
720    ///
721    /// Batched entries have:
722    /// - Same or very similar business process
723    /// - Same posting date (batched work done together)
724    /// - Similar amounts (within ±15%)
725    /// - Same debit account (processing similar items)
726    fn generate_batched_entry(&mut self) -> JournalEntry {
727        use rust_decimal::Decimal;
728
729        // Decrement batch counter
730        if let Some(ref mut state) = self.batch_state {
731            state.remaining = state.remaining.saturating_sub(1);
732        }
733
734        let batch = self.batch_state.clone().unwrap();
735
736        // Use the batch's posting date (work done on same day)
737        let posting_date = batch.base_posting_date;
738
739        self.count += 1;
740        let document_id = self.generate_deterministic_uuid();
741
742        // Select same company (batched work is usually same company)
743        let company_code = self.company_selector.select(&mut self.rng).to_string();
744
745        // Use simplified line spec for batched entries (usually 2-line)
746        let _line_spec = LineItemSpec {
747            total_count: 2,
748            debit_count: 1,
749            credit_count: 1,
750            split_type: DebitCreditSplit::Equal,
751        };
752
753        // Batched entries are always manual
754        let source = TransactionSource::Manual;
755
756        // Use the batch's business process
757        let business_process = batch.base_business_process.unwrap_or(BusinessProcess::R2R);
758
759        // Sample time
760        let time = self.temporal_sampler.sample_time(true);
761        let created_at = posting_date.and_time(time).and_utc();
762
763        // Same user for batched work
764        let (created_by, user_persona) = self.select_user(false);
765
766        // Create header
767        let mut header =
768            JournalEntryHeader::with_deterministic_id(company_code, posting_date, document_id);
769        header.created_at = created_at;
770        header.source = source;
771        header.created_by = created_by;
772        header.user_persona = user_persona;
773        header.business_process = Some(business_process);
774
775        // Generate similar amount (within ±15% of base)
776        let variation = self.rng.gen_range(-0.15..0.15);
777        let varied_amount =
778            batch.base_amount * (Decimal::ONE + Decimal::try_from(variation).unwrap_or_default());
779        let total_amount = varied_amount.round_dp(2).max(Decimal::from(1));
780
781        // Create the entry
782        let mut entry = JournalEntry::new(header);
783
784        // Use same debit account as batch base
785        let debit_line = JournalEntryLine::debit(
786            entry.header.document_id,
787            1,
788            batch.base_account_number.clone(),
789            total_amount,
790        );
791        entry.add_line(debit_line);
792
793        // Select a credit account
794        let credit_account = self.select_credit_account().account_number.clone();
795        let credit_line =
796            JournalEntryLine::credit(entry.header.document_id, 2, credit_account, total_amount);
797        entry.add_line(credit_line);
798
799        // Apply persona-based errors if enabled
800        if self.persona_errors_enabled {
801            self.maybe_inject_persona_error(&mut entry);
802        }
803
804        // Apply approval workflow if enabled
805        if self.approval_enabled {
806            self.maybe_apply_approval_workflow(&mut entry, posting_date);
807        }
808
809        // Clear batch state if no more entries remaining
810        if batch.remaining <= 1 {
811            self.batch_state = None;
812        }
813
814        entry
815    }
816
817    /// Maybe inject a persona-appropriate error based on the persona's error rate.
818    fn maybe_inject_persona_error(&mut self, entry: &mut JournalEntry) {
819        // Parse persona from the entry header
820        let persona_str = &entry.header.user_persona;
821        let persona = match persona_str.to_lowercase().as_str() {
822            s if s.contains("junior") => UserPersona::JuniorAccountant,
823            s if s.contains("senior") => UserPersona::SeniorAccountant,
824            s if s.contains("controller") => UserPersona::Controller,
825            s if s.contains("manager") => UserPersona::Manager,
826            s if s.contains("executive") => UserPersona::Executive,
827            _ => return, // Don't inject errors for unknown personas
828        };
829
830        // Get base error rate from persona
831        let base_error_rate = persona.error_rate();
832
833        // Apply stress factors based on posting date
834        let adjusted_rate = self.apply_stress_factors(base_error_rate, entry.header.posting_date);
835
836        // Check if error should occur based on adjusted rate
837        if self.rng.gen::<f64>() >= adjusted_rate {
838            return; // No error this time
839        }
840
841        // Select and inject persona-appropriate error
842        self.inject_human_error(entry, persona);
843    }
844
845    /// Apply contextual stress factors to the base error rate.
846    ///
847    /// Stress factors increase error likelihood during:
848    /// - Month-end (day >= 28): 1.5x more errors due to deadline pressure
849    /// - Quarter-end (Mar, Jun, Sep, Dec): additional 25% boost
850    /// - Year-end (December 28-31): 2.0x more errors due to audit pressure
851    /// - Monday morning (catch-up work): 20% more errors
852    /// - Friday afternoon (rushing to leave): 30% more errors
853    fn apply_stress_factors(&self, base_rate: f64, posting_date: chrono::NaiveDate) -> f64 {
854        use chrono::Datelike;
855
856        let mut rate = base_rate;
857        let day = posting_date.day();
858        let month = posting_date.month();
859
860        // Year-end stress (December 28-31): double the error rate
861        if month == 12 && day >= 28 {
862            rate *= 2.0;
863            return rate.min(0.5); // Cap at 50% to keep it realistic
864        }
865
866        // Quarter-end stress (last days of Mar, Jun, Sep, Dec)
867        if matches!(month, 3 | 6 | 9 | 12) && day >= 28 {
868            rate *= 1.75; // 75% more errors at quarter end
869            return rate.min(0.4);
870        }
871
872        // Month-end stress (last 3 days of month)
873        if day >= 28 {
874            rate *= 1.5; // 50% more errors at month end
875        }
876
877        // Day-of-week stress effects
878        let weekday = posting_date.weekday();
879        match weekday {
880            chrono::Weekday::Mon => {
881                // Monday: catching up, often rushed
882                rate *= 1.2;
883            }
884            chrono::Weekday::Fri => {
885                // Friday: rushing to finish before weekend
886                rate *= 1.3;
887            }
888            _ => {}
889        }
890
891        // Cap at 40% to keep it realistic
892        rate.min(0.4)
893    }
894
895    /// Apply human-like variation to an amount.
896    ///
897    /// Humans don't enter perfectly calculated amounts - they:
898    /// - Round amounts differently
899    /// - Estimate instead of calculating exactly
900    /// - Make small input variations
901    ///
902    /// This applies small variations (typically ±2%) to make amounts more realistic.
903    fn apply_human_variation(&mut self, amount: rust_decimal::Decimal) -> rust_decimal::Decimal {
904        use rust_decimal::Decimal;
905
906        // Automated transactions or very small amounts don't get variation
907        if amount < Decimal::from(10) {
908            return amount;
909        }
910
911        // 70% chance of human variation being applied
912        if self.rng.gen::<f64>() > 0.70 {
913            return amount;
914        }
915
916        // Decide which type of human variation to apply
917        let variation_type: u8 = self.rng.gen_range(0..4);
918
919        match variation_type {
920            0 => {
921                // ±2% variation (common for estimated amounts)
922                let variation_pct = self.rng.gen_range(-0.02..0.02);
923                let variation = amount * Decimal::try_from(variation_pct).unwrap_or_default();
924                (amount + variation).round_dp(2)
925            }
926            1 => {
927                // Round to nearest $10
928                let ten = Decimal::from(10);
929                (amount / ten).round() * ten
930            }
931            2 => {
932                // Round to nearest $100 (for larger amounts)
933                if amount >= Decimal::from(500) {
934                    let hundred = Decimal::from(100);
935                    (amount / hundred).round() * hundred
936                } else {
937                    amount
938                }
939            }
940            3 => {
941                // Slight under/over payment (±$0.01 to ±$1.00)
942                let cents = Decimal::new(self.rng.gen_range(-100..100), 2);
943                (amount + cents).max(Decimal::ZERO).round_dp(2)
944            }
945            _ => amount,
946        }
947    }
948
949    /// Rebalance an entry after a one-sided amount modification.
950    ///
951    /// When an error modifies one line's amount, this finds a line on the opposite
952    /// side (credit if modified was debit, or vice versa) and adjusts it by the
953    /// same impact to maintain balance.
954    fn rebalance_entry(entry: &mut JournalEntry, modified_was_debit: bool, impact: Decimal) {
955        // Find a line on the opposite side to adjust
956        let balancing_idx = entry.lines.iter().position(|l| {
957            if modified_was_debit {
958                l.credit_amount > Decimal::ZERO
959            } else {
960                l.debit_amount > Decimal::ZERO
961            }
962        });
963
964        if let Some(idx) = balancing_idx {
965            if modified_was_debit {
966                entry.lines[idx].credit_amount += impact;
967            } else {
968                entry.lines[idx].debit_amount += impact;
969            }
970        }
971    }
972
973    /// Inject a human-like error based on the persona.
974    ///
975    /// All error types maintain balance - amount modifications are applied to both sides.
976    /// Entries are marked with [HUMAN_ERROR:*] tags in header_text for ML detection.
977    fn inject_human_error(&mut self, entry: &mut JournalEntry, persona: UserPersona) {
978        use rust_decimal::Decimal;
979
980        // Different personas make different types of errors
981        let error_type: u8 = match persona {
982            UserPersona::JuniorAccountant => {
983                // Junior accountants make more varied errors
984                self.rng.gen_range(0..5)
985            }
986            UserPersona::SeniorAccountant => {
987                // Senior accountants mainly make transposition errors
988                self.rng.gen_range(0..3)
989            }
990            UserPersona::Controller | UserPersona::Manager => {
991                // Controllers/managers mainly make rounding or cutoff errors
992                self.rng.gen_range(3..5)
993            }
994            _ => return,
995        };
996
997        match error_type {
998            0 => {
999                // Transposed digits in an amount
1000                if let Some(line) = entry.lines.get_mut(0) {
1001                    let is_debit = line.debit_amount > Decimal::ZERO;
1002                    let original_amount = if is_debit {
1003                        line.debit_amount
1004                    } else {
1005                        line.credit_amount
1006                    };
1007
1008                    // Simple digit swap in the string representation
1009                    let s = original_amount.to_string();
1010                    if s.len() >= 2 {
1011                        let chars: Vec<char> = s.chars().collect();
1012                        let pos = self.rng.gen_range(0..chars.len().saturating_sub(1));
1013                        if chars[pos].is_ascii_digit()
1014                            && chars.get(pos + 1).is_some_and(|c| c.is_ascii_digit())
1015                        {
1016                            let mut new_chars = chars;
1017                            new_chars.swap(pos, pos + 1);
1018                            if let Ok(new_amount) =
1019                                new_chars.into_iter().collect::<String>().parse::<Decimal>()
1020                            {
1021                                let impact = new_amount - original_amount;
1022
1023                                // Apply to the modified line
1024                                if is_debit {
1025                                    entry.lines[0].debit_amount = new_amount;
1026                                } else {
1027                                    entry.lines[0].credit_amount = new_amount;
1028                                }
1029
1030                                // Rebalance the entry
1031                                Self::rebalance_entry(entry, is_debit, impact);
1032
1033                                entry.header.header_text = Some(
1034                                    entry.header.header_text.clone().unwrap_or_default()
1035                                        + " [HUMAN_ERROR:TRANSPOSITION]",
1036                                );
1037                            }
1038                        }
1039                    }
1040                }
1041            }
1042            1 => {
1043                // Wrong decimal place (off by factor of 10)
1044                if let Some(line) = entry.lines.get_mut(0) {
1045                    let is_debit = line.debit_amount > Decimal::ZERO;
1046                    let original_amount = if is_debit {
1047                        line.debit_amount
1048                    } else {
1049                        line.credit_amount
1050                    };
1051
1052                    let new_amount = original_amount * Decimal::new(10, 0);
1053                    let impact = new_amount - original_amount;
1054
1055                    // Apply to the modified line
1056                    if is_debit {
1057                        entry.lines[0].debit_amount = new_amount;
1058                    } else {
1059                        entry.lines[0].credit_amount = new_amount;
1060                    }
1061
1062                    // Rebalance the entry
1063                    Self::rebalance_entry(entry, is_debit, impact);
1064
1065                    entry.header.header_text = Some(
1066                        entry.header.header_text.clone().unwrap_or_default()
1067                            + " [HUMAN_ERROR:DECIMAL_SHIFT]",
1068                    );
1069                }
1070            }
1071            2 => {
1072                // Typo in description (doesn't affect balance)
1073                if let Some(ref mut text) = entry.header.header_text {
1074                    let typos = ["teh", "adn", "wiht", "taht", "recieve"];
1075                    let correct = ["the", "and", "with", "that", "receive"];
1076                    let idx = self.rng.gen_range(0..typos.len());
1077                    if text.to_lowercase().contains(correct[idx]) {
1078                        *text = text.replace(correct[idx], typos[idx]);
1079                        *text = format!("{} [HUMAN_ERROR:TYPO]", text);
1080                    }
1081                }
1082            }
1083            3 => {
1084                // Rounding to round number
1085                if let Some(line) = entry.lines.get_mut(0) {
1086                    let is_debit = line.debit_amount > Decimal::ZERO;
1087                    let original_amount = if is_debit {
1088                        line.debit_amount
1089                    } else {
1090                        line.credit_amount
1091                    };
1092
1093                    let new_amount =
1094                        (original_amount / Decimal::new(100, 0)).round() * Decimal::new(100, 0);
1095                    let impact = new_amount - original_amount;
1096
1097                    // Apply to the modified line
1098                    if is_debit {
1099                        entry.lines[0].debit_amount = new_amount;
1100                    } else {
1101                        entry.lines[0].credit_amount = new_amount;
1102                    }
1103
1104                    // Rebalance the entry
1105                    Self::rebalance_entry(entry, is_debit, impact);
1106
1107                    entry.header.header_text = Some(
1108                        entry.header.header_text.clone().unwrap_or_default()
1109                            + " [HUMAN_ERROR:ROUNDED]",
1110                    );
1111                }
1112            }
1113            4 => {
1114                // Late posting marker (document date much earlier than posting date)
1115                // This doesn't create an imbalance
1116                if entry.header.document_date == entry.header.posting_date {
1117                    let days_late = self.rng.gen_range(5..15);
1118                    entry.header.document_date =
1119                        entry.header.posting_date - chrono::Duration::days(days_late);
1120                    entry.header.header_text = Some(
1121                        entry.header.header_text.clone().unwrap_or_default()
1122                            + " [HUMAN_ERROR:LATE_POSTING]",
1123                    );
1124                }
1125            }
1126            _ => {}
1127        }
1128    }
1129
1130    /// Apply approval workflow for high-value transactions.
1131    ///
1132    /// If the entry amount exceeds the approval threshold, simulate an
1133    /// approval workflow with appropriate approvers based on amount.
1134    fn maybe_apply_approval_workflow(
1135        &mut self,
1136        entry: &mut JournalEntry,
1137        _posting_date: NaiveDate,
1138    ) {
1139        use rust_decimal::Decimal;
1140
1141        let amount = entry.total_debit();
1142
1143        // Skip if amount is below threshold
1144        if amount <= self.approval_threshold {
1145            // Auto-approved below threshold
1146            let workflow = ApprovalWorkflow::auto_approved(
1147                entry.header.created_by.clone(),
1148                entry.header.user_persona.clone(),
1149                amount,
1150                entry.header.created_at,
1151            );
1152            entry.header.approval_workflow = Some(workflow);
1153            return;
1154        }
1155
1156        // Mark as SOX relevant for high-value transactions
1157        entry.header.sox_relevant = true;
1158
1159        // Determine required approval levels based on amount
1160        let required_levels = if amount > Decimal::new(100000, 0) {
1161            3 // Executive approval required
1162        } else if amount > Decimal::new(50000, 0) {
1163            2 // Senior management approval
1164        } else {
1165            1 // Manager approval
1166        };
1167
1168        // Create the approval workflow
1169        let mut workflow = ApprovalWorkflow::new(
1170            entry.header.created_by.clone(),
1171            entry.header.user_persona.clone(),
1172            amount,
1173        );
1174        workflow.required_levels = required_levels;
1175
1176        // Simulate submission
1177        let submit_time = entry.header.created_at;
1178        let submit_action = ApprovalAction::new(
1179            entry.header.created_by.clone(),
1180            entry.header.user_persona.clone(),
1181            self.parse_persona(&entry.header.user_persona),
1182            ApprovalActionType::Submit,
1183            0,
1184        )
1185        .with_timestamp(submit_time);
1186
1187        workflow.actions.push(submit_action);
1188        workflow.status = ApprovalStatus::Pending;
1189        workflow.submitted_at = Some(submit_time);
1190
1191        // Simulate approvals with realistic delays
1192        let mut current_time = submit_time;
1193        for level in 1..=required_levels {
1194            // Add delay for approval (1-3 business hours per level)
1195            let delay_hours = self.rng.gen_range(1..4);
1196            current_time += chrono::Duration::hours(delay_hours);
1197
1198            // Skip weekends
1199            while current_time.weekday() == chrono::Weekday::Sat
1200                || current_time.weekday() == chrono::Weekday::Sun
1201            {
1202                current_time += chrono::Duration::days(1);
1203            }
1204
1205            // Generate approver based on level
1206            let (approver_id, approver_role) = self.select_approver(level);
1207
1208            let approve_action = ApprovalAction::new(
1209                approver_id.clone(),
1210                format!("{:?}", approver_role),
1211                approver_role,
1212                ApprovalActionType::Approve,
1213                level,
1214            )
1215            .with_timestamp(current_time);
1216
1217            workflow.actions.push(approve_action);
1218            workflow.current_level = level;
1219        }
1220
1221        // Mark as approved
1222        workflow.status = ApprovalStatus::Approved;
1223        workflow.approved_at = Some(current_time);
1224
1225        entry.header.approval_workflow = Some(workflow);
1226    }
1227
1228    /// Select an approver based on the required level.
1229    fn select_approver(&mut self, level: u8) -> (String, UserPersona) {
1230        let persona = match level {
1231            1 => UserPersona::Manager,
1232            2 => UserPersona::Controller,
1233            _ => UserPersona::Executive,
1234        };
1235
1236        // Try to get from user pool first
1237        if let Some(ref pool) = self.user_pool {
1238            if let Some(user) = pool.get_random_user(persona, &mut self.rng) {
1239                return (user.user_id.clone(), persona);
1240            }
1241        }
1242
1243        // Fallback to generated approver
1244        let approver_id = match persona {
1245            UserPersona::Manager => format!("MGR{:04}", self.rng.gen_range(1..100)),
1246            UserPersona::Controller => format!("CTRL{:04}", self.rng.gen_range(1..20)),
1247            UserPersona::Executive => format!("EXEC{:04}", self.rng.gen_range(1..10)),
1248            _ => format!("USR{:04}", self.rng.gen_range(1..1000)),
1249        };
1250
1251        (approver_id, persona)
1252    }
1253
1254    /// Parse user persona from string.
1255    fn parse_persona(&self, persona_str: &str) -> UserPersona {
1256        match persona_str.to_lowercase().as_str() {
1257            s if s.contains("junior") => UserPersona::JuniorAccountant,
1258            s if s.contains("senior") => UserPersona::SeniorAccountant,
1259            s if s.contains("controller") => UserPersona::Controller,
1260            s if s.contains("manager") => UserPersona::Manager,
1261            s if s.contains("executive") => UserPersona::Executive,
1262            s if s.contains("automated") || s.contains("system") => UserPersona::AutomatedSystem,
1263            _ => UserPersona::JuniorAccountant, // Default
1264        }
1265    }
1266
1267    /// Enable or disable approval workflow.
1268    pub fn with_approval(mut self, enabled: bool) -> Self {
1269        self.approval_enabled = enabled;
1270        self
1271    }
1272
1273    /// Set the approval threshold amount.
1274    pub fn with_approval_threshold(mut self, threshold: rust_decimal::Decimal) -> Self {
1275        self.approval_threshold = threshold;
1276        self
1277    }
1278
1279    /// Set the temporal drift controller for simulating distribution changes over time.
1280    ///
1281    /// When drift is enabled, amounts and other distributions will shift based on
1282    /// the period (month) to simulate realistic temporal evolution like inflation
1283    /// or increasing fraud rates.
1284    pub fn with_drift_controller(mut self, controller: DriftController) -> Self {
1285        self.drift_controller = Some(controller);
1286        self
1287    }
1288
1289    /// Set drift configuration directly.
1290    ///
1291    /// Creates a drift controller from the config. Total periods is calculated
1292    /// from the date range.
1293    pub fn with_drift_config(mut self, config: DriftConfig, seed: u64) -> Self {
1294        if config.enabled {
1295            let total_periods = self.calculate_total_periods();
1296            self.drift_controller = Some(DriftController::new(config, seed, total_periods));
1297        }
1298        self
1299    }
1300
1301    /// Calculate total periods (months) in the date range.
1302    fn calculate_total_periods(&self) -> u32 {
1303        let start_year = self.start_date.year();
1304        let start_month = self.start_date.month();
1305        let end_year = self.end_date.year();
1306        let end_month = self.end_date.month();
1307
1308        ((end_year - start_year) * 12 + (end_month as i32 - start_month as i32) + 1).max(1) as u32
1309    }
1310
1311    /// Calculate the period number (0-indexed) for a given date.
1312    fn date_to_period(&self, date: NaiveDate) -> u32 {
1313        let start_year = self.start_date.year();
1314        let start_month = self.start_date.month() as i32;
1315        let date_year = date.year();
1316        let date_month = date.month() as i32;
1317
1318        ((date_year - start_year) * 12 + (date_month - start_month)).max(0) as u32
1319    }
1320
1321    /// Get drift adjustments for a given date.
1322    fn get_drift_adjustments(&self, date: NaiveDate) -> DriftAdjustments {
1323        if let Some(ref controller) = self.drift_controller {
1324            let period = self.date_to_period(date);
1325            controller.compute_adjustments(period)
1326        } else {
1327            DriftAdjustments::none()
1328        }
1329    }
1330
1331    /// Select a user from the pool or generate a generic user ID.
1332    fn select_user(&mut self, is_automated: bool) -> (String, String) {
1333        if let Some(ref pool) = self.user_pool {
1334            let persona = if is_automated {
1335                UserPersona::AutomatedSystem
1336            } else {
1337                // Random distribution among human personas
1338                let roll: f64 = self.rng.gen();
1339                if roll < 0.4 {
1340                    UserPersona::JuniorAccountant
1341                } else if roll < 0.7 {
1342                    UserPersona::SeniorAccountant
1343                } else if roll < 0.85 {
1344                    UserPersona::Controller
1345                } else {
1346                    UserPersona::Manager
1347                }
1348            };
1349
1350            if let Some(user) = pool.get_random_user(persona, &mut self.rng) {
1351                return (
1352                    user.user_id.clone(),
1353                    format!("{:?}", user.persona).to_lowercase(),
1354                );
1355            }
1356        }
1357
1358        // Fallback to generic format
1359        if is_automated {
1360            (
1361                format!("BATCH{:04}", self.rng.gen_range(1..=20)),
1362                "automated_system".to_string(),
1363            )
1364        } else {
1365            (
1366                format!("USER{:04}", self.rng.gen_range(1..=40)),
1367                "senior_accountant".to_string(),
1368            )
1369        }
1370    }
1371
1372    /// Select transaction source based on configuration weights.
1373    fn select_source(&mut self) -> TransactionSource {
1374        let roll: f64 = self.rng.gen();
1375        let dist = &self.config.source_distribution;
1376
1377        if roll < dist.manual {
1378            TransactionSource::Manual
1379        } else if roll < dist.manual + dist.automated {
1380            TransactionSource::Automated
1381        } else if roll < dist.manual + dist.automated + dist.recurring {
1382            TransactionSource::Recurring
1383        } else {
1384            TransactionSource::Adjustment
1385        }
1386    }
1387
1388    /// Select a business process based on configuration weights.
1389    fn select_business_process(&mut self) -> BusinessProcess {
1390        let roll: f64 = self.rng.gen();
1391
1392        // Default weights: O2C=35%, P2P=30%, R2R=20%, H2R=10%, A2R=5%
1393        if roll < 0.35 {
1394            BusinessProcess::O2C
1395        } else if roll < 0.65 {
1396            BusinessProcess::P2P
1397        } else if roll < 0.85 {
1398            BusinessProcess::R2R
1399        } else if roll < 0.95 {
1400            BusinessProcess::H2R
1401        } else {
1402            BusinessProcess::A2R
1403        }
1404    }
1405
1406    fn select_debit_account(&mut self) -> &GLAccount {
1407        let accounts = self.coa.get_accounts_by_type(AccountType::Asset);
1408        let expense_accounts = self.coa.get_accounts_by_type(AccountType::Expense);
1409
1410        // 60% asset, 40% expense for debits
1411        let all: Vec<_> = if self.rng.gen::<f64>() < 0.6 {
1412            accounts
1413        } else {
1414            expense_accounts
1415        };
1416
1417        all.choose(&mut self.rng)
1418            .copied()
1419            .unwrap_or_else(|| &self.coa.accounts[0])
1420    }
1421
1422    fn select_credit_account(&mut self) -> &GLAccount {
1423        let liability_accounts = self.coa.get_accounts_by_type(AccountType::Liability);
1424        let revenue_accounts = self.coa.get_accounts_by_type(AccountType::Revenue);
1425
1426        // 60% liability, 40% revenue for credits
1427        let all: Vec<_> = if self.rng.gen::<f64>() < 0.6 {
1428            liability_accounts
1429        } else {
1430            revenue_accounts
1431        };
1432
1433        all.choose(&mut self.rng)
1434            .copied()
1435            .unwrap_or_else(|| &self.coa.accounts[0])
1436    }
1437}
1438
1439impl Generator for JournalEntryGenerator {
1440    type Item = JournalEntry;
1441    type Config = (
1442        TransactionConfig,
1443        Arc<ChartOfAccounts>,
1444        Vec<String>,
1445        NaiveDate,
1446        NaiveDate,
1447    );
1448
1449    fn new(config: Self::Config, seed: u64) -> Self {
1450        Self::new_with_params(config.0, config.1, config.2, config.3, config.4, seed)
1451    }
1452
1453    fn generate_one(&mut self) -> Self::Item {
1454        self.generate()
1455    }
1456
1457    fn reset(&mut self) {
1458        self.rng = ChaCha8Rng::seed_from_u64(self.seed);
1459        self.line_sampler.reset(self.seed + 1);
1460        self.amount_sampler.reset(self.seed + 2);
1461        self.temporal_sampler.reset(self.seed + 3);
1462        self.count = 0;
1463        self.uuid_factory.reset();
1464
1465        // Reset reference generator by recreating it
1466        let mut ref_gen = ReferenceGenerator::new(
1467            self.start_date.year(),
1468            self.companies.first().map(|s| s.as_str()).unwrap_or("1000"),
1469        );
1470        ref_gen.set_prefix(
1471            ReferenceType::Invoice,
1472            &self.template_config.references.invoice_prefix,
1473        );
1474        ref_gen.set_prefix(
1475            ReferenceType::PurchaseOrder,
1476            &self.template_config.references.po_prefix,
1477        );
1478        ref_gen.set_prefix(
1479            ReferenceType::SalesOrder,
1480            &self.template_config.references.so_prefix,
1481        );
1482        self.reference_generator = ref_gen;
1483    }
1484
1485    fn count(&self) -> u64 {
1486        self.count
1487    }
1488
1489    fn seed(&self) -> u64 {
1490        self.seed
1491    }
1492}
1493
1494#[cfg(test)]
1495mod tests {
1496    use super::*;
1497    use crate::ChartOfAccountsGenerator;
1498
1499    #[test]
1500    fn test_generate_balanced_entries() {
1501        let mut coa_gen =
1502            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
1503        let coa = Arc::new(coa_gen.generate());
1504
1505        let mut je_gen = JournalEntryGenerator::new_with_params(
1506            TransactionConfig::default(),
1507            coa,
1508            vec!["1000".to_string()],
1509            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
1510            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
1511            42,
1512        );
1513
1514        let mut balanced_count = 0;
1515        for _ in 0..100 {
1516            let entry = je_gen.generate();
1517
1518            // Skip entries with human errors as they may be intentionally unbalanced
1519            let has_human_error = entry
1520                .header
1521                .header_text
1522                .as_ref()
1523                .map(|t| t.contains("[HUMAN_ERROR:"))
1524                .unwrap_or(false);
1525
1526            if !has_human_error {
1527                assert!(
1528                    entry.is_balanced(),
1529                    "Entry {:?} is not balanced",
1530                    entry.header.document_id
1531                );
1532                balanced_count += 1;
1533            }
1534            assert!(entry.line_count() >= 2, "Entry has fewer than 2 lines");
1535        }
1536
1537        // Ensure most entries are balanced (human errors are rare)
1538        assert!(
1539            balanced_count >= 80,
1540            "Expected at least 80 balanced entries, got {}",
1541            balanced_count
1542        );
1543    }
1544
1545    #[test]
1546    fn test_deterministic_generation() {
1547        let mut coa_gen =
1548            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
1549        let coa = Arc::new(coa_gen.generate());
1550
1551        let mut gen1 = JournalEntryGenerator::new_with_params(
1552            TransactionConfig::default(),
1553            Arc::clone(&coa),
1554            vec!["1000".to_string()],
1555            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
1556            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
1557            42,
1558        );
1559
1560        let mut gen2 = JournalEntryGenerator::new_with_params(
1561            TransactionConfig::default(),
1562            coa,
1563            vec!["1000".to_string()],
1564            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
1565            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
1566            42,
1567        );
1568
1569        for _ in 0..50 {
1570            let e1 = gen1.generate();
1571            let e2 = gen2.generate();
1572            assert_eq!(e1.header.document_id, e2.header.document_id);
1573            assert_eq!(e1.total_debit(), e2.total_debit());
1574        }
1575    }
1576
1577    #[test]
1578    fn test_templates_generate_descriptions() {
1579        let mut coa_gen =
1580            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
1581        let coa = Arc::new(coa_gen.generate());
1582
1583        // Enable all template features
1584        let template_config = TemplateConfig {
1585            names: datasynth_config::schema::NameTemplateConfig {
1586                generate_realistic_names: true,
1587                email_domain: "test.com".to_string(),
1588                culture_distribution: datasynth_config::schema::CultureDistribution::default(),
1589            },
1590            descriptions: datasynth_config::schema::DescriptionTemplateConfig {
1591                generate_header_text: true,
1592                generate_line_text: true,
1593            },
1594            references: datasynth_config::schema::ReferenceTemplateConfig {
1595                generate_references: true,
1596                invoice_prefix: "TEST-INV".to_string(),
1597                po_prefix: "TEST-PO".to_string(),
1598                so_prefix: "TEST-SO".to_string(),
1599            },
1600        };
1601
1602        let mut je_gen = JournalEntryGenerator::new_with_full_config(
1603            TransactionConfig::default(),
1604            coa,
1605            vec!["1000".to_string()],
1606            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
1607            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
1608            42,
1609            template_config,
1610            None,
1611        )
1612        .with_persona_errors(false); // Disable for template testing
1613
1614        for _ in 0..10 {
1615            let entry = je_gen.generate();
1616
1617            // Verify header text is populated
1618            assert!(
1619                entry.header.header_text.is_some(),
1620                "Header text should be populated"
1621            );
1622
1623            // Verify reference is populated
1624            assert!(
1625                entry.header.reference.is_some(),
1626                "Reference should be populated"
1627            );
1628
1629            // Verify business process is set
1630            assert!(
1631                entry.header.business_process.is_some(),
1632                "Business process should be set"
1633            );
1634
1635            // Verify line text is populated
1636            for line in &entry.lines {
1637                assert!(line.line_text.is_some(), "Line text should be populated");
1638            }
1639
1640            // Entry should still be balanced
1641            assert!(entry.is_balanced());
1642        }
1643    }
1644
1645    #[test]
1646    fn test_user_pool_integration() {
1647        let mut coa_gen =
1648            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
1649        let coa = Arc::new(coa_gen.generate());
1650
1651        let companies = vec!["1000".to_string()];
1652
1653        // Generate user pool
1654        let mut user_gen = crate::UserGenerator::new(42);
1655        let user_pool = user_gen.generate_standard(&companies);
1656
1657        let mut je_gen = JournalEntryGenerator::new_with_full_config(
1658            TransactionConfig::default(),
1659            coa,
1660            companies,
1661            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
1662            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
1663            42,
1664            TemplateConfig::default(),
1665            Some(user_pool),
1666        );
1667
1668        // Generate entries and verify user IDs are from pool
1669        for _ in 0..20 {
1670            let entry = je_gen.generate();
1671
1672            // User ID should not be generic BATCH/USER format when pool is used
1673            // (though it may still fall back if random selection misses)
1674            assert!(!entry.header.created_by.is_empty());
1675        }
1676    }
1677
1678    #[test]
1679    fn test_master_data_connection() {
1680        let mut coa_gen =
1681            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
1682        let coa = Arc::new(coa_gen.generate());
1683
1684        // Create test vendors
1685        let vendors = vec![
1686            Vendor::new("V-TEST-001", "Test Vendor Alpha", VendorType::Supplier),
1687            Vendor::new("V-TEST-002", "Test Vendor Beta", VendorType::Technology),
1688        ];
1689
1690        // Create test customers
1691        let customers = vec![
1692            Customer::new("C-TEST-001", "Test Customer One", CustomerType::Corporate),
1693            Customer::new(
1694                "C-TEST-002",
1695                "Test Customer Two",
1696                CustomerType::SmallBusiness,
1697            ),
1698        ];
1699
1700        // Create test materials
1701        let materials = vec![Material::new(
1702            "MAT-TEST-001",
1703            "Test Material A",
1704            MaterialType::RawMaterial,
1705        )];
1706
1707        // Create generator with master data
1708        let generator = JournalEntryGenerator::new_with_params(
1709            TransactionConfig::default(),
1710            coa,
1711            vec!["1000".to_string()],
1712            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
1713            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
1714            42,
1715        );
1716
1717        // Without master data
1718        assert!(!generator.is_using_real_master_data());
1719
1720        // Connect master data
1721        let generator_with_data = generator
1722            .with_vendors(&vendors)
1723            .with_customers(&customers)
1724            .with_materials(&materials);
1725
1726        // Should now be using real master data
1727        assert!(generator_with_data.is_using_real_master_data());
1728    }
1729
1730    #[test]
1731    fn test_with_master_data_convenience_method() {
1732        let mut coa_gen =
1733            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
1734        let coa = Arc::new(coa_gen.generate());
1735
1736        let vendors = vec![Vendor::new("V-001", "Vendor One", VendorType::Supplier)];
1737        let customers = vec![Customer::new(
1738            "C-001",
1739            "Customer One",
1740            CustomerType::Corporate,
1741        )];
1742        let materials = vec![Material::new(
1743            "MAT-001",
1744            "Material One",
1745            MaterialType::RawMaterial,
1746        )];
1747
1748        let generator = JournalEntryGenerator::new_with_params(
1749            TransactionConfig::default(),
1750            coa,
1751            vec!["1000".to_string()],
1752            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
1753            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
1754            42,
1755        )
1756        .with_master_data(&vendors, &customers, &materials);
1757
1758        assert!(generator.is_using_real_master_data());
1759    }
1760
1761    #[test]
1762    fn test_stress_factors_increase_error_rate() {
1763        let mut coa_gen =
1764            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
1765        let coa = Arc::new(coa_gen.generate());
1766
1767        let generator = JournalEntryGenerator::new_with_params(
1768            TransactionConfig::default(),
1769            coa,
1770            vec!["1000".to_string()],
1771            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
1772            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
1773            42,
1774        );
1775
1776        let base_rate = 0.1;
1777
1778        // Regular day - no stress factors
1779        let regular_day = NaiveDate::from_ymd_opt(2024, 6, 15).unwrap(); // Mid-June Wednesday
1780        let regular_rate = generator.apply_stress_factors(base_rate, regular_day);
1781        assert!(
1782            (regular_rate - base_rate).abs() < 0.01,
1783            "Regular day should have minimal stress factor adjustment"
1784        );
1785
1786        // Month end - 50% more errors
1787        let month_end = NaiveDate::from_ymd_opt(2024, 6, 29).unwrap(); // June 29 (Saturday)
1788        let month_end_rate = generator.apply_stress_factors(base_rate, month_end);
1789        assert!(
1790            month_end_rate > regular_rate,
1791            "Month end should have higher error rate than regular day"
1792        );
1793
1794        // Year end - double the error rate
1795        let year_end = NaiveDate::from_ymd_opt(2024, 12, 30).unwrap(); // December 30
1796        let year_end_rate = generator.apply_stress_factors(base_rate, year_end);
1797        assert!(
1798            year_end_rate > month_end_rate,
1799            "Year end should have highest error rate"
1800        );
1801
1802        // Friday stress
1803        let friday = NaiveDate::from_ymd_opt(2024, 6, 14).unwrap(); // Friday
1804        let friday_rate = generator.apply_stress_factors(base_rate, friday);
1805        assert!(
1806            friday_rate > regular_rate,
1807            "Friday should have higher error rate than mid-week"
1808        );
1809
1810        // Monday stress
1811        let monday = NaiveDate::from_ymd_opt(2024, 6, 17).unwrap(); // Monday
1812        let monday_rate = generator.apply_stress_factors(base_rate, monday);
1813        assert!(
1814            monday_rate > regular_rate,
1815            "Monday should have higher error rate than mid-week"
1816        );
1817    }
1818
1819    #[test]
1820    fn test_batching_produces_similar_entries() {
1821        let mut coa_gen =
1822            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
1823        let coa = Arc::new(coa_gen.generate());
1824
1825        // Use seed 123 which is more likely to trigger batching
1826        let mut je_gen = JournalEntryGenerator::new_with_params(
1827            TransactionConfig::default(),
1828            coa,
1829            vec!["1000".to_string()],
1830            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
1831            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
1832            123,
1833        )
1834        .with_persona_errors(false); // Disable to ensure balanced entries
1835
1836        // Generate many entries - at 15% batch rate, should see some batches
1837        let entries: Vec<JournalEntry> = (0..200).map(|_| je_gen.generate()).collect();
1838
1839        // Check that all entries are balanced (batched or not)
1840        for entry in &entries {
1841            assert!(
1842                entry.is_balanced(),
1843                "All entries including batched should be balanced"
1844            );
1845        }
1846
1847        // Count entries with same-day posting dates (batch indicator)
1848        let mut date_counts: std::collections::HashMap<NaiveDate, usize> =
1849            std::collections::HashMap::new();
1850        for entry in &entries {
1851            *date_counts.entry(entry.header.posting_date).or_insert(0) += 1;
1852        }
1853
1854        // With batching, some dates should have multiple entries
1855        let dates_with_multiple = date_counts.values().filter(|&&c| c > 1).count();
1856        assert!(
1857            dates_with_multiple > 0,
1858            "With batching, should see some dates with multiple entries"
1859        );
1860    }
1861}