Skip to main content

datasynth_generators/
je_generator.rs

1//! Journal Entry generator with statistical distributions.
2
3use chrono::{Datelike, NaiveDate};
4use datasynth_core::utils::seeded_rng;
5use rand::prelude::*;
6use rand_chacha::ChaCha8Rng;
7use rust_decimal::prelude::*;
8use rust_decimal::Decimal;
9use std::sync::Arc;
10
11use tracing::debug;
12
13use datasynth_config::schema::{
14    FraudConfig, GeneratorConfig, TemplateConfig, TemporalPatternsConfig, TransactionConfig,
15};
16use datasynth_core::distributions::{
17    BusinessDayCalculator, CrossDayConfig, DriftAdjustments, DriftConfig, DriftController,
18    EventType, LagDistribution, PeriodEndConfig, PeriodEndDynamics, PeriodEndModel,
19    ProcessingLagCalculator, ProcessingLagConfig, *,
20};
21use datasynth_core::models::*;
22use datasynth_core::templates::{
23    descriptions::DescriptionContext, DescriptionGenerator, ReferenceGenerator, ReferenceType,
24};
25use datasynth_core::traits::Generator;
26use datasynth_core::uuid_factory::{DeterministicUuidFactory, GeneratorType};
27use datasynth_core::CountryPack;
28
29use crate::company_selector::WeightedCompanySelector;
30use crate::user_generator::{UserGenerator, UserGeneratorConfig};
31
32/// Generator for realistic journal entries.
33pub struct JournalEntryGenerator {
34    rng: ChaCha8Rng,
35    seed: u64,
36    config: TransactionConfig,
37    coa: Arc<ChartOfAccounts>,
38    companies: Vec<String>,
39    company_selector: WeightedCompanySelector,
40    line_sampler: LineItemSampler,
41    amount_sampler: AmountSampler,
42    temporal_sampler: TemporalSampler,
43    start_date: NaiveDate,
44    end_date: NaiveDate,
45    count: u64,
46    uuid_factory: DeterministicUuidFactory,
47    // Enhanced features
48    user_pool: Option<UserPool>,
49    description_generator: DescriptionGenerator,
50    reference_generator: ReferenceGenerator,
51    template_config: TemplateConfig,
52    vendor_pool: VendorPool,
53    customer_pool: CustomerPool,
54    // Material pool for realistic material references
55    material_pool: Option<MaterialPool>,
56    // Flag indicating whether we're using real master data vs defaults
57    using_real_master_data: bool,
58    // Fraud generation
59    fraud_config: FraudConfig,
60    // Persona-based error injection
61    persona_errors_enabled: bool,
62    // Approval threshold enforcement
63    approval_enabled: bool,
64    approval_threshold: rust_decimal::Decimal,
65    // SOD violation rate for approval tracking (0.0 to 1.0)
66    sod_violation_rate: f64,
67    // Batching behavior - humans often process similar items together
68    batch_state: Option<BatchState>,
69    // Temporal drift controller for simulating distribution changes over time
70    drift_controller: Option<DriftController>,
71    // Temporal patterns components
72    business_day_calculator: Option<BusinessDayCalculator>,
73    processing_lag_calculator: Option<ProcessingLagCalculator>,
74    temporal_patterns_config: Option<TemporalPatternsConfig>,
75}
76
77/// State for tracking batch processing behavior.
78///
79/// When humans process transactions, they often batch similar items together
80/// (e.g., processing all invoices from one vendor, entering similar expenses).
81#[derive(Clone)]
82struct BatchState {
83    /// The base entry template to vary
84    base_account_number: String,
85    base_amount: rust_decimal::Decimal,
86    base_business_process: Option<BusinessProcess>,
87    base_posting_date: NaiveDate,
88    /// Remaining entries in this batch
89    remaining: u8,
90}
91
92impl JournalEntryGenerator {
93    /// Create a new journal entry generator.
94    pub fn new_with_params(
95        config: TransactionConfig,
96        coa: Arc<ChartOfAccounts>,
97        companies: Vec<String>,
98        start_date: NaiveDate,
99        end_date: NaiveDate,
100        seed: u64,
101    ) -> Self {
102        Self::new_with_full_config(
103            config,
104            coa,
105            companies,
106            start_date,
107            end_date,
108            seed,
109            TemplateConfig::default(),
110            None,
111        )
112    }
113
114    /// Create a new journal entry generator with full configuration.
115    #[allow(clippy::too_many_arguments)]
116    pub fn new_with_full_config(
117        config: TransactionConfig,
118        coa: Arc<ChartOfAccounts>,
119        companies: Vec<String>,
120        start_date: NaiveDate,
121        end_date: NaiveDate,
122        seed: u64,
123        template_config: TemplateConfig,
124        user_pool: Option<UserPool>,
125    ) -> Self {
126        // Initialize user pool if not provided
127        let user_pool = user_pool.or_else(|| {
128            if template_config.names.generate_realistic_names {
129                let user_gen_config = UserGeneratorConfig {
130                    culture_distribution: vec![
131                        (
132                            datasynth_core::templates::NameCulture::WesternUs,
133                            template_config.names.culture_distribution.western_us,
134                        ),
135                        (
136                            datasynth_core::templates::NameCulture::Hispanic,
137                            template_config.names.culture_distribution.hispanic,
138                        ),
139                        (
140                            datasynth_core::templates::NameCulture::German,
141                            template_config.names.culture_distribution.german,
142                        ),
143                        (
144                            datasynth_core::templates::NameCulture::French,
145                            template_config.names.culture_distribution.french,
146                        ),
147                        (
148                            datasynth_core::templates::NameCulture::Chinese,
149                            template_config.names.culture_distribution.chinese,
150                        ),
151                        (
152                            datasynth_core::templates::NameCulture::Japanese,
153                            template_config.names.culture_distribution.japanese,
154                        ),
155                        (
156                            datasynth_core::templates::NameCulture::Indian,
157                            template_config.names.culture_distribution.indian,
158                        ),
159                    ],
160                    email_domain: template_config.names.email_domain.clone(),
161                    generate_realistic_names: true,
162                };
163                let mut user_gen = UserGenerator::with_config(seed + 100, user_gen_config);
164                Some(user_gen.generate_standard(&companies))
165            } else {
166                None
167            }
168        });
169
170        // Initialize reference generator
171        let mut ref_gen = ReferenceGenerator::new(
172            start_date.year(),
173            companies
174                .first()
175                .map(std::string::String::as_str)
176                .unwrap_or("1000"),
177        );
178        ref_gen.set_prefix(
179            ReferenceType::Invoice,
180            &template_config.references.invoice_prefix,
181        );
182        ref_gen.set_prefix(
183            ReferenceType::PurchaseOrder,
184            &template_config.references.po_prefix,
185        );
186        ref_gen.set_prefix(
187            ReferenceType::SalesOrder,
188            &template_config.references.so_prefix,
189        );
190
191        // Create weighted company selector (uniform weights for this constructor)
192        let company_selector = WeightedCompanySelector::uniform(companies.clone());
193
194        Self {
195            rng: seeded_rng(seed, 0),
196            seed,
197            config: config.clone(),
198            coa,
199            companies,
200            company_selector,
201            line_sampler: LineItemSampler::with_config(
202                seed + 1,
203                config.line_item_distribution.clone(),
204                config.even_odd_distribution.clone(),
205                config.debit_credit_distribution.clone(),
206            ),
207            amount_sampler: AmountSampler::with_config(seed + 2, config.amounts.clone()),
208            temporal_sampler: TemporalSampler::with_config(
209                seed + 3,
210                config.seasonality.clone(),
211                WorkingHoursConfig::default(),
212                Vec::new(),
213            ),
214            start_date,
215            end_date,
216            count: 0,
217            uuid_factory: DeterministicUuidFactory::new(seed, GeneratorType::JournalEntry),
218            user_pool,
219            description_generator: DescriptionGenerator::new(),
220            reference_generator: ref_gen,
221            template_config,
222            vendor_pool: VendorPool::standard(),
223            customer_pool: CustomerPool::standard(),
224            material_pool: None,
225            using_real_master_data: false,
226            fraud_config: FraudConfig::default(),
227            persona_errors_enabled: true, // Enable by default for realism
228            approval_enabled: true,       // Enable by default for realism
229            approval_threshold: rust_decimal::Decimal::new(10000, 0), // $10,000 default threshold
230            sod_violation_rate: 0.10,     // 10% default SOD violation rate
231            batch_state: None,
232            drift_controller: None,
233            // Always provide a basic BusinessDayCalculator so that weekend/holiday
234            // filtering is active even when temporal_patterns is not explicitly enabled.
235            business_day_calculator: Some(BusinessDayCalculator::new(HolidayCalendar::new(
236                Region::US,
237                start_date.year(),
238            ))),
239            processing_lag_calculator: None,
240            temporal_patterns_config: None,
241        }
242    }
243
244    /// Create from a full GeneratorConfig.
245    ///
246    /// This constructor uses the volume_weight from company configs
247    /// for weighted company selection, and fraud config from GeneratorConfig.
248    pub fn from_generator_config(
249        full_config: &GeneratorConfig,
250        coa: Arc<ChartOfAccounts>,
251        start_date: NaiveDate,
252        end_date: NaiveDate,
253        seed: u64,
254    ) -> Self {
255        let companies: Vec<String> = full_config
256            .companies
257            .iter()
258            .map(|c| c.code.clone())
259            .collect();
260
261        // Create weighted selector using volume_weight from company configs
262        let company_selector = WeightedCompanySelector::from_configs(&full_config.companies);
263
264        let mut generator = Self::new_with_full_config(
265            full_config.transactions.clone(),
266            coa,
267            companies,
268            start_date,
269            end_date,
270            seed,
271            full_config.templates.clone(),
272            None,
273        );
274
275        // Override the uniform selector with weighted selector
276        generator.company_selector = company_selector;
277
278        // Set fraud config
279        generator.fraud_config = full_config.fraud.clone();
280
281        // Configure temporal patterns if enabled
282        let temporal_config = &full_config.temporal_patterns;
283        if temporal_config.enabled {
284            generator = generator.with_temporal_patterns(temporal_config.clone(), seed);
285        }
286
287        generator
288    }
289
290    /// Configure temporal patterns including business day calculations and processing lags.
291    ///
292    /// This enables realistic temporal behavior including:
293    /// - Business day awareness (no postings on weekends/holidays)
294    /// - Processing lag modeling (event-to-posting delays)
295    /// - Period-end dynamics (volume spikes at month/quarter/year end)
296    pub fn with_temporal_patterns(mut self, config: TemporalPatternsConfig, seed: u64) -> Self {
297        // Create business day calculator if enabled
298        if config.business_days.enabled {
299            let region = config
300                .calendars
301                .regions
302                .first()
303                .map(|r| Self::parse_region(r))
304                .unwrap_or(Region::US);
305
306            let calendar = HolidayCalendar::new(region, self.start_date.year());
307            self.business_day_calculator = Some(BusinessDayCalculator::new(calendar));
308        }
309
310        // Create processing lag calculator if enabled
311        if config.processing_lags.enabled {
312            let lag_config = Self::convert_processing_lag_config(&config.processing_lags);
313            self.processing_lag_calculator =
314                Some(ProcessingLagCalculator::with_config(seed, lag_config));
315        }
316
317        // Create period-end dynamics if configured
318        let model = config.period_end.model.as_deref().unwrap_or("flat");
319        if model != "flat"
320            || config
321                .period_end
322                .month_end
323                .as_ref()
324                .is_some_and(|m| m.peak_multiplier.unwrap_or(1.0) != 1.0)
325        {
326            let dynamics = Self::convert_period_end_config(&config.period_end);
327            self.temporal_sampler.set_period_end_dynamics(dynamics);
328        }
329
330        self.temporal_patterns_config = Some(config);
331        self
332    }
333
334    /// Configure temporal patterns using a [`CountryPack`] for the holiday calendar.
335    ///
336    /// This is an alternative to [`with_temporal_patterns`] that derives the
337    /// holiday calendar from a country-pack definition rather than the built-in
338    /// region-based calendars.  All other temporal behaviour (business-day
339    /// adjustment, processing lags, period-end dynamics) is configured
340    /// identically.
341    pub fn with_country_pack_temporal(
342        mut self,
343        config: TemporalPatternsConfig,
344        seed: u64,
345        pack: &CountryPack,
346    ) -> Self {
347        // Create business day calculator using the country pack calendar
348        if config.business_days.enabled {
349            let calendar = HolidayCalendar::from_country_pack(pack, self.start_date.year());
350            self.business_day_calculator = Some(BusinessDayCalculator::new(calendar));
351        }
352
353        // Create processing lag calculator if enabled
354        if config.processing_lags.enabled {
355            let lag_config = Self::convert_processing_lag_config(&config.processing_lags);
356            self.processing_lag_calculator =
357                Some(ProcessingLagCalculator::with_config(seed, lag_config));
358        }
359
360        // Create period-end dynamics if configured
361        let model = config.period_end.model.as_deref().unwrap_or("flat");
362        if model != "flat"
363            || config
364                .period_end
365                .month_end
366                .as_ref()
367                .is_some_and(|m| m.peak_multiplier.unwrap_or(1.0) != 1.0)
368        {
369            let dynamics = Self::convert_period_end_config(&config.period_end);
370            self.temporal_sampler.set_period_end_dynamics(dynamics);
371        }
372
373        self.temporal_patterns_config = Some(config);
374        self
375    }
376
377    /// Convert schema processing lag config to core config.
378    fn convert_processing_lag_config(
379        schema: &datasynth_config::schema::ProcessingLagSchemaConfig,
380    ) -> ProcessingLagConfig {
381        let mut config = ProcessingLagConfig {
382            enabled: schema.enabled,
383            ..Default::default()
384        };
385
386        // Helper to convert lag schema to distribution
387        let convert_lag = |lag: &datasynth_config::schema::LagDistributionSchemaConfig| {
388            let mut dist = LagDistribution::log_normal(lag.mu, lag.sigma);
389            if let Some(min) = lag.min_hours {
390                dist.min_lag_hours = min;
391            }
392            if let Some(max) = lag.max_hours {
393                dist.max_lag_hours = max;
394            }
395            dist
396        };
397
398        // Apply event-specific lags
399        if let Some(ref lag) = schema.sales_order_lag {
400            config
401                .event_lags
402                .insert(EventType::SalesOrder, convert_lag(lag));
403        }
404        if let Some(ref lag) = schema.purchase_order_lag {
405            config
406                .event_lags
407                .insert(EventType::PurchaseOrder, convert_lag(lag));
408        }
409        if let Some(ref lag) = schema.goods_receipt_lag {
410            config
411                .event_lags
412                .insert(EventType::GoodsReceipt, convert_lag(lag));
413        }
414        if let Some(ref lag) = schema.invoice_receipt_lag {
415            config
416                .event_lags
417                .insert(EventType::InvoiceReceipt, convert_lag(lag));
418        }
419        if let Some(ref lag) = schema.invoice_issue_lag {
420            config
421                .event_lags
422                .insert(EventType::InvoiceIssue, convert_lag(lag));
423        }
424        if let Some(ref lag) = schema.payment_lag {
425            config
426                .event_lags
427                .insert(EventType::Payment, convert_lag(lag));
428        }
429        if let Some(ref lag) = schema.journal_entry_lag {
430            config
431                .event_lags
432                .insert(EventType::JournalEntry, convert_lag(lag));
433        }
434
435        // Apply cross-day posting config
436        if let Some(ref cross_day) = schema.cross_day_posting {
437            config.cross_day = CrossDayConfig {
438                enabled: cross_day.enabled,
439                probability_by_hour: cross_day.probability_by_hour.clone(),
440                ..Default::default()
441            };
442        }
443
444        config
445    }
446
447    /// Convert schema period-end config to core PeriodEndDynamics.
448    fn convert_period_end_config(
449        schema: &datasynth_config::schema::PeriodEndSchemaConfig,
450    ) -> PeriodEndDynamics {
451        let model_type = schema.model.as_deref().unwrap_or("exponential");
452
453        // Helper to convert period config
454        let convert_period =
455            |period: Option<&datasynth_config::schema::PeriodEndModelSchemaConfig>,
456             default_peak: f64|
457             -> PeriodEndConfig {
458                if let Some(p) = period {
459                    let model = match model_type {
460                        "flat" => PeriodEndModel::FlatMultiplier {
461                            multiplier: p.peak_multiplier.unwrap_or(default_peak),
462                        },
463                        "extended_crunch" => PeriodEndModel::ExtendedCrunch {
464                            start_day: p.start_day.unwrap_or(-10),
465                            sustained_high_days: p.sustained_high_days.unwrap_or(3),
466                            peak_multiplier: p.peak_multiplier.unwrap_or(default_peak),
467                            ramp_up_days: 3, // Default ramp-up period
468                        },
469                        _ => PeriodEndModel::ExponentialAcceleration {
470                            start_day: p.start_day.unwrap_or(-10),
471                            base_multiplier: p.base_multiplier.unwrap_or(1.0),
472                            peak_multiplier: p.peak_multiplier.unwrap_or(default_peak),
473                            decay_rate: p.decay_rate.unwrap_or(0.3),
474                        },
475                    };
476                    PeriodEndConfig {
477                        enabled: true,
478                        model,
479                        additional_multiplier: p.additional_multiplier.unwrap_or(1.0),
480                    }
481                } else {
482                    PeriodEndConfig {
483                        enabled: true,
484                        model: PeriodEndModel::ExponentialAcceleration {
485                            start_day: -10,
486                            base_multiplier: 1.0,
487                            peak_multiplier: default_peak,
488                            decay_rate: 0.3,
489                        },
490                        additional_multiplier: 1.0,
491                    }
492                }
493            };
494
495        PeriodEndDynamics::new(
496            convert_period(schema.month_end.as_ref(), 2.0),
497            convert_period(schema.quarter_end.as_ref(), 3.5),
498            convert_period(schema.year_end.as_ref(), 5.0),
499        )
500    }
501
502    /// Parse a region string into a Region enum.
503    fn parse_region(region_str: &str) -> Region {
504        match region_str.to_uppercase().as_str() {
505            "US" => Region::US,
506            "DE" => Region::DE,
507            "GB" => Region::GB,
508            "CN" => Region::CN,
509            "JP" => Region::JP,
510            "IN" => Region::IN,
511            "BR" => Region::BR,
512            "MX" => Region::MX,
513            "AU" => Region::AU,
514            "SG" => Region::SG,
515            "KR" => Region::KR,
516            "FR" => Region::FR,
517            "IT" => Region::IT,
518            "ES" => Region::ES,
519            "CA" => Region::CA,
520            _ => Region::US,
521        }
522    }
523
524    /// Set a custom company selector.
525    pub fn set_company_selector(&mut self, selector: WeightedCompanySelector) {
526        self.company_selector = selector;
527    }
528
529    /// Get the current company selector.
530    pub fn company_selector(&self) -> &WeightedCompanySelector {
531        &self.company_selector
532    }
533
534    /// Set fraud configuration.
535    pub fn set_fraud_config(&mut self, config: FraudConfig) {
536        self.fraud_config = config;
537    }
538
539    /// Set vendors from generated master data.
540    ///
541    /// This replaces the default vendor pool with actual generated vendors,
542    /// ensuring JEs reference real master data entities.
543    pub fn with_vendors(mut self, vendors: &[Vendor]) -> Self {
544        if !vendors.is_empty() {
545            self.vendor_pool = VendorPool::from_vendors(vendors.to_vec());
546            self.using_real_master_data = true;
547        }
548        self
549    }
550
551    /// Set customers from generated master data.
552    ///
553    /// This replaces the default customer pool with actual generated customers,
554    /// ensuring JEs reference real master data entities.
555    pub fn with_customers(mut self, customers: &[Customer]) -> Self {
556        if !customers.is_empty() {
557            self.customer_pool = CustomerPool::from_customers(customers.to_vec());
558            self.using_real_master_data = true;
559        }
560        self
561    }
562
563    /// Set materials from generated master data.
564    ///
565    /// This provides material references for JEs that involve inventory movements.
566    pub fn with_materials(mut self, materials: &[Material]) -> Self {
567        if !materials.is_empty() {
568            self.material_pool = Some(MaterialPool::from_materials(materials.to_vec()));
569            self.using_real_master_data = true;
570        }
571        self
572    }
573
574    /// Set all master data at once for convenience.
575    ///
576    /// This is the recommended way to configure the JE generator with
577    /// generated master data to ensure data coherence.
578    pub fn with_master_data(
579        self,
580        vendors: &[Vendor],
581        customers: &[Customer],
582        materials: &[Material],
583    ) -> Self {
584        self.with_vendors(vendors)
585            .with_customers(customers)
586            .with_materials(materials)
587    }
588
589    /// Replace the user pool with one generated from a [`CountryPack`].
590    ///
591    /// This is an alternative to the default name-culture distribution that
592    /// derives name pools and weights from the country-pack's `names` section.
593    /// The existing user pool (if any) is discarded and regenerated using
594    /// [`MultiCultureNameGenerator::from_country_pack`].
595    pub fn with_country_pack_names(mut self, pack: &CountryPack) -> Self {
596        let name_gen =
597            datasynth_core::templates::MultiCultureNameGenerator::from_country_pack(pack);
598        let config = UserGeneratorConfig {
599            // The culture distribution is embedded in the name generator
600            // itself, so we use an empty list here.
601            culture_distribution: Vec::new(),
602            email_domain: name_gen.email_domain().to_string(),
603            generate_realistic_names: true,
604        };
605        let mut user_gen = UserGenerator::with_name_generator(self.seed + 100, config, name_gen);
606        self.user_pool = Some(user_gen.generate_standard(&self.companies));
607        self
608    }
609
610    /// Check if the generator is using real master data.
611    pub fn is_using_real_master_data(&self) -> bool {
612        self.using_real_master_data
613    }
614
615    /// Determine if this transaction should be fraudulent.
616    fn determine_fraud(&mut self) -> Option<FraudType> {
617        if !self.fraud_config.enabled {
618            return None;
619        }
620
621        // Roll for fraud based on fraud rate
622        if self.rng.random::<f64>() >= self.fraud_config.fraud_rate {
623            return None;
624        }
625
626        // Select fraud type based on distribution
627        Some(self.select_fraud_type())
628    }
629
630    /// Select a fraud type based on the configured distribution.
631    fn select_fraud_type(&mut self) -> FraudType {
632        let dist = &self.fraud_config.fraud_type_distribution;
633        let roll: f64 = self.rng.random();
634
635        let mut cumulative = 0.0;
636
637        cumulative += dist.suspense_account_abuse;
638        if roll < cumulative {
639            return FraudType::SuspenseAccountAbuse;
640        }
641
642        cumulative += dist.fictitious_transaction;
643        if roll < cumulative {
644            return FraudType::FictitiousTransaction;
645        }
646
647        cumulative += dist.revenue_manipulation;
648        if roll < cumulative {
649            return FraudType::RevenueManipulation;
650        }
651
652        cumulative += dist.expense_capitalization;
653        if roll < cumulative {
654            return FraudType::ExpenseCapitalization;
655        }
656
657        cumulative += dist.split_transaction;
658        if roll < cumulative {
659            return FraudType::SplitTransaction;
660        }
661
662        cumulative += dist.timing_anomaly;
663        if roll < cumulative {
664            return FraudType::TimingAnomaly;
665        }
666
667        cumulative += dist.unauthorized_access;
668        if roll < cumulative {
669            return FraudType::UnauthorizedAccess;
670        }
671
672        // Default fallback
673        FraudType::DuplicatePayment
674    }
675
676    /// Map a fraud type to an amount pattern for suspicious amounts.
677    fn fraud_type_to_amount_pattern(&self, fraud_type: FraudType) -> FraudAmountPattern {
678        match fraud_type {
679            FraudType::SplitTransaction | FraudType::JustBelowThreshold => {
680                FraudAmountPattern::ThresholdAdjacent
681            }
682            FraudType::FictitiousTransaction
683            | FraudType::FictitiousEntry
684            | FraudType::SuspenseAccountAbuse
685            | FraudType::RoundDollarManipulation => FraudAmountPattern::ObviousRoundNumbers,
686            FraudType::RevenueManipulation
687            | FraudType::ExpenseCapitalization
688            | FraudType::ImproperCapitalization
689            | FraudType::ReserveManipulation
690            | FraudType::UnauthorizedAccess
691            | FraudType::PrematureRevenue
692            | FraudType::UnderstatedLiabilities
693            | FraudType::OverstatedAssets
694            | FraudType::ChannelStuffing => FraudAmountPattern::StatisticallyImprobable,
695            FraudType::DuplicatePayment
696            | FraudType::TimingAnomaly
697            | FraudType::SelfApproval
698            | FraudType::ExceededApprovalLimit
699            | FraudType::SegregationOfDutiesViolation
700            | FraudType::UnauthorizedApproval
701            | FraudType::CollusiveApproval
702            | FraudType::FictitiousVendor
703            | FraudType::ShellCompanyPayment
704            | FraudType::Kickback
705            | FraudType::KickbackScheme
706            | FraudType::InvoiceManipulation
707            | FraudType::AssetMisappropriation
708            | FraudType::InventoryTheft
709            | FraudType::GhostEmployee => FraudAmountPattern::Normal,
710            // Accounting Standards Fraud Types (ASC 606/IFRS 15 - Revenue)
711            FraudType::ImproperRevenueRecognition
712            | FraudType::ImproperPoAllocation
713            | FraudType::VariableConsiderationManipulation
714            | FraudType::ContractModificationMisstatement => {
715                FraudAmountPattern::StatisticallyImprobable
716            }
717            // Accounting Standards Fraud Types (ASC 842/IFRS 16 - Leases)
718            FraudType::LeaseClassificationManipulation
719            | FraudType::OffBalanceSheetLease
720            | FraudType::LeaseLiabilityUnderstatement
721            | FraudType::RouAssetMisstatement => FraudAmountPattern::StatisticallyImprobable,
722            // Accounting Standards Fraud Types (ASC 820/IFRS 13 - Fair Value)
723            FraudType::FairValueHierarchyManipulation
724            | FraudType::Level3InputManipulation
725            | FraudType::ValuationTechniqueManipulation => {
726                FraudAmountPattern::StatisticallyImprobable
727            }
728            // Accounting Standards Fraud Types (ASC 360/IAS 36 - Impairment)
729            FraudType::DelayedImpairment
730            | FraudType::ImpairmentTestAvoidance
731            | FraudType::CashFlowProjectionManipulation
732            | FraudType::ImproperImpairmentReversal => FraudAmountPattern::StatisticallyImprobable,
733            // Sourcing/Procurement Fraud
734            FraudType::BidRigging
735            | FraudType::PhantomVendorContract
736            | FraudType::ConflictOfInterestSourcing => FraudAmountPattern::Normal,
737            FraudType::SplitContractThreshold => FraudAmountPattern::ThresholdAdjacent,
738            // HR/Payroll Fraud
739            FraudType::GhostEmployeePayroll
740            | FraudType::PayrollInflation
741            | FraudType::DuplicateExpenseReport
742            | FraudType::FictitiousExpense => FraudAmountPattern::Normal,
743            FraudType::SplitExpenseToAvoidApproval => FraudAmountPattern::ThresholdAdjacent,
744            // O2C Fraud
745            FraudType::RevenueTimingManipulation => FraudAmountPattern::StatisticallyImprobable,
746            FraudType::QuotePriceOverride => FraudAmountPattern::Normal,
747        }
748    }
749
750    /// Generate a deterministic UUID using the factory.
751    #[inline]
752    fn generate_deterministic_uuid(&self) -> uuid::Uuid {
753        self.uuid_factory.next()
754    }
755
756    /// Cost center pool used for expense account enrichment.
757    const COST_CENTER_POOL: &'static [&'static str] =
758        &["CC1000", "CC2000", "CC3000", "CC4000", "CC5000"];
759
760    /// Enrich journal entry line items with account descriptions, cost centers,
761    /// profit centers, value dates, line text, and assignment fields.
762    ///
763    /// This populates the sparse optional fields that `JournalEntryLine::debit()`
764    /// and `::credit()` leave as `None`.
765    fn enrich_line_items(&self, entry: &mut JournalEntry) {
766        let posting_date = entry.header.posting_date;
767        let company_code = &entry.header.company_code;
768        let header_text = entry.header.header_text.clone();
769        let business_process = entry.header.business_process;
770
771        // Derive a deterministic index from the document_id for cost center selection
772        let doc_id_bytes = entry.header.document_id.as_bytes();
773        let mut cc_seed: usize = 0;
774        for &b in doc_id_bytes {
775            cc_seed = cc_seed.wrapping_add(b as usize);
776        }
777
778        for (i, line) in entry.lines.iter_mut().enumerate() {
779            // 1. account_description: look up from CoA
780            if line.account_description.is_none() {
781                line.account_description = self
782                    .coa
783                    .get_account(&line.gl_account)
784                    .map(|a| a.short_description.clone());
785            }
786
787            // 2. cost_center: assign to expense accounts (5xxx/6xxx)
788            if line.cost_center.is_none() {
789                let first_char = line.gl_account.chars().next().unwrap_or('0');
790                if first_char == '5' || first_char == '6' {
791                    let idx = cc_seed.wrapping_add(i) % Self::COST_CENTER_POOL.len();
792                    line.cost_center = Some(Self::COST_CENTER_POOL[idx].to_string());
793                }
794            }
795
796            // 3. profit_center: derive from company code + business process
797            if line.profit_center.is_none() {
798                let suffix = match business_process {
799                    Some(BusinessProcess::P2P) => "-P2P",
800                    Some(BusinessProcess::O2C) => "-O2C",
801                    Some(BusinessProcess::R2R) => "-R2R",
802                    Some(BusinessProcess::H2R) => "-H2R",
803                    _ => "",
804                };
805                line.profit_center = Some(format!("PC-{company_code}{suffix}"));
806            }
807
808            // 4. line_text: fall back to header_text if not already set
809            if line.line_text.is_none() {
810                line.line_text = header_text.clone();
811            }
812
813            // 5. value_date: set to posting_date for AR/AP accounts
814            if line.value_date.is_none()
815                && (line.gl_account.starts_with("1100") || line.gl_account.starts_with("2000"))
816            {
817                line.value_date = Some(posting_date);
818            }
819
820            // 6. assignment: set to vendor/customer reference for AP/AR lines
821            if line.assignment.is_none() {
822                if line.gl_account.starts_with("2000") {
823                    // AP line - use vendor reference from header
824                    if let Some(ref ht) = header_text {
825                        // Try to extract vendor ID from header text patterns like "... - V-001"
826                        if let Some(vendor_part) = ht.rsplit(" - ").next() {
827                            if vendor_part.starts_with("V-")
828                                || vendor_part.starts_with("VENDOR")
829                                || vendor_part.starts_with("Vendor")
830                            {
831                                line.assignment = Some(vendor_part.to_string());
832                            }
833                        }
834                    }
835                } else if line.gl_account.starts_with("1100") {
836                    // AR line - use customer reference from header
837                    if let Some(ref ht) = header_text {
838                        if let Some(customer_part) = ht.rsplit(" - ").next() {
839                            if customer_part.starts_with("C-")
840                                || customer_part.starts_with("CUST")
841                                || customer_part.starts_with("Customer")
842                            {
843                                line.assignment = Some(customer_part.to_string());
844                            }
845                        }
846                    }
847                }
848            }
849        }
850    }
851
    /// Generate a single journal entry.
    ///
    /// If a batch is in progress (`batch_state` is set with `remaining > 0`),
    /// this delegates to `generate_batched_entry` and returns its result.
    /// Otherwise a fresh entry is built in this order:
    ///
    /// 1. Sample a posting date between `start_date` and `end_date`, moving it
    ///    onto a business day when a business day calculator is configured.
    /// 2. Pick the company, line-count spec, transaction source, business
    ///    process, optional fraud type, creation time and creating user.
    /// 3. Fill the header, including the optional header text, the optional
    ///    reference, and the source document derived from the reference.
    /// 4. Sample a total amount (fraud-patterned when a fraud type was drawn),
    ///    apply drift and, for non-automated sources, human variation.
    /// 5. Split the total into debit lines, then into credit lines.
    /// 6. Enrich the lines, optionally inject persona errors and the approval
    ///    workflow, populate approval fields, and possibly start a batch.
    ///
    /// The steps draw from `self.rng` and the samplers one after another, so
    /// reordering them would presumably change the output for a given seed.
    ///
    /// # Returns
    ///
    /// The newly generated [`JournalEntry`].
    pub fn generate(&mut self) -> JournalEntry {
        debug!(
            count = self.count,
            companies = self.companies.len(),
            start_date = %self.start_date,
            end_date = %self.end_date,
            "Generating journal entry"
        );

        // Check if we're in a batch - if so, generate a batched entry
        if let Some(ref state) = self.batch_state {
            if state.remaining > 0 {
                return self.generate_batched_entry();
            }
        }

        self.count += 1;

        // Generate deterministic document ID
        let document_id = self.generate_deterministic_uuid();

        // Sample posting date
        let mut posting_date = self
            .temporal_sampler
            .sample_date(self.start_date, self.end_date);

        // Adjust posting date to be a business day if business day calculator is configured
        if let Some(ref calc) = self.business_day_calculator {
            if !calc.is_business_day(posting_date) {
                // Move to next business day
                posting_date = calc.next_business_day(posting_date, false);
                // Ensure we don't exceed end_date: fall back to a business day
                // at or before end_date (presumably `true` means "inclusive" — confirm)
                if posting_date > self.end_date {
                    posting_date = calc.prev_business_day(self.end_date, true);
                }
            }
        }

        // Select company using weighted selector
        let company_code = self.company_selector.select(&mut self.rng).to_string();

        // Sample line item specification
        let line_spec = self.line_sampler.sample();

        // Determine source type using full 4-way distribution
        let source = self.select_source();
        // Automated and Recurring sources both count as "automated" below:
        // they skip human amount variation and persona error injection.
        let is_automated = matches!(
            source,
            TransactionSource::Automated | TransactionSource::Recurring
        );

        // Select business process
        let business_process = self.select_business_process();

        // Determine if this is a fraudulent transaction
        let fraud_type = self.determine_fraud();
        let is_fraud = fraud_type.is_some();

        // Sample time based on source (argument is true for non-automated sources)
        let time = self.temporal_sampler.sample_time(!is_automated);
        let created_at = posting_date.and_time(time).and_utc();

        // Select user from pool or generate generic
        let (created_by, user_persona) = self.select_user(is_automated);

        // Create header with deterministic UUID
        let mut header =
            JournalEntryHeader::with_deterministic_id(company_code, posting_date, document_id);
        header.created_at = created_at;
        header.source = source;
        header.created_by = created_by;
        header.user_persona = user_persona;
        header.business_process = Some(business_process);
        header.document_type = Self::document_type_for_process(business_process).to_string();
        header.is_fraud = is_fraud;
        header.fraud_type = fraud_type;

        // Generate description context
        let mut context =
            DescriptionContext::with_period(posting_date.month(), posting_date.year());

        // Add vendor/customer context based on business process
        match business_process {
            BusinessProcess::P2P => {
                if let Some(vendor) = self.vendor_pool.random_vendor(&mut self.rng) {
                    context.vendor_name = Some(vendor.name.clone());
                }
            }
            BusinessProcess::O2C => {
                if let Some(customer) = self.customer_pool.random_customer(&mut self.rng) {
                    context.customer_name = Some(customer.name.clone());
                }
            }
            _ => {}
        }

        // Generate header text if enabled
        if self.template_config.descriptions.generate_header_text {
            header.header_text = Some(self.description_generator.generate_header_text(
                business_process,
                &context,
                &mut self.rng,
            ));
        }

        // Generate reference if enabled
        if self.template_config.references.generate_references {
            header.reference = Some(
                self.reference_generator
                    .generate_for_process_year(business_process, posting_date.year()),
            );
        }

        // Derive typed source document from reference prefix; when there is no
        // parseable reference, Manual-source entries get `DocumentRef::Manual`
        // and all other sources get `None`.
        header.source_document = header
            .reference
            .as_deref()
            .and_then(DocumentRef::parse)
            .or_else(|| {
                if header.source == TransactionSource::Manual {
                    Some(DocumentRef::Manual)
                } else {
                    None
                }
            });

        // Generate line items
        let mut entry = JournalEntry::new(header);

        // Generate amount - use fraud pattern if this is a fraudulent transaction
        let base_amount = if let Some(ft) = fraud_type {
            let pattern = self.fraud_type_to_amount_pattern(ft);
            self.amount_sampler.sample_fraud(pattern)
        } else {
            self.amount_sampler.sample()
        };

        // Apply temporal drift if configured
        // NOTE(review): `seasonal_factor` is only applied when
        // `amount_mean_multiplier != 1.0`, and the adjusted value is not rounded
        // here — confirm both are intended.
        let drift_adjusted_amount = {
            let drift = self.get_drift_adjustments(posting_date);
            if drift.amount_mean_multiplier != 1.0 {
                // Apply drift multiplier (includes seasonal factor if enabled)
                let multiplier = drift.amount_mean_multiplier * drift.seasonal_factor;
                let adjusted = base_amount.to_f64().unwrap_or(1.0) * multiplier;
                // Fall back to the unadjusted amount if the f64 cannot be represented
                Decimal::from_f64_retain(adjusted).unwrap_or(base_amount)
            } else {
                base_amount
            }
        };

        // Apply human variation to amounts for non-automated transactions
        let total_amount = if is_automated {
            drift_adjusted_amount // Automated systems use exact amounts
        } else {
            self.apply_human_variation(drift_adjusted_amount)
        };

        // Generate debit lines (line numbers start at 1)
        let debit_amounts = self
            .amount_sampler
            .sample_summing_to(line_spec.debit_count, total_amount);
        for (i, amount) in debit_amounts.into_iter().enumerate() {
            let account_number = self.select_debit_account().account_number.clone();
            let mut line = JournalEntryLine::debit(
                entry.header.document_id,
                (i + 1) as u32,
                account_number.clone(),
                amount,
            );

            // Generate line text if enabled
            if self.template_config.descriptions.generate_line_text {
                line.line_text = Some(self.description_generator.generate_line_text(
                    &account_number,
                    &context,
                    &mut self.rng,
                ));
            }

            entry.add_line(line);
        }

        // Generate credit lines - the split is sampled separately from the debit
        // split but targets the same `total_amount`, which keeps the entry
        // balanced (assuming `sample_summing_to` sums exactly to its target).
        // Line numbers continue after the debit lines.
        let credit_amounts = self
            .amount_sampler
            .sample_summing_to(line_spec.credit_count, total_amount);
        for (i, amount) in credit_amounts.into_iter().enumerate() {
            let account_number = self.select_credit_account().account_number.clone();
            let mut line = JournalEntryLine::credit(
                entry.header.document_id,
                (line_spec.debit_count + i + 1) as u32,
                account_number.clone(),
                amount,
            );

            // Generate line text if enabled
            if self.template_config.descriptions.generate_line_text {
                line.line_text = Some(self.description_generator.generate_line_text(
                    &account_number,
                    &context,
                    &mut self.rng,
                ));
            }

            entry.add_line(line);
        }

        // Enrich line items with account descriptions, cost centers, etc.
        self.enrich_line_items(&mut entry);

        // Apply persona-based errors if enabled and it's a human user
        if self.persona_errors_enabled && !is_automated {
            self.maybe_inject_persona_error(&mut entry);
        }

        // Run the approval workflow step if enabled (any threshold check
        // happens inside the callee, not here)
        if self.approval_enabled {
            self.maybe_apply_approval_workflow(&mut entry, posting_date);
        }

        // Populate approved_by / approval_date from the approval workflow
        self.populate_approval_fields(&mut entry, posting_date);

        // Maybe start a batch of similar entries for realism
        self.maybe_start_batch(&entry);

        entry
    }
1081
    /// Enable or disable persona-based error injection.
    ///
    /// When enabled, entries created by human personas have a chance
    /// to contain realistic human errors based on their experience level.
    ///
    /// Builder-style: consumes the generator, stores `enabled` in
    /// `persona_errors_enabled`, and returns the updated generator.
    pub fn with_persona_errors(mut self, enabled: bool) -> Self {
        self.persona_errors_enabled = enabled;
        self
    }
1090
    /// Set fraud configuration for fraud injection.
    ///
    /// When fraud is enabled in the config, transactions have a chance
    /// to be marked as fraudulent based on the configured fraud rate.
    ///
    /// Builder-style: consumes the generator, replaces its `fraud_config` with
    /// `config`, and returns the updated generator.
    pub fn with_fraud_config(mut self, config: FraudConfig) -> Self {
        self.fraud_config = config;
        self
    }
1099
    /// Check if persona errors are enabled.
    ///
    /// Returns the current value of the `persona_errors_enabled` flag, as set
    /// by `with_persona_errors`.
    pub fn persona_errors_enabled(&self) -> bool {
        self.persona_errors_enabled
    }
1104
1105    /// Enable or disable batch processing behavior.
1106    ///
1107    /// When enabled (default), the generator will occasionally produce batches
1108    /// of similar entries, simulating how humans batch similar work together.
1109    pub fn with_batching(mut self, enabled: bool) -> Self {
1110        if !enabled {
1111            self.batch_state = None;
1112        }
1113        self
1114    }
1115
    /// Check if batch processing is enabled.
    ///
    /// Currently always returns `true`; no generator state is consulted.
    pub fn batching_enabled(&self) -> bool {
        // Batching is implicitly enabled when not explicitly disabled
        // NOTE(review): `with_batching(false)` only clears `batch_state`, so
        // this getter cannot reflect it — confirm whether a dedicated flag is
        // intended.
        true
    }
1121
1122    /// Maybe start a batch based on the current entry.
1123    ///
1124    /// Humans often batch similar work: processing invoices from one vendor,
1125    /// entering expense reports for a trip, reconciling similar items.
1126    fn maybe_start_batch(&mut self, entry: &JournalEntry) {
1127        // Only start batch for non-automated, non-fraud entries
1128        if entry.header.source == TransactionSource::Automated || entry.header.is_fraud {
1129            return;
1130        }
1131
1132        // 15% chance to start a batch (most work is not batched)
1133        if self.rng.random::<f64>() > 0.15 {
1134            return;
1135        }
1136
1137        // Extract key attributes for batching
1138        let base_account = entry
1139            .lines
1140            .first()
1141            .map(|l| l.gl_account.clone())
1142            .unwrap_or_default();
1143
1144        let base_amount = entry.total_debit();
1145
1146        self.batch_state = Some(BatchState {
1147            base_account_number: base_account,
1148            base_amount,
1149            base_business_process: entry.header.business_process,
1150            base_posting_date: entry.header.posting_date,
1151            remaining: self.rng.random_range(2..7), // 2-6 more similar entries
1152        });
1153    }
1154
1155    /// Generate an entry that's part of the current batch.
1156    ///
1157    /// Batched entries have:
1158    /// - Same or very similar business process
1159    /// - Same posting date (batched work done together)
1160    /// - Similar amounts (within ±15%)
1161    /// - Same debit account (processing similar items)
1162    fn generate_batched_entry(&mut self) -> JournalEntry {
1163        use rust_decimal::Decimal;
1164
1165        // Decrement batch counter
1166        if let Some(ref mut state) = self.batch_state {
1167            state.remaining = state.remaining.saturating_sub(1);
1168        }
1169
1170        let Some(batch) = self.batch_state.clone() else {
1171            // This is a programming error - batch_state should be set before calling this method.
1172            // Clear state and fall back to generating a standard entry instead of panicking.
1173            tracing::warn!(
1174                "generate_batched_entry called without batch_state; generating standard entry"
1175            );
1176            self.batch_state = None;
1177            return self.generate();
1178        };
1179
1180        // Use the batch's posting date (work done on same day)
1181        let posting_date = batch.base_posting_date;
1182
1183        self.count += 1;
1184        let document_id = self.generate_deterministic_uuid();
1185
1186        // Select same company (batched work is usually same company)
1187        let company_code = self.company_selector.select(&mut self.rng).to_string();
1188
1189        // Use simplified line spec for batched entries (usually 2-line)
1190        let _line_spec = LineItemSpec {
1191            total_count: 2,
1192            debit_count: 1,
1193            credit_count: 1,
1194            split_type: DebitCreditSplit::Equal,
1195        };
1196
1197        // Batched entries are always manual
1198        let source = TransactionSource::Manual;
1199
1200        // Use the batch's business process
1201        let business_process = batch.base_business_process.unwrap_or(BusinessProcess::R2R);
1202
1203        // Sample time
1204        let time = self.temporal_sampler.sample_time(true);
1205        let created_at = posting_date.and_time(time).and_utc();
1206
1207        // Same user for batched work
1208        let (created_by, user_persona) = self.select_user(false);
1209
1210        // Create header
1211        let mut header =
1212            JournalEntryHeader::with_deterministic_id(company_code, posting_date, document_id);
1213        header.created_at = created_at;
1214        header.source = source;
1215        header.created_by = created_by;
1216        header.user_persona = user_persona;
1217        header.business_process = Some(business_process);
1218        header.document_type = Self::document_type_for_process(business_process).to_string();
1219
1220        // Batched manual entries have Manual source document
1221        header.source_document = Some(DocumentRef::Manual);
1222
1223        // Generate similar amount (within ±15% of base)
1224        let variation = self.rng.random_range(-0.15..0.15);
1225        let varied_amount =
1226            batch.base_amount * (Decimal::ONE + Decimal::try_from(variation).unwrap_or_default());
1227        let total_amount = varied_amount.round_dp(2).max(Decimal::from(1));
1228
1229        // Create the entry
1230        let mut entry = JournalEntry::new(header);
1231
1232        // Use same debit account as batch base
1233        let debit_line = JournalEntryLine::debit(
1234            entry.header.document_id,
1235            1,
1236            batch.base_account_number.clone(),
1237            total_amount,
1238        );
1239        entry.add_line(debit_line);
1240
1241        // Select a credit account
1242        let credit_account = self.select_credit_account().account_number.clone();
1243        let credit_line =
1244            JournalEntryLine::credit(entry.header.document_id, 2, credit_account, total_amount);
1245        entry.add_line(credit_line);
1246
1247        // Enrich line items with account descriptions, cost centers, etc.
1248        self.enrich_line_items(&mut entry);
1249
1250        // Apply persona-based errors if enabled
1251        if self.persona_errors_enabled {
1252            self.maybe_inject_persona_error(&mut entry);
1253        }
1254
1255        // Apply approval workflow if enabled
1256        if self.approval_enabled {
1257            self.maybe_apply_approval_workflow(&mut entry, posting_date);
1258        }
1259
1260        // Populate approved_by / approval_date from the approval workflow
1261        self.populate_approval_fields(&mut entry, posting_date);
1262
1263        // Clear batch state if no more entries remaining
1264        if batch.remaining <= 1 {
1265            self.batch_state = None;
1266        }
1267
1268        entry
1269    }
1270
1271    /// Maybe inject a persona-appropriate error based on the persona's error rate.
1272    fn maybe_inject_persona_error(&mut self, entry: &mut JournalEntry) {
1273        // Parse persona from the entry header
1274        let persona_str = &entry.header.user_persona;
1275        let persona = match persona_str.to_lowercase().as_str() {
1276            s if s.contains("junior") => UserPersona::JuniorAccountant,
1277            s if s.contains("senior") => UserPersona::SeniorAccountant,
1278            s if s.contains("controller") => UserPersona::Controller,
1279            s if s.contains("manager") => UserPersona::Manager,
1280            s if s.contains("executive") => UserPersona::Executive,
1281            _ => return, // Don't inject errors for unknown personas
1282        };
1283
1284        // Get base error rate from persona
1285        let base_error_rate = persona.error_rate();
1286
1287        // Apply stress factors based on posting date
1288        let adjusted_rate = self.apply_stress_factors(base_error_rate, entry.header.posting_date);
1289
1290        // Check if error should occur based on adjusted rate
1291        if self.rng.random::<f64>() >= adjusted_rate {
1292            return; // No error this time
1293        }
1294
1295        // Select and inject persona-appropriate error
1296        self.inject_human_error(entry, persona);
1297    }
1298
1299    /// Apply contextual stress factors to the base error rate.
1300    ///
1301    /// Stress factors increase error likelihood during:
1302    /// - Month-end (day >= 28): 1.5x more errors due to deadline pressure
1303    /// - Quarter-end (Mar, Jun, Sep, Dec): additional 25% boost
1304    /// - Year-end (December 28-31): 2.0x more errors due to audit pressure
1305    /// - Monday morning (catch-up work): 20% more errors
1306    /// - Friday afternoon (rushing to leave): 30% more errors
1307    fn apply_stress_factors(&self, base_rate: f64, posting_date: chrono::NaiveDate) -> f64 {
1308        use chrono::Datelike;
1309
1310        let mut rate = base_rate;
1311        let day = posting_date.day();
1312        let month = posting_date.month();
1313
1314        // Year-end stress (December 28-31): double the error rate
1315        if month == 12 && day >= 28 {
1316            rate *= 2.0;
1317            return rate.min(0.5); // Cap at 50% to keep it realistic
1318        }
1319
1320        // Quarter-end stress (last days of Mar, Jun, Sep, Dec)
1321        if matches!(month, 3 | 6 | 9 | 12) && day >= 28 {
1322            rate *= 1.75; // 75% more errors at quarter end
1323            return rate.min(0.4);
1324        }
1325
1326        // Month-end stress (last 3 days of month)
1327        if day >= 28 {
1328            rate *= 1.5; // 50% more errors at month end
1329        }
1330
1331        // Day-of-week stress effects
1332        let weekday = posting_date.weekday();
1333        match weekday {
1334            chrono::Weekday::Mon => {
1335                // Monday: catching up, often rushed
1336                rate *= 1.2;
1337            }
1338            chrono::Weekday::Fri => {
1339                // Friday: rushing to finish before weekend
1340                rate *= 1.3;
1341            }
1342            _ => {}
1343        }
1344
1345        // Cap at 40% to keep it realistic
1346        rate.min(0.4)
1347    }
1348
1349    /// Apply human-like variation to an amount.
1350    ///
1351    /// Humans don't enter perfectly calculated amounts - they:
1352    /// - Round amounts differently
1353    /// - Estimate instead of calculating exactly
1354    /// - Make small input variations
1355    ///
1356    /// This applies small variations (typically ±2%) to make amounts more realistic.
1357    fn apply_human_variation(&mut self, amount: rust_decimal::Decimal) -> rust_decimal::Decimal {
1358        use rust_decimal::Decimal;
1359
1360        // Automated transactions or very small amounts don't get variation
1361        if amount < Decimal::from(10) {
1362            return amount;
1363        }
1364
1365        // 70% chance of human variation being applied
1366        if self.rng.random::<f64>() > 0.70 {
1367            return amount;
1368        }
1369
1370        // Decide which type of human variation to apply
1371        let variation_type: u8 = self.rng.random_range(0..4);
1372
1373        match variation_type {
1374            0 => {
1375                // ±2% variation (common for estimated amounts)
1376                let variation_pct = self.rng.random_range(-0.02..0.02);
1377                let variation = amount * Decimal::try_from(variation_pct).unwrap_or_default();
1378                (amount + variation).round_dp(2)
1379            }
1380            1 => {
1381                // Round to nearest $10
1382                let ten = Decimal::from(10);
1383                (amount / ten).round() * ten
1384            }
1385            2 => {
1386                // Round to nearest $100 (for larger amounts)
1387                if amount >= Decimal::from(500) {
1388                    let hundred = Decimal::from(100);
1389                    (amount / hundred).round() * hundred
1390                } else {
1391                    amount
1392                }
1393            }
1394            3 => {
1395                // Slight under/over payment (±$0.01 to ±$1.00)
1396                let cents = Decimal::new(self.rng.random_range(-100..100), 2);
1397                (amount + cents).max(Decimal::ZERO).round_dp(2)
1398            }
1399            _ => amount,
1400        }
1401    }
1402
1403    /// Rebalance an entry after a one-sided amount modification.
1404    ///
1405    /// When an error modifies one line's amount, this finds a line on the opposite
1406    /// side (credit if modified was debit, or vice versa) and adjusts it by the
1407    /// same impact to maintain balance.
1408    fn rebalance_entry(entry: &mut JournalEntry, modified_was_debit: bool, impact: Decimal) {
1409        // Find a line on the opposite side to adjust
1410        let balancing_idx = entry.lines.iter().position(|l| {
1411            if modified_was_debit {
1412                l.credit_amount > Decimal::ZERO
1413            } else {
1414                l.debit_amount > Decimal::ZERO
1415            }
1416        });
1417
1418        if let Some(idx) = balancing_idx {
1419            if modified_was_debit {
1420                entry.lines[idx].credit_amount += impact;
1421            } else {
1422                entry.lines[idx].debit_amount += impact;
1423            }
1424        }
1425    }
1426
1427    /// Inject a human-like error based on the persona.
1428    ///
1429    /// All error types maintain balance - amount modifications are applied to both sides.
1430    /// Entries are marked with [HUMAN_ERROR:*] tags in header_text for ML detection.
1431    fn inject_human_error(&mut self, entry: &mut JournalEntry, persona: UserPersona) {
1432        use rust_decimal::Decimal;
1433
1434        // Different personas make different types of errors
1435        let error_type: u8 = match persona {
1436            UserPersona::JuniorAccountant => {
1437                // Junior accountants make more varied errors
1438                self.rng.random_range(0..5)
1439            }
1440            UserPersona::SeniorAccountant => {
1441                // Senior accountants mainly make transposition errors
1442                self.rng.random_range(0..3)
1443            }
1444            UserPersona::Controller | UserPersona::Manager => {
1445                // Controllers/managers mainly make rounding or cutoff errors
1446                self.rng.random_range(3..5)
1447            }
1448            _ => return,
1449        };
1450
1451        match error_type {
1452            0 => {
1453                // Transposed digits in an amount
1454                if let Some(line) = entry.lines.get_mut(0) {
1455                    let is_debit = line.debit_amount > Decimal::ZERO;
1456                    let original_amount = if is_debit {
1457                        line.debit_amount
1458                    } else {
1459                        line.credit_amount
1460                    };
1461
1462                    // Simple digit swap in the string representation
1463                    let s = original_amount.to_string();
1464                    if s.len() >= 2 {
1465                        let chars: Vec<char> = s.chars().collect();
1466                        let pos = self.rng.random_range(0..chars.len().saturating_sub(1));
1467                        if chars[pos].is_ascii_digit()
1468                            && chars.get(pos + 1).is_some_and(char::is_ascii_digit)
1469                        {
1470                            let mut new_chars = chars;
1471                            new_chars.swap(pos, pos + 1);
1472                            if let Ok(new_amount) =
1473                                new_chars.into_iter().collect::<String>().parse::<Decimal>()
1474                            {
1475                                let impact = new_amount - original_amount;
1476
1477                                // Apply to the modified line
1478                                if is_debit {
1479                                    entry.lines[0].debit_amount = new_amount;
1480                                } else {
1481                                    entry.lines[0].credit_amount = new_amount;
1482                                }
1483
1484                                // Rebalance the entry
1485                                Self::rebalance_entry(entry, is_debit, impact);
1486
1487                                entry.header.header_text = Some(
1488                                    entry.header.header_text.clone().unwrap_or_default()
1489                                        + " [HUMAN_ERROR:TRANSPOSITION]",
1490                                );
1491                            }
1492                        }
1493                    }
1494                }
1495            }
1496            1 => {
1497                // Wrong decimal place (off by factor of 10)
1498                if let Some(line) = entry.lines.get_mut(0) {
1499                    let is_debit = line.debit_amount > Decimal::ZERO;
1500                    let original_amount = if is_debit {
1501                        line.debit_amount
1502                    } else {
1503                        line.credit_amount
1504                    };
1505
1506                    let new_amount = original_amount * Decimal::new(10, 0);
1507                    let impact = new_amount - original_amount;
1508
1509                    // Apply to the modified line
1510                    if is_debit {
1511                        entry.lines[0].debit_amount = new_amount;
1512                    } else {
1513                        entry.lines[0].credit_amount = new_amount;
1514                    }
1515
1516                    // Rebalance the entry
1517                    Self::rebalance_entry(entry, is_debit, impact);
1518
1519                    entry.header.header_text = Some(
1520                        entry.header.header_text.clone().unwrap_or_default()
1521                            + " [HUMAN_ERROR:DECIMAL_SHIFT]",
1522                    );
1523                }
1524            }
1525            2 => {
1526                // Typo in description (doesn't affect balance)
1527                if let Some(ref mut text) = entry.header.header_text {
1528                    let typos = ["teh", "adn", "wiht", "taht", "recieve"];
1529                    let correct = ["the", "and", "with", "that", "receive"];
1530                    let idx = self.rng.random_range(0..typos.len());
1531                    if text.to_lowercase().contains(correct[idx]) {
1532                        *text = text.replace(correct[idx], typos[idx]);
1533                        *text = format!("{text} [HUMAN_ERROR:TYPO]");
1534                    }
1535                }
1536            }
1537            3 => {
1538                // Rounding to round number
1539                if let Some(line) = entry.lines.get_mut(0) {
1540                    let is_debit = line.debit_amount > Decimal::ZERO;
1541                    let original_amount = if is_debit {
1542                        line.debit_amount
1543                    } else {
1544                        line.credit_amount
1545                    };
1546
1547                    let new_amount =
1548                        (original_amount / Decimal::new(100, 0)).round() * Decimal::new(100, 0);
1549                    let impact = new_amount - original_amount;
1550
1551                    // Apply to the modified line
1552                    if is_debit {
1553                        entry.lines[0].debit_amount = new_amount;
1554                    } else {
1555                        entry.lines[0].credit_amount = new_amount;
1556                    }
1557
1558                    // Rebalance the entry
1559                    Self::rebalance_entry(entry, is_debit, impact);
1560
1561                    entry.header.header_text = Some(
1562                        entry.header.header_text.clone().unwrap_or_default()
1563                            + " [HUMAN_ERROR:ROUNDED]",
1564                    );
1565                }
1566            }
1567            4 => {
1568                // Late posting marker (document date much earlier than posting date)
1569                // This doesn't create an imbalance
1570                if entry.header.document_date == entry.header.posting_date {
1571                    let days_late = self.rng.random_range(5..15);
1572                    entry.header.document_date =
1573                        entry.header.posting_date - chrono::Duration::days(days_late);
1574                    entry.header.header_text = Some(
1575                        entry.header.header_text.clone().unwrap_or_default()
1576                            + " [HUMAN_ERROR:LATE_POSTING]",
1577                    );
1578                }
1579            }
1580            _ => {}
1581        }
1582    }
1583
1584    /// Apply approval workflow for high-value transactions.
1585    ///
1586    /// If the entry amount exceeds the approval threshold, simulate an
1587    /// approval workflow with appropriate approvers based on amount.
1588    fn maybe_apply_approval_workflow(
1589        &mut self,
1590        entry: &mut JournalEntry,
1591        _posting_date: NaiveDate,
1592    ) {
1593        use rust_decimal::Decimal;
1594
1595        let amount = entry.total_debit();
1596
1597        // Skip if amount is below threshold
1598        if amount <= self.approval_threshold {
1599            // Auto-approved below threshold
1600            let workflow = ApprovalWorkflow::auto_approved(
1601                entry.header.created_by.clone(),
1602                entry.header.user_persona.clone(),
1603                amount,
1604                entry.header.created_at,
1605            );
1606            entry.header.approval_workflow = Some(workflow);
1607            return;
1608        }
1609
1610        // Mark as SOX relevant for high-value transactions
1611        entry.header.sox_relevant = true;
1612
1613        // Determine required approval levels based on amount
1614        let required_levels = if amount > Decimal::new(100000, 0) {
1615            3 // Executive approval required
1616        } else if amount > Decimal::new(50000, 0) {
1617            2 // Senior management approval
1618        } else {
1619            1 // Manager approval
1620        };
1621
1622        // Create the approval workflow
1623        let mut workflow = ApprovalWorkflow::new(
1624            entry.header.created_by.clone(),
1625            entry.header.user_persona.clone(),
1626            amount,
1627        );
1628        workflow.required_levels = required_levels;
1629
1630        // Simulate submission
1631        let submit_time = entry.header.created_at;
1632        let submit_action = ApprovalAction::new(
1633            entry.header.created_by.clone(),
1634            entry.header.user_persona.clone(),
1635            self.parse_persona(&entry.header.user_persona),
1636            ApprovalActionType::Submit,
1637            0,
1638        )
1639        .with_timestamp(submit_time);
1640
1641        workflow.actions.push(submit_action);
1642        workflow.status = ApprovalStatus::Pending;
1643        workflow.submitted_at = Some(submit_time);
1644
1645        // Simulate approvals with realistic delays
1646        let mut current_time = submit_time;
1647        for level in 1..=required_levels {
1648            // Add delay for approval (1-3 business hours per level)
1649            let delay_hours = self.rng.random_range(1..4);
1650            current_time += chrono::Duration::hours(delay_hours);
1651
1652            // Skip weekends
1653            while current_time.weekday() == chrono::Weekday::Sat
1654                || current_time.weekday() == chrono::Weekday::Sun
1655            {
1656                current_time += chrono::Duration::days(1);
1657            }
1658
1659            // Generate approver based on level
1660            let (approver_id, approver_role) = self.select_approver(level);
1661
1662            let approve_action = ApprovalAction::new(
1663                approver_id.clone(),
1664                approver_role.to_string(),
1665                approver_role,
1666                ApprovalActionType::Approve,
1667                level,
1668            )
1669            .with_timestamp(current_time);
1670
1671            workflow.actions.push(approve_action);
1672            workflow.current_level = level;
1673        }
1674
1675        // Mark as approved
1676        workflow.status = ApprovalStatus::Approved;
1677        workflow.approved_at = Some(current_time);
1678
1679        entry.header.approval_workflow = Some(workflow);
1680    }
1681
1682    /// Select an approver based on the required level.
1683    fn select_approver(&mut self, level: u8) -> (String, UserPersona) {
1684        let persona = match level {
1685            1 => UserPersona::Manager,
1686            2 => UserPersona::Controller,
1687            _ => UserPersona::Executive,
1688        };
1689
1690        // Try to get from user pool first
1691        if let Some(ref pool) = self.user_pool {
1692            if let Some(user) = pool.get_random_user(persona, &mut self.rng) {
1693                return (user.user_id.clone(), persona);
1694            }
1695        }
1696
1697        // Fallback to generated approver
1698        let approver_id = match persona {
1699            UserPersona::Manager => format!("MGR{:04}", self.rng.random_range(1..100)),
1700            UserPersona::Controller => format!("CTRL{:04}", self.rng.random_range(1..20)),
1701            UserPersona::Executive => format!("EXEC{:04}", self.rng.random_range(1..10)),
1702            _ => format!("USR{:04}", self.rng.random_range(1..1000)),
1703        };
1704
1705        (approver_id, persona)
1706    }
1707
1708    /// Parse user persona from string.
1709    fn parse_persona(&self, persona_str: &str) -> UserPersona {
1710        match persona_str.to_lowercase().as_str() {
1711            s if s.contains("junior") => UserPersona::JuniorAccountant,
1712            s if s.contains("senior") => UserPersona::SeniorAccountant,
1713            s if s.contains("controller") => UserPersona::Controller,
1714            s if s.contains("manager") => UserPersona::Manager,
1715            s if s.contains("executive") => UserPersona::Executive,
1716            s if s.contains("automated") || s.contains("system") => UserPersona::AutomatedSystem,
1717            _ => UserPersona::JuniorAccountant, // Default
1718        }
1719    }
1720
1721    /// Enable or disable approval workflow.
1722    pub fn with_approval(mut self, enabled: bool) -> Self {
1723        self.approval_enabled = enabled;
1724        self
1725    }
1726
1727    /// Set the approval threshold amount.
1728    pub fn with_approval_threshold(mut self, threshold: rust_decimal::Decimal) -> Self {
1729        self.approval_threshold = threshold;
1730        self
1731    }
1732
1733    /// Set the SOD violation rate for approval tracking.
1734    ///
1735    /// When a transaction is approved, there is a `rate` probability (0.0 to 1.0)
1736    /// that the approver is the same as the creator, which constitutes a SOD violation.
1737    /// Default is 0.10 (10%).
1738    pub fn with_sod_violation_rate(mut self, rate: f64) -> Self {
1739        self.sod_violation_rate = rate;
1740        self
1741    }
1742
1743    /// Populate `approved_by` and `approval_date` from the approval workflow,
1744    /// and flag SOD violations when the approver matches the creator.
1745    fn populate_approval_fields(&mut self, entry: &mut JournalEntry, posting_date: NaiveDate) {
1746        if let Some(ref workflow) = entry.header.approval_workflow {
1747            // Extract the last approver from the workflow actions
1748            let last_approver = workflow
1749                .actions
1750                .iter()
1751                .rev()
1752                .find(|a| matches!(a.action, ApprovalActionType::Approve));
1753
1754            if let Some(approver_action) = last_approver {
1755                entry.header.approved_by = Some(approver_action.actor_id.clone());
1756                entry.header.approval_date = Some(approver_action.action_timestamp.date_naive());
1757            } else {
1758                // No explicit approver (auto-approved); use the preparer
1759                entry.header.approved_by = Some(workflow.preparer_id.clone());
1760                entry.header.approval_date = Some(posting_date);
1761            }
1762
1763            // Inject SOD violation: with configured probability, set approver = creator
1764            if self.rng.random::<f64>() < self.sod_violation_rate {
1765                let creator = entry.header.created_by.clone();
1766                entry.header.approved_by = Some(creator);
1767                entry.header.sod_violation = true;
1768                entry.header.sod_conflict_type = Some(SodConflictType::PreparerApprover);
1769            }
1770        }
1771    }
1772
1773    /// Set the temporal drift controller for simulating distribution changes over time.
1774    ///
1775    /// When drift is enabled, amounts and other distributions will shift based on
1776    /// the period (month) to simulate realistic temporal evolution like inflation
1777    /// or increasing fraud rates.
1778    pub fn with_drift_controller(mut self, controller: DriftController) -> Self {
1779        self.drift_controller = Some(controller);
1780        self
1781    }
1782
1783    /// Set drift configuration directly.
1784    ///
1785    /// Creates a drift controller from the config. Total periods is calculated
1786    /// from the date range.
1787    pub fn with_drift_config(mut self, config: DriftConfig, seed: u64) -> Self {
1788        if config.enabled {
1789            let total_periods = self.calculate_total_periods();
1790            self.drift_controller = Some(DriftController::new(config, seed, total_periods));
1791        }
1792        self
1793    }
1794
1795    /// Calculate total periods (months) in the date range.
1796    fn calculate_total_periods(&self) -> u32 {
1797        let start_year = self.start_date.year();
1798        let start_month = self.start_date.month();
1799        let end_year = self.end_date.year();
1800        let end_month = self.end_date.month();
1801
1802        ((end_year - start_year) * 12 + (end_month as i32 - start_month as i32) + 1).max(1) as u32
1803    }
1804
1805    /// Calculate the period number (0-indexed) for a given date.
1806    fn date_to_period(&self, date: NaiveDate) -> u32 {
1807        let start_year = self.start_date.year();
1808        let start_month = self.start_date.month() as i32;
1809        let date_year = date.year();
1810        let date_month = date.month() as i32;
1811
1812        ((date_year - start_year) * 12 + (date_month - start_month)).max(0) as u32
1813    }
1814
1815    /// Get drift adjustments for a given date.
1816    fn get_drift_adjustments(&self, date: NaiveDate) -> DriftAdjustments {
1817        if let Some(ref controller) = self.drift_controller {
1818            let period = self.date_to_period(date);
1819            controller.compute_adjustments(period)
1820        } else {
1821            DriftAdjustments::none()
1822        }
1823    }
1824
1825    /// Select a user from the pool or generate a generic user ID.
1826    #[inline]
1827    fn select_user(&mut self, is_automated: bool) -> (String, String) {
1828        if let Some(ref pool) = self.user_pool {
1829            let persona = if is_automated {
1830                UserPersona::AutomatedSystem
1831            } else {
1832                // Random distribution among human personas
1833                let roll: f64 = self.rng.random();
1834                if roll < 0.4 {
1835                    UserPersona::JuniorAccountant
1836                } else if roll < 0.7 {
1837                    UserPersona::SeniorAccountant
1838                } else if roll < 0.85 {
1839                    UserPersona::Controller
1840                } else {
1841                    UserPersona::Manager
1842                }
1843            };
1844
1845            if let Some(user) = pool.get_random_user(persona, &mut self.rng) {
1846                return (user.user_id.clone(), user.persona.to_string());
1847            }
1848        }
1849
1850        // Fallback to generic format
1851        if is_automated {
1852            (
1853                format!("BATCH{:04}", self.rng.random_range(1..=20)),
1854                "automated_system".to_string(),
1855            )
1856        } else {
1857            (
1858                format!("USER{:04}", self.rng.random_range(1..=40)),
1859                "senior_accountant".to_string(),
1860            )
1861        }
1862    }
1863
1864    /// Select transaction source based on configuration weights.
1865    #[inline]
1866    fn select_source(&mut self) -> TransactionSource {
1867        let roll: f64 = self.rng.random();
1868        let dist = &self.config.source_distribution;
1869
1870        if roll < dist.manual {
1871            TransactionSource::Manual
1872        } else if roll < dist.manual + dist.automated {
1873            TransactionSource::Automated
1874        } else if roll < dist.manual + dist.automated + dist.recurring {
1875            TransactionSource::Recurring
1876        } else {
1877            TransactionSource::Adjustment
1878        }
1879    }
1880
1881    /// Select a business process based on configuration weights.
1882    #[inline]
1883    /// Map a business process to a SAP-style document type code.
1884    ///
1885    /// - P2P → "KR" (vendor invoice)
1886    /// - O2C → "DR" (customer invoice)
1887    /// - R2R → "SA" (general journal)
1888    /// - H2R → "HR" (HR posting)
1889    /// - A2R → "AA" (asset posting)
1890    /// - others → "SA"
1891    fn document_type_for_process(process: BusinessProcess) -> &'static str {
1892        match process {
1893            BusinessProcess::P2P => "KR",
1894            BusinessProcess::O2C => "DR",
1895            BusinessProcess::R2R => "SA",
1896            BusinessProcess::H2R => "HR",
1897            BusinessProcess::A2R => "AA",
1898            _ => "SA",
1899        }
1900    }
1901
1902    fn select_business_process(&mut self) -> BusinessProcess {
1903        let roll: f64 = self.rng.random();
1904
1905        // Default weights: O2C=35%, P2P=30%, R2R=20%, H2R=10%, A2R=5%
1906        if roll < 0.35 {
1907            BusinessProcess::O2C
1908        } else if roll < 0.65 {
1909            BusinessProcess::P2P
1910        } else if roll < 0.85 {
1911            BusinessProcess::R2R
1912        } else if roll < 0.95 {
1913            BusinessProcess::H2R
1914        } else {
1915            BusinessProcess::A2R
1916        }
1917    }
1918
1919    #[inline]
1920    fn select_debit_account(&mut self) -> &GLAccount {
1921        let accounts = self.coa.get_accounts_by_type(AccountType::Asset);
1922        let expense_accounts = self.coa.get_accounts_by_type(AccountType::Expense);
1923
1924        // 60% asset, 40% expense for debits
1925        let all: Vec<_> = if self.rng.random::<f64>() < 0.6 {
1926            accounts
1927        } else {
1928            expense_accounts
1929        };
1930
1931        all.choose(&mut self.rng).copied().unwrap_or_else(|| {
1932            tracing::warn!(
1933                "Account selection returned empty list, falling back to first COA account"
1934            );
1935            &self.coa.accounts[0]
1936        })
1937    }
1938
1939    #[inline]
1940    fn select_credit_account(&mut self) -> &GLAccount {
1941        let liability_accounts = self.coa.get_accounts_by_type(AccountType::Liability);
1942        let revenue_accounts = self.coa.get_accounts_by_type(AccountType::Revenue);
1943
1944        // 60% liability, 40% revenue for credits
1945        let all: Vec<_> = if self.rng.random::<f64>() < 0.6 {
1946            liability_accounts
1947        } else {
1948            revenue_accounts
1949        };
1950
1951        all.choose(&mut self.rng).copied().unwrap_or_else(|| {
1952            tracing::warn!(
1953                "Account selection returned empty list, falling back to first COA account"
1954            );
1955            &self.coa.accounts[0]
1956        })
1957    }
1958}
1959
1960impl Generator for JournalEntryGenerator {
1961    type Item = JournalEntry;
1962    type Config = (
1963        TransactionConfig,
1964        Arc<ChartOfAccounts>,
1965        Vec<String>,
1966        NaiveDate,
1967        NaiveDate,
1968    );
1969
1970    fn new(config: Self::Config, seed: u64) -> Self {
1971        Self::new_with_params(config.0, config.1, config.2, config.3, config.4, seed)
1972    }
1973
1974    fn generate_one(&mut self) -> Self::Item {
1975        self.generate()
1976    }
1977
1978    fn reset(&mut self) {
1979        self.rng = seeded_rng(self.seed, 0);
1980        self.line_sampler.reset(self.seed + 1);
1981        self.amount_sampler.reset(self.seed + 2);
1982        self.temporal_sampler.reset(self.seed + 3);
1983        self.count = 0;
1984        self.uuid_factory.reset();
1985
1986        // Reset reference generator by recreating it
1987        let mut ref_gen = ReferenceGenerator::new(
1988            self.start_date.year(),
1989            self.companies
1990                .first()
1991                .map(std::string::String::as_str)
1992                .unwrap_or("1000"),
1993        );
1994        ref_gen.set_prefix(
1995            ReferenceType::Invoice,
1996            &self.template_config.references.invoice_prefix,
1997        );
1998        ref_gen.set_prefix(
1999            ReferenceType::PurchaseOrder,
2000            &self.template_config.references.po_prefix,
2001        );
2002        ref_gen.set_prefix(
2003            ReferenceType::SalesOrder,
2004            &self.template_config.references.so_prefix,
2005        );
2006        self.reference_generator = ref_gen;
2007    }
2008
2009    fn count(&self) -> u64 {
2010        self.count
2011    }
2012
2013    fn seed(&self) -> u64 {
2014        self.seed
2015    }
2016}
2017
2018use datasynth_core::traits::ParallelGenerator;
2019
2020impl ParallelGenerator for JournalEntryGenerator {
2021    /// Split this generator into `parts` independent sub-generators.
2022    ///
2023    /// Each sub-generator gets a deterministic seed derived from the parent seed
2024    /// and its partition index, plus a partitioned UUID factory to avoid contention.
2025    /// The results are deterministic for a given partition count.
2026    fn split(self, parts: usize) -> Vec<Self> {
2027        let parts = parts.max(1);
2028        (0..parts)
2029            .map(|i| {
2030                // Derive a unique seed per partition using a golden-ratio constant
2031                let sub_seed = self
2032                    .seed
2033                    .wrapping_add((i as u64).wrapping_mul(0x9E3779B97F4A7C15));
2034
2035                let mut gen = JournalEntryGenerator::new_with_full_config(
2036                    self.config.clone(),
2037                    Arc::clone(&self.coa),
2038                    self.companies.clone(),
2039                    self.start_date,
2040                    self.end_date,
2041                    sub_seed,
2042                    self.template_config.clone(),
2043                    self.user_pool.clone(),
2044                );
2045
2046                // Copy over configuration state
2047                gen.company_selector = self.company_selector.clone();
2048                gen.vendor_pool = self.vendor_pool.clone();
2049                gen.customer_pool = self.customer_pool.clone();
2050                gen.material_pool = self.material_pool.clone();
2051                gen.using_real_master_data = self.using_real_master_data;
2052                gen.fraud_config = self.fraud_config.clone();
2053                gen.persona_errors_enabled = self.persona_errors_enabled;
2054                gen.approval_enabled = self.approval_enabled;
2055                gen.approval_threshold = self.approval_threshold;
2056                gen.sod_violation_rate = self.sod_violation_rate;
2057
2058                // Use partitioned UUID factory to eliminate atomic contention
2059                gen.uuid_factory = DeterministicUuidFactory::for_partition(
2060                    sub_seed,
2061                    GeneratorType::JournalEntry,
2062                    i as u8,
2063                );
2064
2065                // Copy temporal patterns if configured
2066                if let Some(ref config) = self.temporal_patterns_config {
2067                    gen.temporal_patterns_config = Some(config.clone());
2068                    // Rebuild business day calculator from the stored config
2069                    if config.business_days.enabled {
2070                        if let Some(ref bdc) = self.business_day_calculator {
2071                            gen.business_day_calculator = Some(bdc.clone());
2072                        }
2073                    }
2074                    // Rebuild processing lag calculator with partition seed
2075                    if config.processing_lags.enabled {
2076                        let lag_config =
2077                            Self::convert_processing_lag_config(&config.processing_lags);
2078                        gen.processing_lag_calculator =
2079                            Some(ProcessingLagCalculator::with_config(sub_seed, lag_config));
2080                    }
2081                }
2082
2083                // Copy drift controller if present
2084                if let Some(ref dc) = self.drift_controller {
2085                    gen.drift_controller = Some(dc.clone());
2086                }
2087
2088                gen
2089            })
2090            .collect()
2091    }
2092}
2093
2094#[cfg(test)]
2095#[allow(clippy::unwrap_used)]
2096mod tests {
2097    use super::*;
2098    use crate::ChartOfAccountsGenerator;
2099
/// Generates 100 entries and checks that every entry without a human-error
/// tag balances, that every entry has at least two lines, and that at least
/// 80 untagged entries were seen.
#[test]
fn test_generate_balanced_entries() {
    let coa = Arc::new(
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42)
            .generate(),
    );
    let period_start = NaiveDate::from_ymd_opt(2024, 1, 1).unwrap();
    let period_end = NaiveDate::from_ymd_opt(2024, 12, 31).unwrap();

    let mut je_gen = JournalEntryGenerator::new_with_params(
        TransactionConfig::default(),
        coa,
        vec!["1000".to_string()],
        period_start,
        period_end,
        42,
    );

    let mut balanced_count = 0;
    for _ in 0..100 {
        let entry = je_gen.generate();

        // Entries tagged as human errors are excluded from the balance check.
        let tagged_as_error = entry
            .header
            .header_text
            .as_deref()
            .is_some_and(|text| text.contains("[HUMAN_ERROR:"));

        if !tagged_as_error {
            assert!(
                entry.is_balanced(),
                "Entry {:?} is not balanced",
                entry.header.document_id
            );
            balanced_count += 1;
        }
        assert!(entry.line_count() >= 2, "Entry has fewer than 2 lines");
    }

    // Human errors are expected to be rare, so most entries must be counted.
    assert!(
        balanced_count >= 80,
        "Expected at least 80 balanced entries, got {balanced_count}"
    );
}
2145
/// Two generators built with identical parameters and seed must emit the
/// same document IDs and debit totals, entry for entry.
#[test]
fn test_deterministic_generation() {
    let coa = Arc::new(
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42)
            .generate(),
    );

    // Identical construction for both sides of the comparison.
    let build = |chart: Arc<ChartOfAccounts>| {
        JournalEntryGenerator::new_with_params(
            TransactionConfig::default(),
            chart,
            vec!["1000".to_string()],
            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
            42,
        )
    };
    let mut first = build(Arc::clone(&coa));
    let mut second = build(coa);

    for _ in 0..50 {
        let left = first.generate();
        let right = second.generate();
        assert_eq!(left.header.document_id, right.header.document_id);
        assert_eq!(left.total_debit(), right.total_debit());
    }
}
2177
/// With every template feature switched on and persona errors disabled,
/// each entry must carry header text, a reference, a business process, and
/// line text on every line, and must still balance.
#[test]
fn test_templates_generate_descriptions() {
    use datasynth_config::schema;

    let coa = Arc::new(
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42)
            .generate(),
    );

    // Enable all template features.
    let names = schema::NameTemplateConfig {
        generate_realistic_names: true,
        email_domain: "test.com".to_string(),
        culture_distribution: schema::CultureDistribution::default(),
    };
    let descriptions = schema::DescriptionTemplateConfig {
        generate_header_text: true,
        generate_line_text: true,
    };
    let references = schema::ReferenceTemplateConfig {
        generate_references: true,
        invoice_prefix: "TEST-INV".to_string(),
        po_prefix: "TEST-PO".to_string(),
        so_prefix: "TEST-SO".to_string(),
    };
    let template_config = TemplateConfig {
        names,
        descriptions,
        references,
    };

    // Persona errors are disabled for template testing.
    let mut je_gen = JournalEntryGenerator::new_with_full_config(
        TransactionConfig::default(),
        coa,
        vec!["1000".to_string()],
        NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
        NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
        42,
        template_config,
        None,
    )
    .with_persona_errors(false);

    for _ in 0..10 {
        let entry = je_gen.generate();
        let header = &entry.header;

        assert!(
            header.header_text.is_some(),
            "Header text should be populated"
        );
        assert!(header.reference.is_some(), "Reference should be populated");
        assert!(
            header.business_process.is_some(),
            "Business process should be set"
        );

        for line in entry.lines.iter() {
            assert!(line.line_text.is_some(), "Line text should be populated");
        }

        // Templating must not disturb the balance.
        assert!(entry.is_balanced());
    }
}
2245
/// Smoke test for generation with a user pool attached: every generated
/// entry must have a non-empty `created_by`.
#[test]
fn test_user_pool_integration() {
    let coa = Arc::new(
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42)
            .generate(),
    );
    let companies = vec!["1000".to_string()];

    // Standard user pool for the same company list.
    let user_pool = crate::UserGenerator::new(42).generate_standard(&companies);

    let mut je_gen = JournalEntryGenerator::new_with_full_config(
        TransactionConfig::default(),
        coa,
        companies,
        NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
        NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
        42,
        TemplateConfig::default(),
        Some(user_pool),
    );

    // Only non-emptiness is asserted: the generator may still fall back to
    // generic BATCH/USER IDs when the pool has no user for a persona.
    for _ in 0..20 {
        let entry = je_gen.generate();
        assert!(!entry.header.created_by.is_empty());
    }
}
2278
/// A fresh generator reports no real master data; after vendors, customers,
/// and materials are attached it reports that real master data is in use.
#[test]
fn test_master_data_connection() {
    let coa = Arc::new(
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42)
            .generate(),
    );

    // Small hand-made master data sets.
    let vendors = vec![
        Vendor::new("V-TEST-001", "Test Vendor Alpha", VendorType::Supplier),
        Vendor::new("V-TEST-002", "Test Vendor Beta", VendorType::Technology),
    ];
    let customers = vec![
        Customer::new("C-TEST-001", "Test Customer One", CustomerType::Corporate),
        Customer::new(
            "C-TEST-002",
            "Test Customer Two",
            CustomerType::SmallBusiness,
        ),
    ];
    let materials = vec![Material::new(
        "MAT-TEST-001",
        "Test Material A",
        MaterialType::RawMaterial,
    )];

    let bare_generator = JournalEntryGenerator::new_with_params(
        TransactionConfig::default(),
        coa,
        vec!["1000".to_string()],
        NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
        NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
        42,
    );

    // Nothing connected yet.
    assert!(!bare_generator.is_using_real_master_data());

    let connected_generator = bare_generator
        .with_vendors(&vendors)
        .with_customers(&customers)
        .with_materials(&materials);

    // All three pools attached.
    assert!(connected_generator.is_using_real_master_data());
}
2330
2331    #[test]
2332    fn test_with_master_data_convenience_method() {
2333        let mut coa_gen =
2334            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2335        let coa = Arc::new(coa_gen.generate());
2336
2337        let vendors = vec![Vendor::new("V-001", "Vendor One", VendorType::Supplier)];
2338        let customers = vec![Customer::new(
2339            "C-001",
2340            "Customer One",
2341            CustomerType::Corporate,
2342        )];
2343        let materials = vec![Material::new(
2344            "MAT-001",
2345            "Material One",
2346            MaterialType::RawMaterial,
2347        )];
2348
2349        let generator = JournalEntryGenerator::new_with_params(
2350            TransactionConfig::default(),
2351            coa,
2352            vec!["1000".to_string()],
2353            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2354            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2355            42,
2356        )
2357        .with_master_data(&vendors, &customers, &materials);
2358
2359        assert!(generator.is_using_real_master_data());
2360    }
2361
2362    #[test]
2363    fn test_stress_factors_increase_error_rate() {
2364        let mut coa_gen =
2365            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2366        let coa = Arc::new(coa_gen.generate());
2367
2368        let generator = JournalEntryGenerator::new_with_params(
2369            TransactionConfig::default(),
2370            coa,
2371            vec!["1000".to_string()],
2372            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2373            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2374            42,
2375        );
2376
2377        let base_rate = 0.1;
2378
2379        // Regular day - no stress factors
2380        let regular_day = NaiveDate::from_ymd_opt(2024, 6, 15).unwrap(); // Mid-June Wednesday
2381        let regular_rate = generator.apply_stress_factors(base_rate, regular_day);
2382        assert!(
2383            (regular_rate - base_rate).abs() < 0.01,
2384            "Regular day should have minimal stress factor adjustment"
2385        );
2386
2387        // Month end - 50% more errors
2388        let month_end = NaiveDate::from_ymd_opt(2024, 6, 29).unwrap(); // June 29 (Saturday)
2389        let month_end_rate = generator.apply_stress_factors(base_rate, month_end);
2390        assert!(
2391            month_end_rate > regular_rate,
2392            "Month end should have higher error rate than regular day"
2393        );
2394
2395        // Year end - double the error rate
2396        let year_end = NaiveDate::from_ymd_opt(2024, 12, 30).unwrap(); // December 30
2397        let year_end_rate = generator.apply_stress_factors(base_rate, year_end);
2398        assert!(
2399            year_end_rate > month_end_rate,
2400            "Year end should have highest error rate"
2401        );
2402
2403        // Friday stress
2404        let friday = NaiveDate::from_ymd_opt(2024, 6, 14).unwrap(); // Friday
2405        let friday_rate = generator.apply_stress_factors(base_rate, friday);
2406        assert!(
2407            friday_rate > regular_rate,
2408            "Friday should have higher error rate than mid-week"
2409        );
2410
2411        // Monday stress
2412        let monday = NaiveDate::from_ymd_opt(2024, 6, 17).unwrap(); // Monday
2413        let monday_rate = generator.apply_stress_factors(base_rate, monday);
2414        assert!(
2415            monday_rate > regular_rate,
2416            "Monday should have higher error rate than mid-week"
2417        );
2418    }
2419
2420    #[test]
2421    fn test_batching_produces_similar_entries() {
2422        let mut coa_gen =
2423            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2424        let coa = Arc::new(coa_gen.generate());
2425
2426        // Use seed 123 which is more likely to trigger batching
2427        let mut je_gen = JournalEntryGenerator::new_with_params(
2428            TransactionConfig::default(),
2429            coa,
2430            vec!["1000".to_string()],
2431            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2432            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2433            123,
2434        )
2435        .with_persona_errors(false); // Disable to ensure balanced entries
2436
2437        // Generate many entries - at 15% batch rate, should see some batches
2438        let entries: Vec<JournalEntry> = (0..200).map(|_| je_gen.generate()).collect();
2439
2440        // Check that all entries are balanced (batched or not)
2441        for entry in &entries {
2442            assert!(
2443                entry.is_balanced(),
2444                "All entries including batched should be balanced"
2445            );
2446        }
2447
2448        // Count entries with same-day posting dates (batch indicator)
2449        let mut date_counts: std::collections::HashMap<NaiveDate, usize> =
2450            std::collections::HashMap::new();
2451        for entry in &entries {
2452            *date_counts.entry(entry.header.posting_date).or_insert(0) += 1;
2453        }
2454
2455        // With batching, some dates should have multiple entries
2456        let dates_with_multiple = date_counts.values().filter(|&&c| c > 1).count();
2457        assert!(
2458            dates_with_multiple > 0,
2459            "With batching, should see some dates with multiple entries"
2460        );
2461    }
2462
2463    #[test]
2464    fn test_temporal_patterns_business_days() {
2465        use datasynth_config::schema::{
2466            BusinessDaySchemaConfig, CalendarSchemaConfig, TemporalPatternsConfig,
2467        };
2468
2469        let mut coa_gen =
2470            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2471        let coa = Arc::new(coa_gen.generate());
2472
2473        // Create temporal patterns config with business days enabled
2474        let temporal_config = TemporalPatternsConfig {
2475            enabled: true,
2476            business_days: BusinessDaySchemaConfig {
2477                enabled: true,
2478                ..Default::default()
2479            },
2480            calendars: CalendarSchemaConfig {
2481                regions: vec!["US".to_string()],
2482                custom_holidays: vec![],
2483            },
2484            ..Default::default()
2485        };
2486
2487        let mut je_gen = JournalEntryGenerator::new_with_params(
2488            TransactionConfig::default(),
2489            coa,
2490            vec!["1000".to_string()],
2491            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2492            NaiveDate::from_ymd_opt(2024, 3, 31).unwrap(), // Q1 2024
2493            42,
2494        )
2495        .with_temporal_patterns(temporal_config, 42)
2496        .with_persona_errors(false);
2497
2498        // Generate entries and verify none fall on weekends
2499        let entries: Vec<JournalEntry> = (0..100).map(|_| je_gen.generate()).collect();
2500
2501        for entry in &entries {
2502            let weekday = entry.header.posting_date.weekday();
2503            assert!(
2504                weekday != chrono::Weekday::Sat && weekday != chrono::Weekday::Sun,
2505                "Posting date {:?} should not be a weekend",
2506                entry.header.posting_date
2507            );
2508        }
2509    }
2510
2511    #[test]
2512    fn test_default_generation_filters_weekends() {
2513        // Verify that weekend entries are <5% even when temporal_patterns is NOT enabled.
2514        // This tests the fix where new_with_full_config always creates a default
2515        // BusinessDayCalculator with US holidays as a fallback.
2516        let mut coa_gen =
2517            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2518        let coa = Arc::new(coa_gen.generate());
2519
2520        let mut je_gen = JournalEntryGenerator::new_with_params(
2521            TransactionConfig::default(),
2522            coa,
2523            vec!["1000".to_string()],
2524            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2525            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2526            42,
2527        )
2528        .with_persona_errors(false);
2529
2530        let total = 500;
2531        let entries: Vec<JournalEntry> = (0..total).map(|_| je_gen.generate()).collect();
2532
2533        let weekend_count = entries
2534            .iter()
2535            .filter(|e| {
2536                let wd = e.header.posting_date.weekday();
2537                wd == chrono::Weekday::Sat || wd == chrono::Weekday::Sun
2538            })
2539            .count();
2540
2541        let weekend_pct = weekend_count as f64 / total as f64;
2542        assert!(
2543            weekend_pct < 0.05,
2544            "Expected weekend entries <5% of total without temporal_patterns enabled, \
2545             but got {:.1}% ({}/{})",
2546            weekend_pct * 100.0,
2547            weekend_count,
2548            total
2549        );
2550    }
2551
2552    #[test]
2553    fn test_document_type_derived_from_business_process() {
2554        let mut coa_gen =
2555            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2556        let coa = Arc::new(coa_gen.generate());
2557
2558        let mut je_gen = JournalEntryGenerator::new_with_params(
2559            TransactionConfig::default(),
2560            coa,
2561            vec!["1000".to_string()],
2562            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2563            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2564            99,
2565        )
2566        .with_persona_errors(false)
2567        .with_batching(false);
2568
2569        let total = 200;
2570        let mut doc_types = std::collections::HashSet::new();
2571        let mut sa_count = 0_usize;
2572
2573        for _ in 0..total {
2574            let entry = je_gen.generate();
2575            let dt = &entry.header.document_type;
2576            doc_types.insert(dt.clone());
2577            if dt == "SA" {
2578                sa_count += 1;
2579            }
2580        }
2581
2582        // Should have more than 3 distinct document types
2583        assert!(
2584            doc_types.len() > 3,
2585            "Expected >3 distinct document types, got {} ({:?})",
2586            doc_types.len(),
2587            doc_types,
2588        );
2589
2590        // "SA" should be less than 50% (R2R is 20% of the weight)
2591        let sa_pct = sa_count as f64 / total as f64;
2592        assert!(
2593            sa_pct < 0.50,
2594            "Expected SA <50%, got {:.1}% ({}/{})",
2595            sa_pct * 100.0,
2596            sa_count,
2597            total,
2598        );
2599    }
2600
2601    #[test]
2602    fn test_enrich_line_items_account_description() {
2603        let mut coa_gen =
2604            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2605        let coa = Arc::new(coa_gen.generate());
2606
2607        let mut je_gen = JournalEntryGenerator::new_with_params(
2608            TransactionConfig::default(),
2609            coa,
2610            vec!["1000".to_string()],
2611            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2612            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2613            42,
2614        )
2615        .with_persona_errors(false);
2616
2617        let total = 200;
2618        let entries: Vec<JournalEntry> = (0..total).map(|_| je_gen.generate()).collect();
2619
2620        // Count lines with account_description populated
2621        let total_lines: usize = entries.iter().map(|e| e.lines.len()).sum();
2622        let lines_with_desc: usize = entries
2623            .iter()
2624            .flat_map(|e| &e.lines)
2625            .filter(|l| l.account_description.is_some())
2626            .count();
2627
2628        let desc_pct = lines_with_desc as f64 / total_lines as f64;
2629        assert!(
2630            desc_pct > 0.95,
2631            "Expected >95% of lines to have account_description, got {:.1}% ({}/{})",
2632            desc_pct * 100.0,
2633            lines_with_desc,
2634            total_lines,
2635        );
2636    }
2637
2638    #[test]
2639    fn test_enrich_line_items_cost_center_for_expense_accounts() {
2640        let mut coa_gen =
2641            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2642        let coa = Arc::new(coa_gen.generate());
2643
2644        let mut je_gen = JournalEntryGenerator::new_with_params(
2645            TransactionConfig::default(),
2646            coa,
2647            vec!["1000".to_string()],
2648            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2649            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2650            42,
2651        )
2652        .with_persona_errors(false);
2653
2654        let total = 300;
2655        let entries: Vec<JournalEntry> = (0..total).map(|_| je_gen.generate()).collect();
2656
2657        // Count expense account lines (5xxx/6xxx) with cost_center populated
2658        let expense_lines: Vec<&JournalEntryLine> = entries
2659            .iter()
2660            .flat_map(|e| &e.lines)
2661            .filter(|l| {
2662                let first = l.gl_account.chars().next().unwrap_or('0');
2663                first == '5' || first == '6'
2664            })
2665            .collect();
2666
2667        if !expense_lines.is_empty() {
2668            let with_cc = expense_lines
2669                .iter()
2670                .filter(|l| l.cost_center.is_some())
2671                .count();
2672            let cc_pct = with_cc as f64 / expense_lines.len() as f64;
2673            assert!(
2674                cc_pct > 0.80,
2675                "Expected >80% of expense lines to have cost_center, got {:.1}% ({}/{})",
2676                cc_pct * 100.0,
2677                with_cc,
2678                expense_lines.len(),
2679            );
2680        }
2681    }
2682
2683    #[test]
2684    fn test_enrich_line_items_profit_center_and_line_text() {
2685        let mut coa_gen =
2686            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2687        let coa = Arc::new(coa_gen.generate());
2688
2689        let mut je_gen = JournalEntryGenerator::new_with_params(
2690            TransactionConfig::default(),
2691            coa,
2692            vec!["1000".to_string()],
2693            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2694            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2695            42,
2696        )
2697        .with_persona_errors(false);
2698
2699        let total = 100;
2700        let entries: Vec<JournalEntry> = (0..total).map(|_| je_gen.generate()).collect();
2701
2702        let total_lines: usize = entries.iter().map(|e| e.lines.len()).sum();
2703
2704        // All lines should have profit_center
2705        let with_pc = entries
2706            .iter()
2707            .flat_map(|e| &e.lines)
2708            .filter(|l| l.profit_center.is_some())
2709            .count();
2710        let pc_pct = with_pc as f64 / total_lines as f64;
2711        assert!(
2712            pc_pct > 0.95,
2713            "Expected >95% of lines to have profit_center, got {:.1}% ({}/{})",
2714            pc_pct * 100.0,
2715            with_pc,
2716            total_lines,
2717        );
2718
2719        // All lines should have line_text (either from template or header fallback)
2720        let with_text = entries
2721            .iter()
2722            .flat_map(|e| &e.lines)
2723            .filter(|l| l.line_text.is_some())
2724            .count();
2725        let text_pct = with_text as f64 / total_lines as f64;
2726        assert!(
2727            text_pct > 0.95,
2728            "Expected >95% of lines to have line_text, got {:.1}% ({}/{})",
2729            text_pct * 100.0,
2730            with_text,
2731            total_lines,
2732        );
2733    }
2734}