1use chrono::{Datelike, NaiveDate};
4use datasynth_core::utils::seeded_rng;
5use rand::prelude::*;
6use rand_chacha::ChaCha8Rng;
7use rust_decimal::prelude::*;
8use rust_decimal::Decimal;
9use std::sync::Arc;
10
11use tracing::debug;
12
13use datasynth_config::schema::{
14 FraudConfig, GeneratorConfig, TemplateConfig, TemporalPatternsConfig, TransactionConfig,
15};
16use datasynth_core::distributions::{
17 BusinessDayCalculator, CrossDayConfig, DriftAdjustments, DriftConfig, DriftController,
18 EventType, LagDistribution, PeriodEndConfig, PeriodEndDynamics, PeriodEndModel,
19 ProcessingLagCalculator, ProcessingLagConfig, *,
20};
21use datasynth_core::models::*;
22use datasynth_core::templates::{
23 descriptions::DescriptionContext, DescriptionGenerator, ReferenceGenerator, ReferenceType,
24};
25use datasynth_core::traits::Generator;
26use datasynth_core::uuid_factory::{DeterministicUuidFactory, GeneratorType};
27use datasynth_core::CountryPack;
28
29use crate::company_selector::WeightedCompanySelector;
30use crate::user_generator::{UserGenerator, UserGeneratorConfig};
31
32pub struct JournalEntryGenerator {
34 rng: ChaCha8Rng,
35 seed: u64,
36 config: TransactionConfig,
37 coa: Arc<ChartOfAccounts>,
38 companies: Vec<String>,
39 company_selector: WeightedCompanySelector,
40 line_sampler: LineItemSampler,
41 amount_sampler: AmountSampler,
42 temporal_sampler: TemporalSampler,
43 start_date: NaiveDate,
44 end_date: NaiveDate,
45 count: u64,
46 uuid_factory: DeterministicUuidFactory,
47 user_pool: Option<UserPool>,
49 description_generator: DescriptionGenerator,
50 reference_generator: ReferenceGenerator,
51 template_config: TemplateConfig,
52 vendor_pool: VendorPool,
53 customer_pool: CustomerPool,
54 material_pool: Option<MaterialPool>,
56 using_real_master_data: bool,
58 fraud_config: FraudConfig,
60 persona_errors_enabled: bool,
62 approval_enabled: bool,
64 approval_threshold: rust_decimal::Decimal,
65 batch_state: Option<BatchState>,
67 drift_controller: Option<DriftController>,
69 business_day_calculator: Option<BusinessDayCalculator>,
71 processing_lag_calculator: Option<ProcessingLagCalculator>,
72 temporal_patterns_config: Option<TemporalPatternsConfig>,
73}
74
75#[derive(Clone)]
80struct BatchState {
81 base_account_number: String,
83 base_amount: rust_decimal::Decimal,
84 base_business_process: Option<BusinessProcess>,
85 base_posting_date: NaiveDate,
86 remaining: u8,
88}
89
90impl JournalEntryGenerator {
91 pub fn new_with_params(
93 config: TransactionConfig,
94 coa: Arc<ChartOfAccounts>,
95 companies: Vec<String>,
96 start_date: NaiveDate,
97 end_date: NaiveDate,
98 seed: u64,
99 ) -> Self {
100 Self::new_with_full_config(
101 config,
102 coa,
103 companies,
104 start_date,
105 end_date,
106 seed,
107 TemplateConfig::default(),
108 None,
109 )
110 }
111
112 #[allow(clippy::too_many_arguments)]
114 pub fn new_with_full_config(
115 config: TransactionConfig,
116 coa: Arc<ChartOfAccounts>,
117 companies: Vec<String>,
118 start_date: NaiveDate,
119 end_date: NaiveDate,
120 seed: u64,
121 template_config: TemplateConfig,
122 user_pool: Option<UserPool>,
123 ) -> Self {
124 let user_pool = user_pool.or_else(|| {
126 if template_config.names.generate_realistic_names {
127 let user_gen_config = UserGeneratorConfig {
128 culture_distribution: vec![
129 (
130 datasynth_core::templates::NameCulture::WesternUs,
131 template_config.names.culture_distribution.western_us,
132 ),
133 (
134 datasynth_core::templates::NameCulture::Hispanic,
135 template_config.names.culture_distribution.hispanic,
136 ),
137 (
138 datasynth_core::templates::NameCulture::German,
139 template_config.names.culture_distribution.german,
140 ),
141 (
142 datasynth_core::templates::NameCulture::French,
143 template_config.names.culture_distribution.french,
144 ),
145 (
146 datasynth_core::templates::NameCulture::Chinese,
147 template_config.names.culture_distribution.chinese,
148 ),
149 (
150 datasynth_core::templates::NameCulture::Japanese,
151 template_config.names.culture_distribution.japanese,
152 ),
153 (
154 datasynth_core::templates::NameCulture::Indian,
155 template_config.names.culture_distribution.indian,
156 ),
157 ],
158 email_domain: template_config.names.email_domain.clone(),
159 generate_realistic_names: true,
160 };
161 let mut user_gen = UserGenerator::with_config(seed + 100, user_gen_config);
162 Some(user_gen.generate_standard(&companies))
163 } else {
164 None
165 }
166 });
167
168 let mut ref_gen = ReferenceGenerator::new(
170 start_date.year(),
171 companies.first().map(|s| s.as_str()).unwrap_or("1000"),
172 );
173 ref_gen.set_prefix(
174 ReferenceType::Invoice,
175 &template_config.references.invoice_prefix,
176 );
177 ref_gen.set_prefix(
178 ReferenceType::PurchaseOrder,
179 &template_config.references.po_prefix,
180 );
181 ref_gen.set_prefix(
182 ReferenceType::SalesOrder,
183 &template_config.references.so_prefix,
184 );
185
186 let company_selector = WeightedCompanySelector::uniform(companies.clone());
188
189 Self {
190 rng: seeded_rng(seed, 0),
191 seed,
192 config: config.clone(),
193 coa,
194 companies,
195 company_selector,
196 line_sampler: LineItemSampler::with_config(
197 seed + 1,
198 config.line_item_distribution.clone(),
199 config.even_odd_distribution.clone(),
200 config.debit_credit_distribution.clone(),
201 ),
202 amount_sampler: AmountSampler::with_config(seed + 2, config.amounts.clone()),
203 temporal_sampler: TemporalSampler::with_config(
204 seed + 3,
205 config.seasonality.clone(),
206 WorkingHoursConfig::default(),
207 Vec::new(),
208 ),
209 start_date,
210 end_date,
211 count: 0,
212 uuid_factory: DeterministicUuidFactory::new(seed, GeneratorType::JournalEntry),
213 user_pool,
214 description_generator: DescriptionGenerator::new(),
215 reference_generator: ref_gen,
216 template_config,
217 vendor_pool: VendorPool::standard(),
218 customer_pool: CustomerPool::standard(),
219 material_pool: None,
220 using_real_master_data: false,
221 fraud_config: FraudConfig::default(),
222 persona_errors_enabled: true, approval_enabled: true, approval_threshold: rust_decimal::Decimal::new(10000, 0), batch_state: None,
226 drift_controller: None,
227 business_day_calculator: None,
228 processing_lag_calculator: None,
229 temporal_patterns_config: None,
230 }
231 }
232
233 pub fn from_generator_config(
238 full_config: &GeneratorConfig,
239 coa: Arc<ChartOfAccounts>,
240 start_date: NaiveDate,
241 end_date: NaiveDate,
242 seed: u64,
243 ) -> Self {
244 let companies: Vec<String> = full_config
245 .companies
246 .iter()
247 .map(|c| c.code.clone())
248 .collect();
249
250 let company_selector = WeightedCompanySelector::from_configs(&full_config.companies);
252
253 let mut generator = Self::new_with_full_config(
254 full_config.transactions.clone(),
255 coa,
256 companies,
257 start_date,
258 end_date,
259 seed,
260 full_config.templates.clone(),
261 None,
262 );
263
264 generator.company_selector = company_selector;
266
267 generator.fraud_config = full_config.fraud.clone();
269
270 let temporal_config = &full_config.temporal_patterns;
272 if temporal_config.enabled {
273 generator = generator.with_temporal_patterns(temporal_config.clone(), seed);
274 }
275
276 generator
277 }
278
279 pub fn with_temporal_patterns(mut self, config: TemporalPatternsConfig, seed: u64) -> Self {
286 if config.business_days.enabled {
288 let region = config
289 .calendars
290 .regions
291 .first()
292 .map(|r| Self::parse_region(r))
293 .unwrap_or(Region::US);
294
295 let calendar = HolidayCalendar::new(region, self.start_date.year());
296 self.business_day_calculator = Some(BusinessDayCalculator::new(calendar));
297 }
298
299 if config.processing_lags.enabled {
301 let lag_config = Self::convert_processing_lag_config(&config.processing_lags);
302 self.processing_lag_calculator =
303 Some(ProcessingLagCalculator::with_config(seed, lag_config));
304 }
305
306 let model = config.period_end.model.as_deref().unwrap_or("flat");
308 if model != "flat"
309 || config
310 .period_end
311 .month_end
312 .as_ref()
313 .is_some_and(|m| m.peak_multiplier.unwrap_or(1.0) != 1.0)
314 {
315 let dynamics = Self::convert_period_end_config(&config.period_end);
316 self.temporal_sampler.set_period_end_dynamics(dynamics);
317 }
318
319 self.temporal_patterns_config = Some(config);
320 self
321 }
322
323 pub fn with_country_pack_temporal(
331 mut self,
332 config: TemporalPatternsConfig,
333 seed: u64,
334 pack: &CountryPack,
335 ) -> Self {
336 if config.business_days.enabled {
338 let calendar = HolidayCalendar::from_country_pack(pack, self.start_date.year());
339 self.business_day_calculator = Some(BusinessDayCalculator::new(calendar));
340 }
341
342 if config.processing_lags.enabled {
344 let lag_config = Self::convert_processing_lag_config(&config.processing_lags);
345 self.processing_lag_calculator =
346 Some(ProcessingLagCalculator::with_config(seed, lag_config));
347 }
348
349 let model = config.period_end.model.as_deref().unwrap_or("flat");
351 if model != "flat"
352 || config
353 .period_end
354 .month_end
355 .as_ref()
356 .is_some_and(|m| m.peak_multiplier.unwrap_or(1.0) != 1.0)
357 {
358 let dynamics = Self::convert_period_end_config(&config.period_end);
359 self.temporal_sampler.set_period_end_dynamics(dynamics);
360 }
361
362 self.temporal_patterns_config = Some(config);
363 self
364 }
365
366 fn convert_processing_lag_config(
368 schema: &datasynth_config::schema::ProcessingLagSchemaConfig,
369 ) -> ProcessingLagConfig {
370 let mut config = ProcessingLagConfig {
371 enabled: schema.enabled,
372 ..Default::default()
373 };
374
375 let convert_lag = |lag: &datasynth_config::schema::LagDistributionSchemaConfig| {
377 let mut dist = LagDistribution::log_normal(lag.mu, lag.sigma);
378 if let Some(min) = lag.min_hours {
379 dist.min_lag_hours = min;
380 }
381 if let Some(max) = lag.max_hours {
382 dist.max_lag_hours = max;
383 }
384 dist
385 };
386
387 if let Some(ref lag) = schema.sales_order_lag {
389 config
390 .event_lags
391 .insert(EventType::SalesOrder, convert_lag(lag));
392 }
393 if let Some(ref lag) = schema.purchase_order_lag {
394 config
395 .event_lags
396 .insert(EventType::PurchaseOrder, convert_lag(lag));
397 }
398 if let Some(ref lag) = schema.goods_receipt_lag {
399 config
400 .event_lags
401 .insert(EventType::GoodsReceipt, convert_lag(lag));
402 }
403 if let Some(ref lag) = schema.invoice_receipt_lag {
404 config
405 .event_lags
406 .insert(EventType::InvoiceReceipt, convert_lag(lag));
407 }
408 if let Some(ref lag) = schema.invoice_issue_lag {
409 config
410 .event_lags
411 .insert(EventType::InvoiceIssue, convert_lag(lag));
412 }
413 if let Some(ref lag) = schema.payment_lag {
414 config
415 .event_lags
416 .insert(EventType::Payment, convert_lag(lag));
417 }
418 if let Some(ref lag) = schema.journal_entry_lag {
419 config
420 .event_lags
421 .insert(EventType::JournalEntry, convert_lag(lag));
422 }
423
424 if let Some(ref cross_day) = schema.cross_day_posting {
426 config.cross_day = CrossDayConfig {
427 enabled: cross_day.enabled,
428 probability_by_hour: cross_day.probability_by_hour.clone(),
429 ..Default::default()
430 };
431 }
432
433 config
434 }
435
436 fn convert_period_end_config(
438 schema: &datasynth_config::schema::PeriodEndSchemaConfig,
439 ) -> PeriodEndDynamics {
440 let model_type = schema.model.as_deref().unwrap_or("exponential");
441
442 let convert_period =
444 |period: Option<&datasynth_config::schema::PeriodEndModelSchemaConfig>,
445 default_peak: f64|
446 -> PeriodEndConfig {
447 if let Some(p) = period {
448 let model = match model_type {
449 "flat" => PeriodEndModel::FlatMultiplier {
450 multiplier: p.peak_multiplier.unwrap_or(default_peak),
451 },
452 "extended_crunch" => PeriodEndModel::ExtendedCrunch {
453 start_day: p.start_day.unwrap_or(-10),
454 sustained_high_days: p.sustained_high_days.unwrap_or(3),
455 peak_multiplier: p.peak_multiplier.unwrap_or(default_peak),
456 ramp_up_days: 3, },
458 _ => PeriodEndModel::ExponentialAcceleration {
459 start_day: p.start_day.unwrap_or(-10),
460 base_multiplier: p.base_multiplier.unwrap_or(1.0),
461 peak_multiplier: p.peak_multiplier.unwrap_or(default_peak),
462 decay_rate: p.decay_rate.unwrap_or(0.3),
463 },
464 };
465 PeriodEndConfig {
466 enabled: true,
467 model,
468 additional_multiplier: p.additional_multiplier.unwrap_or(1.0),
469 }
470 } else {
471 PeriodEndConfig {
472 enabled: true,
473 model: PeriodEndModel::ExponentialAcceleration {
474 start_day: -10,
475 base_multiplier: 1.0,
476 peak_multiplier: default_peak,
477 decay_rate: 0.3,
478 },
479 additional_multiplier: 1.0,
480 }
481 }
482 };
483
484 PeriodEndDynamics::new(
485 convert_period(schema.month_end.as_ref(), 2.0),
486 convert_period(schema.quarter_end.as_ref(), 3.5),
487 convert_period(schema.year_end.as_ref(), 5.0),
488 )
489 }
490
491 fn parse_region(region_str: &str) -> Region {
493 match region_str.to_uppercase().as_str() {
494 "US" => Region::US,
495 "DE" => Region::DE,
496 "GB" => Region::GB,
497 "CN" => Region::CN,
498 "JP" => Region::JP,
499 "IN" => Region::IN,
500 "BR" => Region::BR,
501 "MX" => Region::MX,
502 "AU" => Region::AU,
503 "SG" => Region::SG,
504 "KR" => Region::KR,
505 "FR" => Region::FR,
506 "IT" => Region::IT,
507 "ES" => Region::ES,
508 "CA" => Region::CA,
509 _ => Region::US,
510 }
511 }
512
513 pub fn set_company_selector(&mut self, selector: WeightedCompanySelector) {
515 self.company_selector = selector;
516 }
517
518 pub fn company_selector(&self) -> &WeightedCompanySelector {
520 &self.company_selector
521 }
522
523 pub fn set_fraud_config(&mut self, config: FraudConfig) {
525 self.fraud_config = config;
526 }
527
528 pub fn with_vendors(mut self, vendors: &[Vendor]) -> Self {
533 if !vendors.is_empty() {
534 self.vendor_pool = VendorPool::from_vendors(vendors.to_vec());
535 self.using_real_master_data = true;
536 }
537 self
538 }
539
540 pub fn with_customers(mut self, customers: &[Customer]) -> Self {
545 if !customers.is_empty() {
546 self.customer_pool = CustomerPool::from_customers(customers.to_vec());
547 self.using_real_master_data = true;
548 }
549 self
550 }
551
552 pub fn with_materials(mut self, materials: &[Material]) -> Self {
556 if !materials.is_empty() {
557 self.material_pool = Some(MaterialPool::from_materials(materials.to_vec()));
558 self.using_real_master_data = true;
559 }
560 self
561 }
562
563 pub fn with_master_data(
568 self,
569 vendors: &[Vendor],
570 customers: &[Customer],
571 materials: &[Material],
572 ) -> Self {
573 self.with_vendors(vendors)
574 .with_customers(customers)
575 .with_materials(materials)
576 }
577
578 pub fn with_country_pack_names(mut self, pack: &CountryPack) -> Self {
585 let name_gen =
586 datasynth_core::templates::MultiCultureNameGenerator::from_country_pack(pack);
587 let config = UserGeneratorConfig {
588 culture_distribution: Vec::new(),
591 email_domain: name_gen.email_domain().to_string(),
592 generate_realistic_names: true,
593 };
594 let mut user_gen = UserGenerator::with_name_generator(self.seed + 100, config, name_gen);
595 self.user_pool = Some(user_gen.generate_standard(&self.companies));
596 self
597 }
598
599 pub fn is_using_real_master_data(&self) -> bool {
601 self.using_real_master_data
602 }
603
604 fn determine_fraud(&mut self) -> Option<FraudType> {
606 if !self.fraud_config.enabled {
607 return None;
608 }
609
610 if self.rng.random::<f64>() >= self.fraud_config.fraud_rate {
612 return None;
613 }
614
615 Some(self.select_fraud_type())
617 }
618
619 fn select_fraud_type(&mut self) -> FraudType {
621 let dist = &self.fraud_config.fraud_type_distribution;
622 let roll: f64 = self.rng.random();
623
624 let mut cumulative = 0.0;
625
626 cumulative += dist.suspense_account_abuse;
627 if roll < cumulative {
628 return FraudType::SuspenseAccountAbuse;
629 }
630
631 cumulative += dist.fictitious_transaction;
632 if roll < cumulative {
633 return FraudType::FictitiousTransaction;
634 }
635
636 cumulative += dist.revenue_manipulation;
637 if roll < cumulative {
638 return FraudType::RevenueManipulation;
639 }
640
641 cumulative += dist.expense_capitalization;
642 if roll < cumulative {
643 return FraudType::ExpenseCapitalization;
644 }
645
646 cumulative += dist.split_transaction;
647 if roll < cumulative {
648 return FraudType::SplitTransaction;
649 }
650
651 cumulative += dist.timing_anomaly;
652 if roll < cumulative {
653 return FraudType::TimingAnomaly;
654 }
655
656 cumulative += dist.unauthorized_access;
657 if roll < cumulative {
658 return FraudType::UnauthorizedAccess;
659 }
660
661 FraudType::DuplicatePayment
663 }
664
665 fn fraud_type_to_amount_pattern(&self, fraud_type: FraudType) -> FraudAmountPattern {
667 match fraud_type {
668 FraudType::SplitTransaction | FraudType::JustBelowThreshold => {
669 FraudAmountPattern::ThresholdAdjacent
670 }
671 FraudType::FictitiousTransaction
672 | FraudType::FictitiousEntry
673 | FraudType::SuspenseAccountAbuse
674 | FraudType::RoundDollarManipulation => FraudAmountPattern::ObviousRoundNumbers,
675 FraudType::RevenueManipulation
676 | FraudType::ExpenseCapitalization
677 | FraudType::ImproperCapitalization
678 | FraudType::ReserveManipulation
679 | FraudType::UnauthorizedAccess
680 | FraudType::PrematureRevenue
681 | FraudType::UnderstatedLiabilities
682 | FraudType::OverstatedAssets
683 | FraudType::ChannelStuffing => FraudAmountPattern::StatisticallyImprobable,
684 FraudType::DuplicatePayment
685 | FraudType::TimingAnomaly
686 | FraudType::SelfApproval
687 | FraudType::ExceededApprovalLimit
688 | FraudType::SegregationOfDutiesViolation
689 | FraudType::UnauthorizedApproval
690 | FraudType::CollusiveApproval
691 | FraudType::FictitiousVendor
692 | FraudType::ShellCompanyPayment
693 | FraudType::Kickback
694 | FraudType::KickbackScheme
695 | FraudType::InvoiceManipulation
696 | FraudType::AssetMisappropriation
697 | FraudType::InventoryTheft
698 | FraudType::GhostEmployee => FraudAmountPattern::Normal,
699 FraudType::ImproperRevenueRecognition
701 | FraudType::ImproperPoAllocation
702 | FraudType::VariableConsiderationManipulation
703 | FraudType::ContractModificationMisstatement => {
704 FraudAmountPattern::StatisticallyImprobable
705 }
706 FraudType::LeaseClassificationManipulation
708 | FraudType::OffBalanceSheetLease
709 | FraudType::LeaseLiabilityUnderstatement
710 | FraudType::RouAssetMisstatement => FraudAmountPattern::StatisticallyImprobable,
711 FraudType::FairValueHierarchyManipulation
713 | FraudType::Level3InputManipulation
714 | FraudType::ValuationTechniqueManipulation => {
715 FraudAmountPattern::StatisticallyImprobable
716 }
717 FraudType::DelayedImpairment
719 | FraudType::ImpairmentTestAvoidance
720 | FraudType::CashFlowProjectionManipulation
721 | FraudType::ImproperImpairmentReversal => FraudAmountPattern::StatisticallyImprobable,
722 FraudType::BidRigging
724 | FraudType::PhantomVendorContract
725 | FraudType::ConflictOfInterestSourcing => FraudAmountPattern::Normal,
726 FraudType::SplitContractThreshold => FraudAmountPattern::ThresholdAdjacent,
727 FraudType::GhostEmployeePayroll
729 | FraudType::PayrollInflation
730 | FraudType::DuplicateExpenseReport
731 | FraudType::FictitiousExpense => FraudAmountPattern::Normal,
732 FraudType::SplitExpenseToAvoidApproval => FraudAmountPattern::ThresholdAdjacent,
733 FraudType::RevenueTimingManipulation => FraudAmountPattern::StatisticallyImprobable,
735 FraudType::QuotePriceOverride => FraudAmountPattern::Normal,
736 }
737 }
738
739 #[inline]
741 fn generate_deterministic_uuid(&self) -> uuid::Uuid {
742 self.uuid_factory.next()
743 }
744
745 pub fn generate(&mut self) -> JournalEntry {
747 debug!(
748 count = self.count,
749 companies = self.companies.len(),
750 start_date = %self.start_date,
751 end_date = %self.end_date,
752 "Generating journal entry"
753 );
754
755 if let Some(ref state) = self.batch_state {
757 if state.remaining > 0 {
758 return self.generate_batched_entry();
759 }
760 }
761
762 self.count += 1;
763
764 let document_id = self.generate_deterministic_uuid();
766
767 let mut posting_date = self
769 .temporal_sampler
770 .sample_date(self.start_date, self.end_date);
771
772 if let Some(ref calc) = self.business_day_calculator {
774 if !calc.is_business_day(posting_date) {
775 posting_date = calc.next_business_day(posting_date, false);
777 if posting_date > self.end_date {
779 posting_date = calc.prev_business_day(self.end_date, true);
780 }
781 }
782 }
783
784 let company_code = self.company_selector.select(&mut self.rng).to_string();
786
787 let line_spec = self.line_sampler.sample();
789
790 let source = self.select_source();
792 let is_automated = matches!(
793 source,
794 TransactionSource::Automated | TransactionSource::Recurring
795 );
796
797 let business_process = self.select_business_process();
799
800 let fraud_type = self.determine_fraud();
802 let is_fraud = fraud_type.is_some();
803
804 let time = self.temporal_sampler.sample_time(!is_automated);
806 let created_at = posting_date.and_time(time).and_utc();
807
808 let (created_by, user_persona) = self.select_user(is_automated);
810
811 let mut header =
813 JournalEntryHeader::with_deterministic_id(company_code, posting_date, document_id);
814 header.created_at = created_at;
815 header.source = source;
816 header.created_by = created_by;
817 header.user_persona = user_persona;
818 header.business_process = Some(business_process);
819 header.is_fraud = is_fraud;
820 header.fraud_type = fraud_type;
821
822 let mut context =
824 DescriptionContext::with_period(posting_date.month(), posting_date.year());
825
826 match business_process {
828 BusinessProcess::P2P => {
829 if let Some(vendor) = self.vendor_pool.random_vendor(&mut self.rng) {
830 context.vendor_name = Some(vendor.name.clone());
831 }
832 }
833 BusinessProcess::O2C => {
834 if let Some(customer) = self.customer_pool.random_customer(&mut self.rng) {
835 context.customer_name = Some(customer.name.clone());
836 }
837 }
838 _ => {}
839 }
840
841 if self.template_config.descriptions.generate_header_text {
843 header.header_text = Some(self.description_generator.generate_header_text(
844 business_process,
845 &context,
846 &mut self.rng,
847 ));
848 }
849
850 if self.template_config.references.generate_references {
852 header.reference = Some(
853 self.reference_generator
854 .generate_for_process_year(business_process, posting_date.year()),
855 );
856 }
857
858 let mut entry = JournalEntry::new(header);
860
861 let base_amount = if let Some(ft) = fraud_type {
863 let pattern = self.fraud_type_to_amount_pattern(ft);
864 self.amount_sampler.sample_fraud(pattern)
865 } else {
866 self.amount_sampler.sample()
867 };
868
869 let drift_adjusted_amount = {
871 let drift = self.get_drift_adjustments(posting_date);
872 if drift.amount_mean_multiplier != 1.0 {
873 let multiplier = drift.amount_mean_multiplier * drift.seasonal_factor;
875 let adjusted = base_amount.to_f64().unwrap_or(1.0) * multiplier;
876 Decimal::from_f64_retain(adjusted).unwrap_or(base_amount)
877 } else {
878 base_amount
879 }
880 };
881
882 let total_amount = if is_automated {
884 drift_adjusted_amount } else {
886 self.apply_human_variation(drift_adjusted_amount)
887 };
888
889 let debit_amounts = self
891 .amount_sampler
892 .sample_summing_to(line_spec.debit_count, total_amount);
893 for (i, amount) in debit_amounts.into_iter().enumerate() {
894 let account_number = self.select_debit_account().account_number.clone();
895 let mut line = JournalEntryLine::debit(
896 entry.header.document_id,
897 (i + 1) as u32,
898 account_number.clone(),
899 amount,
900 );
901
902 if self.template_config.descriptions.generate_line_text {
904 line.line_text = Some(self.description_generator.generate_line_text(
905 &account_number,
906 &context,
907 &mut self.rng,
908 ));
909 }
910
911 entry.add_line(line);
912 }
913
914 let credit_amounts = self
916 .amount_sampler
917 .sample_summing_to(line_spec.credit_count, total_amount);
918 for (i, amount) in credit_amounts.into_iter().enumerate() {
919 let account_number = self.select_credit_account().account_number.clone();
920 let mut line = JournalEntryLine::credit(
921 entry.header.document_id,
922 (line_spec.debit_count + i + 1) as u32,
923 account_number.clone(),
924 amount,
925 );
926
927 if self.template_config.descriptions.generate_line_text {
929 line.line_text = Some(self.description_generator.generate_line_text(
930 &account_number,
931 &context,
932 &mut self.rng,
933 ));
934 }
935
936 entry.add_line(line);
937 }
938
939 if self.persona_errors_enabled && !is_automated {
941 self.maybe_inject_persona_error(&mut entry);
942 }
943
944 if self.approval_enabled {
946 self.maybe_apply_approval_workflow(&mut entry, posting_date);
947 }
948
949 self.maybe_start_batch(&entry);
951
952 entry
953 }
954
955 pub fn with_persona_errors(mut self, enabled: bool) -> Self {
960 self.persona_errors_enabled = enabled;
961 self
962 }
963
964 pub fn with_fraud_config(mut self, config: FraudConfig) -> Self {
969 self.fraud_config = config;
970 self
971 }
972
973 pub fn persona_errors_enabled(&self) -> bool {
975 self.persona_errors_enabled
976 }
977
978 pub fn with_batching(mut self, enabled: bool) -> Self {
983 if !enabled {
984 self.batch_state = None;
985 }
986 self
987 }
988
989 pub fn batching_enabled(&self) -> bool {
991 true
993 }
994
995 fn maybe_start_batch(&mut self, entry: &JournalEntry) {
1000 if entry.header.source == TransactionSource::Automated || entry.header.is_fraud {
1002 return;
1003 }
1004
1005 if self.rng.random::<f64>() > 0.15 {
1007 return;
1008 }
1009
1010 let base_account = entry
1012 .lines
1013 .first()
1014 .map(|l| l.gl_account.clone())
1015 .unwrap_or_default();
1016
1017 let base_amount = entry.total_debit();
1018
1019 self.batch_state = Some(BatchState {
1020 base_account_number: base_account,
1021 base_amount,
1022 base_business_process: entry.header.business_process,
1023 base_posting_date: entry.header.posting_date,
1024 remaining: self.rng.random_range(2..7), });
1026 }
1027
1028 fn generate_batched_entry(&mut self) -> JournalEntry {
1036 use rust_decimal::Decimal;
1037
1038 if let Some(ref mut state) = self.batch_state {
1040 state.remaining = state.remaining.saturating_sub(1);
1041 }
1042
1043 let Some(batch) = self.batch_state.clone() else {
1044 tracing::warn!(
1047 "generate_batched_entry called without batch_state; generating standard entry"
1048 );
1049 self.batch_state = None;
1050 return self.generate();
1051 };
1052
1053 let posting_date = batch.base_posting_date;
1055
1056 self.count += 1;
1057 let document_id = self.generate_deterministic_uuid();
1058
1059 let company_code = self.company_selector.select(&mut self.rng).to_string();
1061
1062 let _line_spec = LineItemSpec {
1064 total_count: 2,
1065 debit_count: 1,
1066 credit_count: 1,
1067 split_type: DebitCreditSplit::Equal,
1068 };
1069
1070 let source = TransactionSource::Manual;
1072
1073 let business_process = batch.base_business_process.unwrap_or(BusinessProcess::R2R);
1075
1076 let time = self.temporal_sampler.sample_time(true);
1078 let created_at = posting_date.and_time(time).and_utc();
1079
1080 let (created_by, user_persona) = self.select_user(false);
1082
1083 let mut header =
1085 JournalEntryHeader::with_deterministic_id(company_code, posting_date, document_id);
1086 header.created_at = created_at;
1087 header.source = source;
1088 header.created_by = created_by;
1089 header.user_persona = user_persona;
1090 header.business_process = Some(business_process);
1091
1092 let variation = self.rng.random_range(-0.15..0.15);
1094 let varied_amount =
1095 batch.base_amount * (Decimal::ONE + Decimal::try_from(variation).unwrap_or_default());
1096 let total_amount = varied_amount.round_dp(2).max(Decimal::from(1));
1097
1098 let mut entry = JournalEntry::new(header);
1100
1101 let debit_line = JournalEntryLine::debit(
1103 entry.header.document_id,
1104 1,
1105 batch.base_account_number.clone(),
1106 total_amount,
1107 );
1108 entry.add_line(debit_line);
1109
1110 let credit_account = self.select_credit_account().account_number.clone();
1112 let credit_line =
1113 JournalEntryLine::credit(entry.header.document_id, 2, credit_account, total_amount);
1114 entry.add_line(credit_line);
1115
1116 if self.persona_errors_enabled {
1118 self.maybe_inject_persona_error(&mut entry);
1119 }
1120
1121 if self.approval_enabled {
1123 self.maybe_apply_approval_workflow(&mut entry, posting_date);
1124 }
1125
1126 if batch.remaining <= 1 {
1128 self.batch_state = None;
1129 }
1130
1131 entry
1132 }
1133
1134 fn maybe_inject_persona_error(&mut self, entry: &mut JournalEntry) {
1136 let persona_str = &entry.header.user_persona;
1138 let persona = match persona_str.to_lowercase().as_str() {
1139 s if s.contains("junior") => UserPersona::JuniorAccountant,
1140 s if s.contains("senior") => UserPersona::SeniorAccountant,
1141 s if s.contains("controller") => UserPersona::Controller,
1142 s if s.contains("manager") => UserPersona::Manager,
1143 s if s.contains("executive") => UserPersona::Executive,
1144 _ => return, };
1146
1147 let base_error_rate = persona.error_rate();
1149
1150 let adjusted_rate = self.apply_stress_factors(base_error_rate, entry.header.posting_date);
1152
1153 if self.rng.random::<f64>() >= adjusted_rate {
1155 return; }
1157
1158 self.inject_human_error(entry, persona);
1160 }
1161
1162 fn apply_stress_factors(&self, base_rate: f64, posting_date: chrono::NaiveDate) -> f64 {
1171 use chrono::Datelike;
1172
1173 let mut rate = base_rate;
1174 let day = posting_date.day();
1175 let month = posting_date.month();
1176
1177 if month == 12 && day >= 28 {
1179 rate *= 2.0;
1180 return rate.min(0.5); }
1182
1183 if matches!(month, 3 | 6 | 9 | 12) && day >= 28 {
1185 rate *= 1.75; return rate.min(0.4);
1187 }
1188
1189 if day >= 28 {
1191 rate *= 1.5; }
1193
1194 let weekday = posting_date.weekday();
1196 match weekday {
1197 chrono::Weekday::Mon => {
1198 rate *= 1.2;
1200 }
1201 chrono::Weekday::Fri => {
1202 rate *= 1.3;
1204 }
1205 _ => {}
1206 }
1207
1208 rate.min(0.4)
1210 }
1211
1212 fn apply_human_variation(&mut self, amount: rust_decimal::Decimal) -> rust_decimal::Decimal {
1221 use rust_decimal::Decimal;
1222
1223 if amount < Decimal::from(10) {
1225 return amount;
1226 }
1227
1228 if self.rng.random::<f64>() > 0.70 {
1230 return amount;
1231 }
1232
1233 let variation_type: u8 = self.rng.random_range(0..4);
1235
1236 match variation_type {
1237 0 => {
1238 let variation_pct = self.rng.random_range(-0.02..0.02);
1240 let variation = amount * Decimal::try_from(variation_pct).unwrap_or_default();
1241 (amount + variation).round_dp(2)
1242 }
1243 1 => {
1244 let ten = Decimal::from(10);
1246 (amount / ten).round() * ten
1247 }
1248 2 => {
1249 if amount >= Decimal::from(500) {
1251 let hundred = Decimal::from(100);
1252 (amount / hundred).round() * hundred
1253 } else {
1254 amount
1255 }
1256 }
1257 3 => {
1258 let cents = Decimal::new(self.rng.random_range(-100..100), 2);
1260 (amount + cents).max(Decimal::ZERO).round_dp(2)
1261 }
1262 _ => amount,
1263 }
1264 }
1265
1266 fn rebalance_entry(entry: &mut JournalEntry, modified_was_debit: bool, impact: Decimal) {
1272 let balancing_idx = entry.lines.iter().position(|l| {
1274 if modified_was_debit {
1275 l.credit_amount > Decimal::ZERO
1276 } else {
1277 l.debit_amount > Decimal::ZERO
1278 }
1279 });
1280
1281 if let Some(idx) = balancing_idx {
1282 if modified_was_debit {
1283 entry.lines[idx].credit_amount += impact;
1284 } else {
1285 entry.lines[idx].debit_amount += impact;
1286 }
1287 }
1288 }
1289
1290 fn inject_human_error(&mut self, entry: &mut JournalEntry, persona: UserPersona) {
1295 use rust_decimal::Decimal;
1296
1297 let error_type: u8 = match persona {
1299 UserPersona::JuniorAccountant => {
1300 self.rng.random_range(0..5)
1302 }
1303 UserPersona::SeniorAccountant => {
1304 self.rng.random_range(0..3)
1306 }
1307 UserPersona::Controller | UserPersona::Manager => {
1308 self.rng.random_range(3..5)
1310 }
1311 _ => return,
1312 };
1313
1314 match error_type {
1315 0 => {
1316 if let Some(line) = entry.lines.get_mut(0) {
1318 let is_debit = line.debit_amount > Decimal::ZERO;
1319 let original_amount = if is_debit {
1320 line.debit_amount
1321 } else {
1322 line.credit_amount
1323 };
1324
1325 let s = original_amount.to_string();
1327 if s.len() >= 2 {
1328 let chars: Vec<char> = s.chars().collect();
1329 let pos = self.rng.random_range(0..chars.len().saturating_sub(1));
1330 if chars[pos].is_ascii_digit()
1331 && chars.get(pos + 1).is_some_and(|c| c.is_ascii_digit())
1332 {
1333 let mut new_chars = chars;
1334 new_chars.swap(pos, pos + 1);
1335 if let Ok(new_amount) =
1336 new_chars.into_iter().collect::<String>().parse::<Decimal>()
1337 {
1338 let impact = new_amount - original_amount;
1339
1340 if is_debit {
1342 entry.lines[0].debit_amount = new_amount;
1343 } else {
1344 entry.lines[0].credit_amount = new_amount;
1345 }
1346
1347 Self::rebalance_entry(entry, is_debit, impact);
1349
1350 entry.header.header_text = Some(
1351 entry.header.header_text.clone().unwrap_or_default()
1352 + " [HUMAN_ERROR:TRANSPOSITION]",
1353 );
1354 }
1355 }
1356 }
1357 }
1358 }
1359 1 => {
1360 if let Some(line) = entry.lines.get_mut(0) {
1362 let is_debit = line.debit_amount > Decimal::ZERO;
1363 let original_amount = if is_debit {
1364 line.debit_amount
1365 } else {
1366 line.credit_amount
1367 };
1368
1369 let new_amount = original_amount * Decimal::new(10, 0);
1370 let impact = new_amount - original_amount;
1371
1372 if is_debit {
1374 entry.lines[0].debit_amount = new_amount;
1375 } else {
1376 entry.lines[0].credit_amount = new_amount;
1377 }
1378
1379 Self::rebalance_entry(entry, is_debit, impact);
1381
1382 entry.header.header_text = Some(
1383 entry.header.header_text.clone().unwrap_or_default()
1384 + " [HUMAN_ERROR:DECIMAL_SHIFT]",
1385 );
1386 }
1387 }
1388 2 => {
1389 if let Some(ref mut text) = entry.header.header_text {
1391 let typos = ["teh", "adn", "wiht", "taht", "recieve"];
1392 let correct = ["the", "and", "with", "that", "receive"];
1393 let idx = self.rng.random_range(0..typos.len());
1394 if text.to_lowercase().contains(correct[idx]) {
1395 *text = text.replace(correct[idx], typos[idx]);
1396 *text = format!("{} [HUMAN_ERROR:TYPO]", text);
1397 }
1398 }
1399 }
1400 3 => {
1401 if let Some(line) = entry.lines.get_mut(0) {
1403 let is_debit = line.debit_amount > Decimal::ZERO;
1404 let original_amount = if is_debit {
1405 line.debit_amount
1406 } else {
1407 line.credit_amount
1408 };
1409
1410 let new_amount =
1411 (original_amount / Decimal::new(100, 0)).round() * Decimal::new(100, 0);
1412 let impact = new_amount - original_amount;
1413
1414 if is_debit {
1416 entry.lines[0].debit_amount = new_amount;
1417 } else {
1418 entry.lines[0].credit_amount = new_amount;
1419 }
1420
1421 Self::rebalance_entry(entry, is_debit, impact);
1423
1424 entry.header.header_text = Some(
1425 entry.header.header_text.clone().unwrap_or_default()
1426 + " [HUMAN_ERROR:ROUNDED]",
1427 );
1428 }
1429 }
1430 4 => {
1431 if entry.header.document_date == entry.header.posting_date {
1434 let days_late = self.rng.random_range(5..15);
1435 entry.header.document_date =
1436 entry.header.posting_date - chrono::Duration::days(days_late);
1437 entry.header.header_text = Some(
1438 entry.header.header_text.clone().unwrap_or_default()
1439 + " [HUMAN_ERROR:LATE_POSTING]",
1440 );
1441 }
1442 }
1443 _ => {}
1444 }
1445 }
1446
1447 fn maybe_apply_approval_workflow(
1452 &mut self,
1453 entry: &mut JournalEntry,
1454 _posting_date: NaiveDate,
1455 ) {
1456 use rust_decimal::Decimal;
1457
1458 let amount = entry.total_debit();
1459
1460 if amount <= self.approval_threshold {
1462 let workflow = ApprovalWorkflow::auto_approved(
1464 entry.header.created_by.clone(),
1465 entry.header.user_persona.clone(),
1466 amount,
1467 entry.header.created_at,
1468 );
1469 entry.header.approval_workflow = Some(workflow);
1470 return;
1471 }
1472
1473 entry.header.sox_relevant = true;
1475
1476 let required_levels = if amount > Decimal::new(100000, 0) {
1478 3 } else if amount > Decimal::new(50000, 0) {
1480 2 } else {
1482 1 };
1484
1485 let mut workflow = ApprovalWorkflow::new(
1487 entry.header.created_by.clone(),
1488 entry.header.user_persona.clone(),
1489 amount,
1490 );
1491 workflow.required_levels = required_levels;
1492
1493 let submit_time = entry.header.created_at;
1495 let submit_action = ApprovalAction::new(
1496 entry.header.created_by.clone(),
1497 entry.header.user_persona.clone(),
1498 self.parse_persona(&entry.header.user_persona),
1499 ApprovalActionType::Submit,
1500 0,
1501 )
1502 .with_timestamp(submit_time);
1503
1504 workflow.actions.push(submit_action);
1505 workflow.status = ApprovalStatus::Pending;
1506 workflow.submitted_at = Some(submit_time);
1507
1508 let mut current_time = submit_time;
1510 for level in 1..=required_levels {
1511 let delay_hours = self.rng.random_range(1..4);
1513 current_time += chrono::Duration::hours(delay_hours);
1514
1515 while current_time.weekday() == chrono::Weekday::Sat
1517 || current_time.weekday() == chrono::Weekday::Sun
1518 {
1519 current_time += chrono::Duration::days(1);
1520 }
1521
1522 let (approver_id, approver_role) = self.select_approver(level);
1524
1525 let approve_action = ApprovalAction::new(
1526 approver_id.clone(),
1527 format!("{:?}", approver_role),
1528 approver_role,
1529 ApprovalActionType::Approve,
1530 level,
1531 )
1532 .with_timestamp(current_time);
1533
1534 workflow.actions.push(approve_action);
1535 workflow.current_level = level;
1536 }
1537
1538 workflow.status = ApprovalStatus::Approved;
1540 workflow.approved_at = Some(current_time);
1541
1542 entry.header.approval_workflow = Some(workflow);
1543 }
1544
1545 fn select_approver(&mut self, level: u8) -> (String, UserPersona) {
1547 let persona = match level {
1548 1 => UserPersona::Manager,
1549 2 => UserPersona::Controller,
1550 _ => UserPersona::Executive,
1551 };
1552
1553 if let Some(ref pool) = self.user_pool {
1555 if let Some(user) = pool.get_random_user(persona, &mut self.rng) {
1556 return (user.user_id.clone(), persona);
1557 }
1558 }
1559
1560 let approver_id = match persona {
1562 UserPersona::Manager => format!("MGR{:04}", self.rng.random_range(1..100)),
1563 UserPersona::Controller => format!("CTRL{:04}", self.rng.random_range(1..20)),
1564 UserPersona::Executive => format!("EXEC{:04}", self.rng.random_range(1..10)),
1565 _ => format!("USR{:04}", self.rng.random_range(1..1000)),
1566 };
1567
1568 (approver_id, persona)
1569 }
1570
1571 fn parse_persona(&self, persona_str: &str) -> UserPersona {
1573 match persona_str.to_lowercase().as_str() {
1574 s if s.contains("junior") => UserPersona::JuniorAccountant,
1575 s if s.contains("senior") => UserPersona::SeniorAccountant,
1576 s if s.contains("controller") => UserPersona::Controller,
1577 s if s.contains("manager") => UserPersona::Manager,
1578 s if s.contains("executive") => UserPersona::Executive,
1579 s if s.contains("automated") || s.contains("system") => UserPersona::AutomatedSystem,
1580 _ => UserPersona::JuniorAccountant, }
1582 }
1583
1584 pub fn with_approval(mut self, enabled: bool) -> Self {
1586 self.approval_enabled = enabled;
1587 self
1588 }
1589
1590 pub fn with_approval_threshold(mut self, threshold: rust_decimal::Decimal) -> Self {
1592 self.approval_threshold = threshold;
1593 self
1594 }
1595
1596 pub fn with_drift_controller(mut self, controller: DriftController) -> Self {
1602 self.drift_controller = Some(controller);
1603 self
1604 }
1605
1606 pub fn with_drift_config(mut self, config: DriftConfig, seed: u64) -> Self {
1611 if config.enabled {
1612 let total_periods = self.calculate_total_periods();
1613 self.drift_controller = Some(DriftController::new(config, seed, total_periods));
1614 }
1615 self
1616 }
1617
1618 fn calculate_total_periods(&self) -> u32 {
1620 let start_year = self.start_date.year();
1621 let start_month = self.start_date.month();
1622 let end_year = self.end_date.year();
1623 let end_month = self.end_date.month();
1624
1625 ((end_year - start_year) * 12 + (end_month as i32 - start_month as i32) + 1).max(1) as u32
1626 }
1627
1628 fn date_to_period(&self, date: NaiveDate) -> u32 {
1630 let start_year = self.start_date.year();
1631 let start_month = self.start_date.month() as i32;
1632 let date_year = date.year();
1633 let date_month = date.month() as i32;
1634
1635 ((date_year - start_year) * 12 + (date_month - start_month)).max(0) as u32
1636 }
1637
1638 fn get_drift_adjustments(&self, date: NaiveDate) -> DriftAdjustments {
1640 if let Some(ref controller) = self.drift_controller {
1641 let period = self.date_to_period(date);
1642 controller.compute_adjustments(period)
1643 } else {
1644 DriftAdjustments::none()
1645 }
1646 }
1647
1648 #[inline]
1650 fn select_user(&mut self, is_automated: bool) -> (String, String) {
1651 if let Some(ref pool) = self.user_pool {
1652 let persona = if is_automated {
1653 UserPersona::AutomatedSystem
1654 } else {
1655 let roll: f64 = self.rng.random();
1657 if roll < 0.4 {
1658 UserPersona::JuniorAccountant
1659 } else if roll < 0.7 {
1660 UserPersona::SeniorAccountant
1661 } else if roll < 0.85 {
1662 UserPersona::Controller
1663 } else {
1664 UserPersona::Manager
1665 }
1666 };
1667
1668 if let Some(user) = pool.get_random_user(persona, &mut self.rng) {
1669 return (
1670 user.user_id.clone(),
1671 format!("{:?}", user.persona).to_lowercase(),
1672 );
1673 }
1674 }
1675
1676 if is_automated {
1678 (
1679 format!("BATCH{:04}", self.rng.random_range(1..=20)),
1680 "automated_system".to_string(),
1681 )
1682 } else {
1683 (
1684 format!("USER{:04}", self.rng.random_range(1..=40)),
1685 "senior_accountant".to_string(),
1686 )
1687 }
1688 }
1689
1690 #[inline]
1692 fn select_source(&mut self) -> TransactionSource {
1693 let roll: f64 = self.rng.random();
1694 let dist = &self.config.source_distribution;
1695
1696 if roll < dist.manual {
1697 TransactionSource::Manual
1698 } else if roll < dist.manual + dist.automated {
1699 TransactionSource::Automated
1700 } else if roll < dist.manual + dist.automated + dist.recurring {
1701 TransactionSource::Recurring
1702 } else {
1703 TransactionSource::Adjustment
1704 }
1705 }
1706
1707 #[inline]
1709 fn select_business_process(&mut self) -> BusinessProcess {
1710 let roll: f64 = self.rng.random();
1711
1712 if roll < 0.35 {
1714 BusinessProcess::O2C
1715 } else if roll < 0.65 {
1716 BusinessProcess::P2P
1717 } else if roll < 0.85 {
1718 BusinessProcess::R2R
1719 } else if roll < 0.95 {
1720 BusinessProcess::H2R
1721 } else {
1722 BusinessProcess::A2R
1723 }
1724 }
1725
1726 #[inline]
1727 fn select_debit_account(&mut self) -> &GLAccount {
1728 let accounts = self.coa.get_accounts_by_type(AccountType::Asset);
1729 let expense_accounts = self.coa.get_accounts_by_type(AccountType::Expense);
1730
1731 let all: Vec<_> = if self.rng.random::<f64>() < 0.6 {
1733 accounts
1734 } else {
1735 expense_accounts
1736 };
1737
1738 all.choose(&mut self.rng).copied().unwrap_or_else(|| {
1739 tracing::warn!(
1740 "Account selection returned empty list, falling back to first COA account"
1741 );
1742 &self.coa.accounts[0]
1743 })
1744 }
1745
1746 #[inline]
1747 fn select_credit_account(&mut self) -> &GLAccount {
1748 let liability_accounts = self.coa.get_accounts_by_type(AccountType::Liability);
1749 let revenue_accounts = self.coa.get_accounts_by_type(AccountType::Revenue);
1750
1751 let all: Vec<_> = if self.rng.random::<f64>() < 0.6 {
1753 liability_accounts
1754 } else {
1755 revenue_accounts
1756 };
1757
1758 all.choose(&mut self.rng).copied().unwrap_or_else(|| {
1759 tracing::warn!(
1760 "Account selection returned empty list, falling back to first COA account"
1761 );
1762 &self.coa.accounts[0]
1763 })
1764 }
1765}
1766
1767impl Generator for JournalEntryGenerator {
1768 type Item = JournalEntry;
1769 type Config = (
1770 TransactionConfig,
1771 Arc<ChartOfAccounts>,
1772 Vec<String>,
1773 NaiveDate,
1774 NaiveDate,
1775 );
1776
1777 fn new(config: Self::Config, seed: u64) -> Self {
1778 Self::new_with_params(config.0, config.1, config.2, config.3, config.4, seed)
1779 }
1780
1781 fn generate_one(&mut self) -> Self::Item {
1782 self.generate()
1783 }
1784
1785 fn reset(&mut self) {
1786 self.rng = seeded_rng(self.seed, 0);
1787 self.line_sampler.reset(self.seed + 1);
1788 self.amount_sampler.reset(self.seed + 2);
1789 self.temporal_sampler.reset(self.seed + 3);
1790 self.count = 0;
1791 self.uuid_factory.reset();
1792
1793 let mut ref_gen = ReferenceGenerator::new(
1795 self.start_date.year(),
1796 self.companies.first().map(|s| s.as_str()).unwrap_or("1000"),
1797 );
1798 ref_gen.set_prefix(
1799 ReferenceType::Invoice,
1800 &self.template_config.references.invoice_prefix,
1801 );
1802 ref_gen.set_prefix(
1803 ReferenceType::PurchaseOrder,
1804 &self.template_config.references.po_prefix,
1805 );
1806 ref_gen.set_prefix(
1807 ReferenceType::SalesOrder,
1808 &self.template_config.references.so_prefix,
1809 );
1810 self.reference_generator = ref_gen;
1811 }
1812
1813 fn count(&self) -> u64 {
1814 self.count
1815 }
1816
1817 fn seed(&self) -> u64 {
1818 self.seed
1819 }
1820}
1821
1822use datasynth_core::traits::ParallelGenerator;
1823
1824impl ParallelGenerator for JournalEntryGenerator {
1825 fn split(self, parts: usize) -> Vec<Self> {
1831 let parts = parts.max(1);
1832 (0..parts)
1833 .map(|i| {
1834 let sub_seed = self
1836 .seed
1837 .wrapping_add((i as u64).wrapping_mul(0x9E3779B97F4A7C15));
1838
1839 let mut gen = JournalEntryGenerator::new_with_full_config(
1840 self.config.clone(),
1841 Arc::clone(&self.coa),
1842 self.companies.clone(),
1843 self.start_date,
1844 self.end_date,
1845 sub_seed,
1846 self.template_config.clone(),
1847 self.user_pool.clone(),
1848 );
1849
1850 gen.company_selector = self.company_selector.clone();
1852 gen.vendor_pool = self.vendor_pool.clone();
1853 gen.customer_pool = self.customer_pool.clone();
1854 gen.material_pool = self.material_pool.clone();
1855 gen.using_real_master_data = self.using_real_master_data;
1856 gen.fraud_config = self.fraud_config.clone();
1857 gen.persona_errors_enabled = self.persona_errors_enabled;
1858 gen.approval_enabled = self.approval_enabled;
1859 gen.approval_threshold = self.approval_threshold;
1860
1861 gen.uuid_factory = DeterministicUuidFactory::for_partition(
1863 sub_seed,
1864 GeneratorType::JournalEntry,
1865 i as u8,
1866 );
1867
1868 if let Some(ref config) = self.temporal_patterns_config {
1870 gen.temporal_patterns_config = Some(config.clone());
1871 if config.business_days.enabled {
1873 if let Some(ref bdc) = self.business_day_calculator {
1874 gen.business_day_calculator = Some(bdc.clone());
1875 }
1876 }
1877 if config.processing_lags.enabled {
1879 let lag_config =
1880 Self::convert_processing_lag_config(&config.processing_lags);
1881 gen.processing_lag_calculator =
1882 Some(ProcessingLagCalculator::with_config(sub_seed, lag_config));
1883 }
1884 }
1885
1886 if let Some(ref dc) = self.drift_controller {
1888 gen.drift_controller = Some(dc.clone());
1889 }
1890
1891 gen
1892 })
1893 .collect()
1894 }
1895}
1896
#[cfg(test)]
#[allow(clippy::unwrap_used)]
mod tests {
    use super::*;
    use crate::ChartOfAccountsGenerator;

    /// Generates the small manufacturing chart of accounts (seed 42) that
    /// every test starts from.
    fn small_manufacturing_coa() -> Arc<ChartOfAccounts> {
        let mut coa_gen = ChartOfAccountsGenerator::new(
            CoAComplexity::Small,
            IndustrySector::Manufacturing,
            42,
        );
        Arc::new(coa_gen.generate())
    }

    /// Shorthand for building a date from literals known to be valid.
    fn ymd(year: i32, month: u32, day: u32) -> NaiveDate {
        NaiveDate::from_ymd_opt(year, month, day).unwrap()
    }

    /// Default-configured generator for company "1000" covering all of 2024.
    fn generator_for_2024(coa: Arc<ChartOfAccounts>, seed: u64) -> JournalEntryGenerator {
        JournalEntryGenerator::new_with_params(
            TransactionConfig::default(),
            coa,
            vec!["1000".to_string()],
            ymd(2024, 1, 1),
            ymd(2024, 12, 31),
            seed,
        )
    }

    /// Entries without an injected human error must balance, and at least 80
    /// of 100 generated entries must be error-free.
    #[test]
    fn test_generate_balanced_entries() {
        let mut je_gen = generator_for_2024(small_manufacturing_coa(), 42);

        let mut balanced_count = 0;
        for _ in 0..100 {
            let entry = je_gen.generate();

            // Entries tagged with a human error are exempt from the balance check.
            let has_human_error = entry
                .header
                .header_text
                .as_deref()
                .is_some_and(|text| text.contains("[HUMAN_ERROR:"));

            if !has_human_error {
                assert!(
                    entry.is_balanced(),
                    "Entry {:?} is not balanced",
                    entry.header.document_id
                );
                balanced_count += 1;
            }
            assert!(entry.line_count() >= 2, "Entry has fewer than 2 lines");
        }

        assert!(
            balanced_count >= 80,
            "Expected at least 80 balanced entries, got {}",
            balanced_count
        );
    }

    /// Two generators built from identical inputs produce identical ids and
    /// totals.
    #[test]
    fn test_deterministic_generation() {
        let coa = small_manufacturing_coa();

        let mut gen1 = generator_for_2024(Arc::clone(&coa), 42);
        let mut gen2 = generator_for_2024(coa, 42);

        for _ in 0..50 {
            let e1 = gen1.generate();
            let e2 = gen2.generate();
            assert_eq!(e1.header.document_id, e2.header.document_id);
            assert_eq!(e1.total_debit(), e2.total_debit());
        }
    }

    /// With all template options enabled, header text, reference, business
    /// process and line texts are populated.
    #[test]
    fn test_templates_generate_descriptions() {
        let coa = small_manufacturing_coa();

        let names = datasynth_config::schema::NameTemplateConfig {
            generate_realistic_names: true,
            email_domain: "test.com".to_string(),
            culture_distribution: datasynth_config::schema::CultureDistribution::default(),
        };
        let descriptions = datasynth_config::schema::DescriptionTemplateConfig {
            generate_header_text: true,
            generate_line_text: true,
        };
        let references = datasynth_config::schema::ReferenceTemplateConfig {
            generate_references: true,
            invoice_prefix: "TEST-INV".to_string(),
            po_prefix: "TEST-PO".to_string(),
            so_prefix: "TEST-SO".to_string(),
        };
        let template_config = TemplateConfig {
            names,
            descriptions,
            references,
        };

        let mut je_gen = JournalEntryGenerator::new_with_full_config(
            TransactionConfig::default(),
            coa,
            vec!["1000".to_string()],
            ymd(2024, 1, 1),
            ymd(2024, 12, 31),
            42,
            template_config,
            None,
        )
        // Persona errors are switched off so every entry must balance.
        .with_persona_errors(false);

        for _ in 0..10 {
            let entry = je_gen.generate();

            assert!(
                entry.header.header_text.is_some(),
                "Header text should be populated"
            );
            assert!(
                entry.header.reference.is_some(),
                "Reference should be populated"
            );
            assert!(
                entry.header.business_process.is_some(),
                "Business process should be set"
            );

            for line in &entry.lines {
                assert!(line.line_text.is_some(), "Line text should be populated");
            }

            assert!(entry.is_balanced());
        }
    }

    /// Entries generated with a user pool always carry a creator id.
    #[test]
    fn test_user_pool_integration() {
        let coa = small_manufacturing_coa();
        let companies = vec!["1000".to_string()];

        let mut user_gen = crate::UserGenerator::new(42);
        let user_pool = user_gen.generate_standard(&companies);

        let mut je_gen = JournalEntryGenerator::new_with_full_config(
            TransactionConfig::default(),
            coa,
            companies,
            ymd(2024, 1, 1),
            ymd(2024, 12, 31),
            42,
            TemplateConfig::default(),
            Some(user_pool),
        );

        for _ in 0..20 {
            let entry = je_gen.generate();
            assert!(!entry.header.created_by.is_empty());
        }
    }

    /// Supplying vendors, customers and materials one by one flips the
    /// "real master data" flag.
    #[test]
    fn test_master_data_connection() {
        let coa = small_manufacturing_coa();

        let vendors = vec![
            Vendor::new("V-TEST-001", "Test Vendor Alpha", VendorType::Supplier),
            Vendor::new("V-TEST-002", "Test Vendor Beta", VendorType::Technology),
        ];
        let customers = vec![
            Customer::new("C-TEST-001", "Test Customer One", CustomerType::Corporate),
            Customer::new(
                "C-TEST-002",
                "Test Customer Two",
                CustomerType::SmallBusiness,
            ),
        ];
        let materials = vec![Material::new(
            "MAT-TEST-001",
            "Test Material A",
            MaterialType::RawMaterial,
        )];

        let generator = generator_for_2024(coa, 42);
        assert!(!generator.is_using_real_master_data());

        let generator_with_data = generator
            .with_vendors(&vendors)
            .with_customers(&customers)
            .with_materials(&materials);
        assert!(generator_with_data.is_using_real_master_data());
    }

    /// `with_master_data` sets the "real master data" flag in a single call.
    #[test]
    fn test_with_master_data_convenience_method() {
        let coa = small_manufacturing_coa();

        let vendors = vec![Vendor::new("V-001", "Vendor One", VendorType::Supplier)];
        let customers = vec![Customer::new(
            "C-001",
            "Customer One",
            CustomerType::Corporate,
        )];
        let materials = vec![Material::new(
            "MAT-001",
            "Material One",
            MaterialType::RawMaterial,
        )];

        let generator =
            generator_for_2024(coa, 42).with_master_data(&vendors, &customers, &materials);

        assert!(generator.is_using_real_master_data());
    }

    /// Stress factors raise the error rate at month end, year end, and on
    /// Fridays and Mondays relative to the reference day.
    #[test]
    fn test_stress_factors_increase_error_rate() {
        let generator = generator_for_2024(small_manufacturing_coa(), 42);
        let base_rate = 0.1;

        let regular_rate = generator.apply_stress_factors(base_rate, ymd(2024, 6, 15));
        assert!(
            (regular_rate - base_rate).abs() < 0.01,
            "Regular day should have minimal stress factor adjustment"
        );

        let month_end_rate = generator.apply_stress_factors(base_rate, ymd(2024, 6, 29));
        assert!(
            month_end_rate > regular_rate,
            "Month end should have higher error rate than regular day"
        );

        let year_end_rate = generator.apply_stress_factors(base_rate, ymd(2024, 12, 30));
        assert!(
            year_end_rate > month_end_rate,
            "Year end should have highest error rate"
        );

        let friday_rate = generator.apply_stress_factors(base_rate, ymd(2024, 6, 14));
        assert!(
            friday_rate > regular_rate,
            "Friday should have higher error rate than mid-week"
        );

        let monday_rate = generator.apply_stress_factors(base_rate, ymd(2024, 6, 17));
        assert!(
            monday_rate > regular_rate,
            "Monday should have higher error rate than mid-week"
        );
    }

    /// Batched entries stay balanced, and some posting dates occur more than
    /// once.
    #[test]
    fn test_batching_produces_similar_entries() {
        let mut je_gen =
            generator_for_2024(small_manufacturing_coa(), 123).with_persona_errors(false);

        let entries: Vec<JournalEntry> = (0..200).map(|_| je_gen.generate()).collect();

        for entry in &entries {
            assert!(
                entry.is_balanced(),
                "All entries including batched should be balanced"
            );
        }

        // Count how many entries were posted on each date.
        let mut date_counts: std::collections::HashMap<NaiveDate, usize> =
            std::collections::HashMap::new();
        for entry in &entries {
            *date_counts.entry(entry.header.posting_date).or_default() += 1;
        }

        let dates_with_multiple = date_counts.values().filter(|&&count| count > 1).count();
        assert!(
            dates_with_multiple > 0,
            "With batching, should see some dates with multiple entries"
        );
    }

    /// With business-day handling enabled, no posting date is a Saturday or
    /// Sunday.
    #[test]
    fn test_temporal_patterns_business_days() {
        use datasynth_config::schema::{
            BusinessDaySchemaConfig, CalendarSchemaConfig, TemporalPatternsConfig,
        };

        let coa = small_manufacturing_coa();

        let temporal_config = TemporalPatternsConfig {
            enabled: true,
            business_days: BusinessDaySchemaConfig {
                enabled: true,
                ..Default::default()
            },
            calendars: CalendarSchemaConfig {
                regions: vec!["US".to_string()],
                custom_holidays: vec![],
            },
            ..Default::default()
        };

        // Only the first quarter of 2024 is generated here.
        let mut je_gen = JournalEntryGenerator::new_with_params(
            TransactionConfig::default(),
            coa,
            vec!["1000".to_string()],
            ymd(2024, 1, 1),
            ymd(2024, 3, 31),
            42,
        )
        .with_temporal_patterns(temporal_config, 42)
        .with_persona_errors(false);

        let entries: Vec<JournalEntry> = (0..100).map(|_| je_gen.generate()).collect();

        for entry in &entries {
            let weekday = entry.header.posting_date.weekday();
            assert!(
                weekday != chrono::Weekday::Sat && weekday != chrono::Weekday::Sun,
                "Posting date {:?} should not be a weekend",
                entry.header.posting_date
            );
        }
    }
}