1use chrono::{Datelike, NaiveDate};
4use datasynth_core::utils::seeded_rng;
5use rand::prelude::*;
6use rand_chacha::ChaCha8Rng;
7use rust_decimal::prelude::*;
8use rust_decimal::Decimal;
9use std::sync::Arc;
10
11use tracing::debug;
12
13use datasynth_config::schema::{
14 FraudConfig, GeneratorConfig, TemplateConfig, TemporalPatternsConfig, TransactionConfig,
15};
16use datasynth_core::distributions::{
17 BusinessDayCalculator, CrossDayConfig, DriftAdjustments, DriftConfig, DriftController,
18 EventType, LagDistribution, PeriodEndConfig, PeriodEndDynamics, PeriodEndModel,
19 ProcessingLagCalculator, ProcessingLagConfig, *,
20};
21use datasynth_core::models::*;
22use datasynth_core::templates::{
23 descriptions::DescriptionContext, DescriptionGenerator, ReferenceGenerator, ReferenceType,
24};
25use datasynth_core::traits::Generator;
26use datasynth_core::uuid_factory::{DeterministicUuidFactory, GeneratorType};
27use datasynth_core::CountryPack;
28
29use crate::company_selector::WeightedCompanySelector;
30use crate::user_generator::{UserGenerator, UserGeneratorConfig};
31
/// Generator that produces synthetic journal entries.
///
/// Bundles the seeded random source, the samplers, the master-data pools and
/// the optional temporal-pattern helpers that `generate` consults.
pub struct JournalEntryGenerator {
    /// Primary random number generator, created with `seeded_rng(seed, 0)`.
    rng: ChaCha8Rng,
    /// Seed supplied at construction; also used to derive the user-generator seed.
    seed: u64,
    /// Transaction settings the samplers were built from.
    config: TransactionConfig,
    /// Shared chart of accounts.
    coa: Arc<ChartOfAccounts>,
    /// Company codes known to this generator.
    companies: Vec<String>,
    /// Picks the company code of every generated entry.
    company_selector: WeightedCompanySelector,
    /// Samples the debit/credit line layout of an entry.
    line_sampler: LineItemSampler,
    /// Samples entry totals and splits them across lines.
    amount_sampler: AmountSampler,
    /// Samples posting dates and creation times.
    temporal_sampler: TemporalSampler,
    /// First date passed to the temporal sampler.
    start_date: NaiveDate,
    /// Last date passed to the temporal sampler.
    end_date: NaiveDate,
    /// Number of entries generated so far (batched entries included).
    count: u64,
    /// Source of document identifiers.
    uuid_factory: DeterministicUuidFactory,
    /// Optional pool of users; generated when realistic names are enabled.
    user_pool: Option<UserPool>,
    /// Produces header and line texts.
    description_generator: DescriptionGenerator,
    /// Produces document references.
    reference_generator: ReferenceGenerator,
    /// Template settings controlling text and reference generation.
    template_config: TemplateConfig,
    /// Vendors whose names are used for P2P entries.
    vendor_pool: VendorPool,
    /// Customers whose names are used for O2C entries.
    customer_pool: CustomerPool,
    /// Optional materials; only set through `with_materials`.
    material_pool: Option<MaterialPool>,
    /// True once non-empty vendors, customers or materials were supplied.
    using_real_master_data: bool,
    /// Fraud enablement, rate and type distribution.
    fraud_config: FraudConfig,
    /// Whether persona-based errors may be injected into human-created entries.
    persona_errors_enabled: bool,
    /// Whether the approval workflow step runs.
    approval_enabled: bool,
    /// Initialised to 10000; presumably the amount above which approval is
    /// required — the consumer is not visible here, TODO confirm.
    approval_threshold: rust_decimal::Decimal,
    /// State of the batch of similar entries currently being emitted, if any.
    batch_state: Option<BatchState>,
    /// Optional drift controller; never assigned in the visible code.
    drift_controller: Option<DriftController>,
    /// When set, posting dates that are not business days are moved.
    business_day_calculator: Option<BusinessDayCalculator>,
    /// Set when processing lags are enabled in the temporal configuration.
    processing_lag_calculator: Option<ProcessingLagCalculator>,
    /// Temporal configuration applied through one of the `with_*temporal*` builders.
    temporal_patterns_config: Option<TemporalPatternsConfig>,
}
74
/// Snapshot of the entry that started a batch, used to emit similar follow-up entries.
#[derive(Clone)]
struct BatchState {
    /// GL account of the first line of the seeding entry; debited by every batched entry.
    base_account_number: String,
    /// Total debit of the seeding entry; batched amounts vary around this value.
    base_amount: rust_decimal::Decimal,
    /// Business process of the seeding entry, if it had one.
    base_business_process: Option<BusinessProcess>,
    /// Posting date reused by every entry of the batch.
    base_posting_date: NaiveDate,
    /// Countdown that is decremented once per batched entry.
    remaining: u8,
}
89
90impl JournalEntryGenerator {
/// Creates a generator from the core parameters only.
///
/// Delegates to `new_with_full_config`, passing `TemplateConfig::default()`
/// and no pre-built user pool.
pub fn new_with_params(
    config: TransactionConfig,
    coa: Arc<ChartOfAccounts>,
    companies: Vec<String>,
    start_date: NaiveDate,
    end_date: NaiveDate,
    seed: u64,
) -> Self {
    Self::new_with_full_config(
        config,
        coa,
        companies,
        start_date,
        end_date,
        seed,
        TemplateConfig::default(),
        None,
    )
}
111
112 #[allow(clippy::too_many_arguments)]
114 pub fn new_with_full_config(
115 config: TransactionConfig,
116 coa: Arc<ChartOfAccounts>,
117 companies: Vec<String>,
118 start_date: NaiveDate,
119 end_date: NaiveDate,
120 seed: u64,
121 template_config: TemplateConfig,
122 user_pool: Option<UserPool>,
123 ) -> Self {
124 let user_pool = user_pool.or_else(|| {
126 if template_config.names.generate_realistic_names {
127 let user_gen_config = UserGeneratorConfig {
128 culture_distribution: vec![
129 (
130 datasynth_core::templates::NameCulture::WesternUs,
131 template_config.names.culture_distribution.western_us,
132 ),
133 (
134 datasynth_core::templates::NameCulture::Hispanic,
135 template_config.names.culture_distribution.hispanic,
136 ),
137 (
138 datasynth_core::templates::NameCulture::German,
139 template_config.names.culture_distribution.german,
140 ),
141 (
142 datasynth_core::templates::NameCulture::French,
143 template_config.names.culture_distribution.french,
144 ),
145 (
146 datasynth_core::templates::NameCulture::Chinese,
147 template_config.names.culture_distribution.chinese,
148 ),
149 (
150 datasynth_core::templates::NameCulture::Japanese,
151 template_config.names.culture_distribution.japanese,
152 ),
153 (
154 datasynth_core::templates::NameCulture::Indian,
155 template_config.names.culture_distribution.indian,
156 ),
157 ],
158 email_domain: template_config.names.email_domain.clone(),
159 generate_realistic_names: true,
160 };
161 let mut user_gen = UserGenerator::with_config(seed + 100, user_gen_config);
162 Some(user_gen.generate_standard(&companies))
163 } else {
164 None
165 }
166 });
167
168 let mut ref_gen = ReferenceGenerator::new(
170 start_date.year(),
171 companies.first().map(|s| s.as_str()).unwrap_or("1000"),
172 );
173 ref_gen.set_prefix(
174 ReferenceType::Invoice,
175 &template_config.references.invoice_prefix,
176 );
177 ref_gen.set_prefix(
178 ReferenceType::PurchaseOrder,
179 &template_config.references.po_prefix,
180 );
181 ref_gen.set_prefix(
182 ReferenceType::SalesOrder,
183 &template_config.references.so_prefix,
184 );
185
186 let company_selector = WeightedCompanySelector::uniform(companies.clone());
188
189 Self {
190 rng: seeded_rng(seed, 0),
191 seed,
192 config: config.clone(),
193 coa,
194 companies,
195 company_selector,
196 line_sampler: LineItemSampler::with_config(
197 seed + 1,
198 config.line_item_distribution.clone(),
199 config.even_odd_distribution.clone(),
200 config.debit_credit_distribution.clone(),
201 ),
202 amount_sampler: AmountSampler::with_config(seed + 2, config.amounts.clone()),
203 temporal_sampler: TemporalSampler::with_config(
204 seed + 3,
205 config.seasonality.clone(),
206 WorkingHoursConfig::default(),
207 Vec::new(),
208 ),
209 start_date,
210 end_date,
211 count: 0,
212 uuid_factory: DeterministicUuidFactory::new(seed, GeneratorType::JournalEntry),
213 user_pool,
214 description_generator: DescriptionGenerator::new(),
215 reference_generator: ref_gen,
216 template_config,
217 vendor_pool: VendorPool::standard(),
218 customer_pool: CustomerPool::standard(),
219 material_pool: None,
220 using_real_master_data: false,
221 fraud_config: FraudConfig::default(),
222 persona_errors_enabled: true, approval_enabled: true, approval_threshold: rust_decimal::Decimal::new(10000, 0), batch_state: None,
226 drift_controller: None,
227 business_day_calculator: None,
228 processing_lag_calculator: None,
229 temporal_patterns_config: None,
230 }
231 }
232
233 pub fn from_generator_config(
238 full_config: &GeneratorConfig,
239 coa: Arc<ChartOfAccounts>,
240 start_date: NaiveDate,
241 end_date: NaiveDate,
242 seed: u64,
243 ) -> Self {
244 let companies: Vec<String> = full_config
245 .companies
246 .iter()
247 .map(|c| c.code.clone())
248 .collect();
249
250 let company_selector = WeightedCompanySelector::from_configs(&full_config.companies);
252
253 let mut generator = Self::new_with_full_config(
254 full_config.transactions.clone(),
255 coa,
256 companies,
257 start_date,
258 end_date,
259 seed,
260 full_config.templates.clone(),
261 None,
262 );
263
264 generator.company_selector = company_selector;
266
267 generator.fraud_config = full_config.fraud.clone();
269
270 let temporal_config = &full_config.temporal_patterns;
272 if temporal_config.enabled {
273 generator = generator.with_temporal_patterns(temporal_config.clone(), seed);
274 }
275
276 generator
277 }
278
279 pub fn with_temporal_patterns(mut self, config: TemporalPatternsConfig, seed: u64) -> Self {
286 if config.business_days.enabled {
288 let region = config
289 .calendars
290 .regions
291 .first()
292 .map(|r| Self::parse_region(r))
293 .unwrap_or(Region::US);
294
295 let calendar = HolidayCalendar::new(region, self.start_date.year());
296 self.business_day_calculator = Some(BusinessDayCalculator::new(calendar));
297 }
298
299 if config.processing_lags.enabled {
301 let lag_config = Self::convert_processing_lag_config(&config.processing_lags);
302 self.processing_lag_calculator =
303 Some(ProcessingLagCalculator::with_config(seed, lag_config));
304 }
305
306 let model = config.period_end.model.as_deref().unwrap_or("flat");
308 if model != "flat"
309 || config
310 .period_end
311 .month_end
312 .as_ref()
313 .is_some_and(|m| m.peak_multiplier.unwrap_or(1.0) != 1.0)
314 {
315 let dynamics = Self::convert_period_end_config(&config.period_end);
316 self.temporal_sampler.set_period_end_dynamics(dynamics);
317 }
318
319 self.temporal_patterns_config = Some(config);
320 self
321 }
322
323 pub fn with_country_pack_temporal(
331 mut self,
332 config: TemporalPatternsConfig,
333 seed: u64,
334 pack: &CountryPack,
335 ) -> Self {
336 if config.business_days.enabled {
338 let calendar = HolidayCalendar::from_country_pack(pack, self.start_date.year());
339 self.business_day_calculator = Some(BusinessDayCalculator::new(calendar));
340 }
341
342 if config.processing_lags.enabled {
344 let lag_config = Self::convert_processing_lag_config(&config.processing_lags);
345 self.processing_lag_calculator =
346 Some(ProcessingLagCalculator::with_config(seed, lag_config));
347 }
348
349 let model = config.period_end.model.as_deref().unwrap_or("flat");
351 if model != "flat"
352 || config
353 .period_end
354 .month_end
355 .as_ref()
356 .is_some_and(|m| m.peak_multiplier.unwrap_or(1.0) != 1.0)
357 {
358 let dynamics = Self::convert_period_end_config(&config.period_end);
359 self.temporal_sampler.set_period_end_dynamics(dynamics);
360 }
361
362 self.temporal_patterns_config = Some(config);
363 self
364 }
365
366 fn convert_processing_lag_config(
368 schema: &datasynth_config::schema::ProcessingLagSchemaConfig,
369 ) -> ProcessingLagConfig {
370 let mut config = ProcessingLagConfig {
371 enabled: schema.enabled,
372 ..Default::default()
373 };
374
375 let convert_lag = |lag: &datasynth_config::schema::LagDistributionSchemaConfig| {
377 let mut dist = LagDistribution::log_normal(lag.mu, lag.sigma);
378 if let Some(min) = lag.min_hours {
379 dist.min_lag_hours = min;
380 }
381 if let Some(max) = lag.max_hours {
382 dist.max_lag_hours = max;
383 }
384 dist
385 };
386
387 if let Some(ref lag) = schema.sales_order_lag {
389 config
390 .event_lags
391 .insert(EventType::SalesOrder, convert_lag(lag));
392 }
393 if let Some(ref lag) = schema.purchase_order_lag {
394 config
395 .event_lags
396 .insert(EventType::PurchaseOrder, convert_lag(lag));
397 }
398 if let Some(ref lag) = schema.goods_receipt_lag {
399 config
400 .event_lags
401 .insert(EventType::GoodsReceipt, convert_lag(lag));
402 }
403 if let Some(ref lag) = schema.invoice_receipt_lag {
404 config
405 .event_lags
406 .insert(EventType::InvoiceReceipt, convert_lag(lag));
407 }
408 if let Some(ref lag) = schema.invoice_issue_lag {
409 config
410 .event_lags
411 .insert(EventType::InvoiceIssue, convert_lag(lag));
412 }
413 if let Some(ref lag) = schema.payment_lag {
414 config
415 .event_lags
416 .insert(EventType::Payment, convert_lag(lag));
417 }
418 if let Some(ref lag) = schema.journal_entry_lag {
419 config
420 .event_lags
421 .insert(EventType::JournalEntry, convert_lag(lag));
422 }
423
424 if let Some(ref cross_day) = schema.cross_day_posting {
426 config.cross_day = CrossDayConfig {
427 enabled: cross_day.enabled,
428 probability_by_hour: cross_day.probability_by_hour.clone(),
429 ..Default::default()
430 };
431 }
432
433 config
434 }
435
436 fn convert_period_end_config(
438 schema: &datasynth_config::schema::PeriodEndSchemaConfig,
439 ) -> PeriodEndDynamics {
440 let model_type = schema.model.as_deref().unwrap_or("exponential");
441
442 let convert_period =
444 |period: Option<&datasynth_config::schema::PeriodEndModelSchemaConfig>,
445 default_peak: f64|
446 -> PeriodEndConfig {
447 if let Some(p) = period {
448 let model = match model_type {
449 "flat" => PeriodEndModel::FlatMultiplier {
450 multiplier: p.peak_multiplier.unwrap_or(default_peak),
451 },
452 "extended_crunch" => PeriodEndModel::ExtendedCrunch {
453 start_day: p.start_day.unwrap_or(-10),
454 sustained_high_days: p.sustained_high_days.unwrap_or(3),
455 peak_multiplier: p.peak_multiplier.unwrap_or(default_peak),
456 ramp_up_days: 3, },
458 _ => PeriodEndModel::ExponentialAcceleration {
459 start_day: p.start_day.unwrap_or(-10),
460 base_multiplier: p.base_multiplier.unwrap_or(1.0),
461 peak_multiplier: p.peak_multiplier.unwrap_or(default_peak),
462 decay_rate: p.decay_rate.unwrap_or(0.3),
463 },
464 };
465 PeriodEndConfig {
466 enabled: true,
467 model,
468 additional_multiplier: p.additional_multiplier.unwrap_or(1.0),
469 }
470 } else {
471 PeriodEndConfig {
472 enabled: true,
473 model: PeriodEndModel::ExponentialAcceleration {
474 start_day: -10,
475 base_multiplier: 1.0,
476 peak_multiplier: default_peak,
477 decay_rate: 0.3,
478 },
479 additional_multiplier: 1.0,
480 }
481 }
482 };
483
484 PeriodEndDynamics::new(
485 convert_period(schema.month_end.as_ref(), 2.0),
486 convert_period(schema.quarter_end.as_ref(), 3.5),
487 convert_period(schema.year_end.as_ref(), 5.0),
488 )
489 }
490
491 fn parse_region(region_str: &str) -> Region {
493 match region_str.to_uppercase().as_str() {
494 "US" => Region::US,
495 "DE" => Region::DE,
496 "GB" => Region::GB,
497 "CN" => Region::CN,
498 "JP" => Region::JP,
499 "IN" => Region::IN,
500 "BR" => Region::BR,
501 "MX" => Region::MX,
502 "AU" => Region::AU,
503 "SG" => Region::SG,
504 "KR" => Region::KR,
505 _ => Region::US,
506 }
507 }
508
/// Replaces the selector used to pick the company code of each entry.
pub fn set_company_selector(&mut self, selector: WeightedCompanySelector) {
    self.company_selector = selector;
}
513
/// Returns the selector currently used to pick company codes.
pub fn company_selector(&self) -> &WeightedCompanySelector {
    &self.company_selector
}
518
/// Replaces the fraud configuration consulted when entries are generated.
pub fn set_fraud_config(&mut self, config: FraudConfig) {
    self.fraud_config = config;
}
523
524 pub fn with_vendors(mut self, vendors: &[Vendor]) -> Self {
529 if !vendors.is_empty() {
530 self.vendor_pool = VendorPool::from_vendors(vendors.to_vec());
531 self.using_real_master_data = true;
532 }
533 self
534 }
535
536 pub fn with_customers(mut self, customers: &[Customer]) -> Self {
541 if !customers.is_empty() {
542 self.customer_pool = CustomerPool::from_customers(customers.to_vec());
543 self.using_real_master_data = true;
544 }
545 self
546 }
547
548 pub fn with_materials(mut self, materials: &[Material]) -> Self {
552 if !materials.is_empty() {
553 self.material_pool = Some(MaterialPool::from_materials(materials.to_vec()));
554 self.using_real_master_data = true;
555 }
556 self
557 }
558
/// Applies vendors, customers and materials in one call.
///
/// Chains `with_vendors`, `with_customers` and `with_materials`; each of them
/// ignores an empty slice.
pub fn with_master_data(
    self,
    vendors: &[Vendor],
    customers: &[Customer],
    materials: &[Material],
) -> Self {
    self.with_vendors(vendors)
        .with_customers(customers)
        .with_materials(materials)
}
573
574 pub fn with_country_pack_names(mut self, pack: &CountryPack) -> Self {
581 let name_gen =
582 datasynth_core::templates::MultiCultureNameGenerator::from_country_pack(pack);
583 let config = UserGeneratorConfig {
584 culture_distribution: Vec::new(),
587 email_domain: name_gen.email_domain().to_string(),
588 generate_realistic_names: true,
589 };
590 let mut user_gen = UserGenerator::with_name_generator(self.seed + 100, config, name_gen);
591 self.user_pool = Some(user_gen.generate_standard(&self.companies));
592 self
593 }
594
/// Returns true once a non-empty vendor, customer or material list was supplied.
pub fn is_using_real_master_data(&self) -> bool {
    self.using_real_master_data
}
599
600 fn determine_fraud(&mut self) -> Option<FraudType> {
602 if !self.fraud_config.enabled {
603 return None;
604 }
605
606 if self.rng.random::<f64>() >= self.fraud_config.fraud_rate {
608 return None;
609 }
610
611 Some(self.select_fraud_type())
613 }
614
615 fn select_fraud_type(&mut self) -> FraudType {
617 let dist = &self.fraud_config.fraud_type_distribution;
618 let roll: f64 = self.rng.random();
619
620 let mut cumulative = 0.0;
621
622 cumulative += dist.suspense_account_abuse;
623 if roll < cumulative {
624 return FraudType::SuspenseAccountAbuse;
625 }
626
627 cumulative += dist.fictitious_transaction;
628 if roll < cumulative {
629 return FraudType::FictitiousTransaction;
630 }
631
632 cumulative += dist.revenue_manipulation;
633 if roll < cumulative {
634 return FraudType::RevenueManipulation;
635 }
636
637 cumulative += dist.expense_capitalization;
638 if roll < cumulative {
639 return FraudType::ExpenseCapitalization;
640 }
641
642 cumulative += dist.split_transaction;
643 if roll < cumulative {
644 return FraudType::SplitTransaction;
645 }
646
647 cumulative += dist.timing_anomaly;
648 if roll < cumulative {
649 return FraudType::TimingAnomaly;
650 }
651
652 cumulative += dist.unauthorized_access;
653 if roll < cumulative {
654 return FraudType::UnauthorizedAccess;
655 }
656
657 FraudType::DuplicatePayment
659 }
660
661 fn fraud_type_to_amount_pattern(&self, fraud_type: FraudType) -> FraudAmountPattern {
663 match fraud_type {
664 FraudType::SplitTransaction | FraudType::JustBelowThreshold => {
665 FraudAmountPattern::ThresholdAdjacent
666 }
667 FraudType::FictitiousTransaction
668 | FraudType::FictitiousEntry
669 | FraudType::SuspenseAccountAbuse
670 | FraudType::RoundDollarManipulation => FraudAmountPattern::ObviousRoundNumbers,
671 FraudType::RevenueManipulation
672 | FraudType::ExpenseCapitalization
673 | FraudType::ImproperCapitalization
674 | FraudType::ReserveManipulation
675 | FraudType::UnauthorizedAccess
676 | FraudType::PrematureRevenue
677 | FraudType::UnderstatedLiabilities
678 | FraudType::OverstatedAssets
679 | FraudType::ChannelStuffing => FraudAmountPattern::StatisticallyImprobable,
680 FraudType::DuplicatePayment
681 | FraudType::TimingAnomaly
682 | FraudType::SelfApproval
683 | FraudType::ExceededApprovalLimit
684 | FraudType::SegregationOfDutiesViolation
685 | FraudType::UnauthorizedApproval
686 | FraudType::CollusiveApproval
687 | FraudType::FictitiousVendor
688 | FraudType::ShellCompanyPayment
689 | FraudType::Kickback
690 | FraudType::KickbackScheme
691 | FraudType::InvoiceManipulation
692 | FraudType::AssetMisappropriation
693 | FraudType::InventoryTheft
694 | FraudType::GhostEmployee => FraudAmountPattern::Normal,
695 FraudType::ImproperRevenueRecognition
697 | FraudType::ImproperPoAllocation
698 | FraudType::VariableConsiderationManipulation
699 | FraudType::ContractModificationMisstatement => {
700 FraudAmountPattern::StatisticallyImprobable
701 }
702 FraudType::LeaseClassificationManipulation
704 | FraudType::OffBalanceSheetLease
705 | FraudType::LeaseLiabilityUnderstatement
706 | FraudType::RouAssetMisstatement => FraudAmountPattern::StatisticallyImprobable,
707 FraudType::FairValueHierarchyManipulation
709 | FraudType::Level3InputManipulation
710 | FraudType::ValuationTechniqueManipulation => {
711 FraudAmountPattern::StatisticallyImprobable
712 }
713 FraudType::DelayedImpairment
715 | FraudType::ImpairmentTestAvoidance
716 | FraudType::CashFlowProjectionManipulation
717 | FraudType::ImproperImpairmentReversal => FraudAmountPattern::StatisticallyImprobable,
718 FraudType::BidRigging
720 | FraudType::PhantomVendorContract
721 | FraudType::ConflictOfInterestSourcing => FraudAmountPattern::Normal,
722 FraudType::SplitContractThreshold => FraudAmountPattern::ThresholdAdjacent,
723 FraudType::GhostEmployeePayroll
725 | FraudType::PayrollInflation
726 | FraudType::DuplicateExpenseReport
727 | FraudType::FictitiousExpense => FraudAmountPattern::Normal,
728 FraudType::SplitExpenseToAvoidApproval => FraudAmountPattern::ThresholdAdjacent,
729 FraudType::RevenueTimingManipulation => FraudAmountPattern::StatisticallyImprobable,
731 FraudType::QuotePriceOverride => FraudAmountPattern::Normal,
732 }
733 }
734
/// Returns the next document identifier from the UUID factory.
#[inline]
fn generate_deterministic_uuid(&self) -> uuid::Uuid {
    self.uuid_factory.next()
}
740
/// Generates the next journal entry.
///
/// While a batch is in progress (`batch_state` with `remaining > 0`) the call
/// is delegated to `generate_batched_entry`. Otherwise a fresh entry is built:
/// posting date, company, line layout, source, business process, fraud
/// decision, creation time and user are sampled in that order, followed by
/// the header texts, the amounts and the lines. Persona errors, the approval
/// workflow and the batch-start decision run last.
///
/// The order of the sampling calls determines the random stream and therefore
/// the generated data; keep it stable when editing.
pub fn generate(&mut self) -> JournalEntry {
    debug!(
        count = self.count,
        companies = self.companies.len(),
        start_date = %self.start_date,
        end_date = %self.end_date,
        "Generating journal entry"
    );

    // Continue an in-progress batch before starting anything new.
    if let Some(ref state) = self.batch_state {
        if state.remaining > 0 {
            return self.generate_batched_entry();
        }
    }

    self.count += 1;

    let document_id = self.generate_deterministic_uuid();

    let mut posting_date = self
        .temporal_sampler
        .sample_date(self.start_date, self.end_date);

    // With a business-day calculator, a non-business posting date moves to the
    // next business day; if that lands after `end_date`, the date is taken
    // from `prev_business_day(end_date, true)` instead.
    // NOTE(review): the boolean flags presumably control whether the given
    // date itself may be returned — confirm against BusinessDayCalculator.
    if let Some(ref calc) = self.business_day_calculator {
        if !calc.is_business_day(posting_date) {
            posting_date = calc.next_business_day(posting_date, false);
            if posting_date > self.end_date {
                posting_date = calc.prev_business_day(self.end_date, true);
            }
        }
    }

    let company_code = self.company_selector.select(&mut self.rng).to_string();

    let line_spec = self.line_sampler.sample();

    // Automated and recurring sources count as automated from here on.
    let source = self.select_source();
    let is_automated = matches!(
        source,
        TransactionSource::Automated | TransactionSource::Recurring
    );

    let business_process = self.select_business_process();

    let fraud_type = self.determine_fraud();
    let is_fraud = fraud_type.is_some();

    // The time sampler is told whether the entry was created by a human.
    let time = self.temporal_sampler.sample_time(!is_automated);
    let created_at = posting_date.and_time(time).and_utc();

    let (created_by, user_persona) = self.select_user(is_automated);

    let mut header =
        JournalEntryHeader::with_deterministic_id(company_code, posting_date, document_id);
    header.created_at = created_at;
    header.source = source;
    header.created_by = created_by;
    header.user_persona = user_persona;
    header.business_process = Some(business_process);
    header.is_fraud = is_fraud;
    header.fraud_type = fraud_type;

    let mut context =
        DescriptionContext::with_period(posting_date.month(), posting_date.year());

    // Only P2P and O2C entries get a counterparty name in the text context.
    match business_process {
        BusinessProcess::P2P => {
            if let Some(vendor) = self.vendor_pool.random_vendor(&mut self.rng) {
                context.vendor_name = Some(vendor.name.clone());
            }
        }
        BusinessProcess::O2C => {
            if let Some(customer) = self.customer_pool.random_customer(&mut self.rng) {
                context.customer_name = Some(customer.name.clone());
            }
        }
        _ => {}
    }

    if self.template_config.descriptions.generate_header_text {
        header.header_text = Some(self.description_generator.generate_header_text(
            business_process,
            &context,
            &mut self.rng,
        ));
    }

    if self.template_config.references.generate_references {
        header.reference = Some(
            self.reference_generator
                .generate_for_process_year(business_process, posting_date.year()),
        );
    }

    let mut entry = JournalEntry::new(header);

    // Fraudulent entries draw their amount from a fraud-specific pattern.
    let base_amount = if let Some(ft) = fraud_type {
        let pattern = self.fraud_type_to_amount_pattern(ft);
        self.amount_sampler.sample_fraud(pattern)
    } else {
        self.amount_sampler.sample()
    };

    // Drift is applied only when the mean multiplier differs from 1.0; the
    // seasonal factor is multiplied in within that branch.
    // NOTE(review): a non-unit seasonal factor is ignored whenever the mean
    // multiplier is exactly 1.0 — confirm this is intended.
    let drift_adjusted_amount = {
        let drift = self.get_drift_adjustments(posting_date);
        if drift.amount_mean_multiplier != 1.0 {
            let multiplier = drift.amount_mean_multiplier * drift.seasonal_factor;
            let adjusted = base_amount.to_f64().unwrap_or(1.0) * multiplier;
            // Fall back to the unadjusted amount if the float cannot be represented.
            Decimal::from_f64_retain(adjusted).unwrap_or(base_amount)
        } else {
            base_amount
        }
    };

    // Human-entered amounts receive additional variation.
    let total_amount = if is_automated {
        drift_adjusted_amount
    } else {
        self.apply_human_variation(drift_adjusted_amount)
    };

    // Debit side: split the total across `debit_count` lines.
    let debit_amounts = self
        .amount_sampler
        .sample_summing_to(line_spec.debit_count, total_amount);
    for (i, amount) in debit_amounts.into_iter().enumerate() {
        let account_number = self.select_debit_account().account_number.clone();
        let mut line = JournalEntryLine::debit(
            entry.header.document_id,
            (i + 1) as u32,
            account_number.clone(),
            amount,
        );

        if self.template_config.descriptions.generate_line_text {
            line.line_text = Some(self.description_generator.generate_line_text(
                &account_number,
                &context,
                &mut self.rng,
            ));
        }

        entry.add_line(line);
    }

    // Credit side: the same total is split independently; line numbers
    // continue after the debit lines.
    let credit_amounts = self
        .amount_sampler
        .sample_summing_to(line_spec.credit_count, total_amount);
    for (i, amount) in credit_amounts.into_iter().enumerate() {
        let account_number = self.select_credit_account().account_number.clone();
        let mut line = JournalEntryLine::credit(
            entry.header.document_id,
            (line_spec.debit_count + i + 1) as u32,
            account_number.clone(),
            amount,
        );

        if self.template_config.descriptions.generate_line_text {
            line.line_text = Some(self.description_generator.generate_line_text(
                &account_number,
                &context,
                &mut self.rng,
            ));
        }

        entry.add_line(line);
    }

    // Persona errors only apply to entries created by a human.
    if self.persona_errors_enabled && !is_automated {
        self.maybe_inject_persona_error(&mut entry);
    }

    if self.approval_enabled {
        self.maybe_apply_approval_workflow(&mut entry, posting_date);
    }

    // This entry may become the template for a batch of similar entries.
    self.maybe_start_batch(&entry);

    entry
}
950
/// Enables or disables persona-based error injection (builder style).
pub fn with_persona_errors(mut self, enabled: bool) -> Self {
    self.persona_errors_enabled = enabled;
    self
}
959
/// Replaces the fraud configuration (builder style).
pub fn with_fraud_config(mut self, config: FraudConfig) -> Self {
    self.fraud_config = config;
    self
}
968
/// Returns whether persona-based error injection is enabled.
pub fn persona_errors_enabled(&self) -> bool {
    self.persona_errors_enabled
}
973
/// Builder-style batching switch.
///
/// Passing `false` discards a batch that is currently in progress; passing
/// `true` changes nothing.
///
/// NOTE(review): no flag is stored, so after `with_batching(false)` later
/// calls to `generate` can still start new batches — confirm whether a
/// persistent switch was intended.
pub fn with_batching(mut self, enabled: bool) -> Self {
    if !enabled {
        self.batch_state = None;
    }
    self
}
984
/// Reports whether batching is enabled.
///
/// Always returns `true`; the generator keeps no batching flag.
/// NOTE(review): this does not reflect a prior `with_batching(false)` call.
pub fn batching_enabled(&self) -> bool {
    true
}
990
991 fn maybe_start_batch(&mut self, entry: &JournalEntry) {
996 if entry.header.source == TransactionSource::Automated || entry.header.is_fraud {
998 return;
999 }
1000
1001 if self.rng.random::<f64>() > 0.15 {
1003 return;
1004 }
1005
1006 let base_account = entry
1008 .lines
1009 .first()
1010 .map(|l| l.gl_account.clone())
1011 .unwrap_or_default();
1012
1013 let base_amount = entry.total_debit();
1014
1015 self.batch_state = Some(BatchState {
1016 base_account_number: base_account,
1017 base_amount,
1018 base_business_process: entry.header.business_process,
1019 base_posting_date: entry.header.posting_date,
1020 remaining: self.rng.random_range(2..7), });
1022 }
1023
1024 fn generate_batched_entry(&mut self) -> JournalEntry {
1032 use rust_decimal::Decimal;
1033
1034 if let Some(ref mut state) = self.batch_state {
1036 state.remaining = state.remaining.saturating_sub(1);
1037 }
1038
1039 let Some(batch) = self.batch_state.clone() else {
1040 tracing::warn!(
1043 "generate_batched_entry called without batch_state; generating standard entry"
1044 );
1045 self.batch_state = None;
1046 return self.generate();
1047 };
1048
1049 let posting_date = batch.base_posting_date;
1051
1052 self.count += 1;
1053 let document_id = self.generate_deterministic_uuid();
1054
1055 let company_code = self.company_selector.select(&mut self.rng).to_string();
1057
1058 let _line_spec = LineItemSpec {
1060 total_count: 2,
1061 debit_count: 1,
1062 credit_count: 1,
1063 split_type: DebitCreditSplit::Equal,
1064 };
1065
1066 let source = TransactionSource::Manual;
1068
1069 let business_process = batch.base_business_process.unwrap_or(BusinessProcess::R2R);
1071
1072 let time = self.temporal_sampler.sample_time(true);
1074 let created_at = posting_date.and_time(time).and_utc();
1075
1076 let (created_by, user_persona) = self.select_user(false);
1078
1079 let mut header =
1081 JournalEntryHeader::with_deterministic_id(company_code, posting_date, document_id);
1082 header.created_at = created_at;
1083 header.source = source;
1084 header.created_by = created_by;
1085 header.user_persona = user_persona;
1086 header.business_process = Some(business_process);
1087
1088 let variation = self.rng.random_range(-0.15..0.15);
1090 let varied_amount =
1091 batch.base_amount * (Decimal::ONE + Decimal::try_from(variation).unwrap_or_default());
1092 let total_amount = varied_amount.round_dp(2).max(Decimal::from(1));
1093
1094 let mut entry = JournalEntry::new(header);
1096
1097 let debit_line = JournalEntryLine::debit(
1099 entry.header.document_id,
1100 1,
1101 batch.base_account_number.clone(),
1102 total_amount,
1103 );
1104 entry.add_line(debit_line);
1105
1106 let credit_account = self.select_credit_account().account_number.clone();
1108 let credit_line =
1109 JournalEntryLine::credit(entry.header.document_id, 2, credit_account, total_amount);
1110 entry.add_line(credit_line);
1111
1112 if self.persona_errors_enabled {
1114 self.maybe_inject_persona_error(&mut entry);
1115 }
1116
1117 if self.approval_enabled {
1119 self.maybe_apply_approval_workflow(&mut entry, posting_date);
1120 }
1121
1122 if batch.remaining <= 1 {
1124 self.batch_state = None;
1125 }
1126
1127 entry
1128 }
1129
1130 fn maybe_inject_persona_error(&mut self, entry: &mut JournalEntry) {
1132 let persona_str = &entry.header.user_persona;
1134 let persona = match persona_str.to_lowercase().as_str() {
1135 s if s.contains("junior") => UserPersona::JuniorAccountant,
1136 s if s.contains("senior") => UserPersona::SeniorAccountant,
1137 s if s.contains("controller") => UserPersona::Controller,
1138 s if s.contains("manager") => UserPersona::Manager,
1139 s if s.contains("executive") => UserPersona::Executive,
1140 _ => return, };
1142
1143 let base_error_rate = persona.error_rate();
1145
1146 let adjusted_rate = self.apply_stress_factors(base_error_rate, entry.header.posting_date);
1148
1149 if self.rng.random::<f64>() >= adjusted_rate {
1151 return; }
1153
1154 self.inject_human_error(entry, persona);
1156 }
1157
1158 fn apply_stress_factors(&self, base_rate: f64, posting_date: chrono::NaiveDate) -> f64 {
1167 use chrono::Datelike;
1168
1169 let mut rate = base_rate;
1170 let day = posting_date.day();
1171 let month = posting_date.month();
1172
1173 if month == 12 && day >= 28 {
1175 rate *= 2.0;
1176 return rate.min(0.5); }
1178
1179 if matches!(month, 3 | 6 | 9 | 12) && day >= 28 {
1181 rate *= 1.75; return rate.min(0.4);
1183 }
1184
1185 if day >= 28 {
1187 rate *= 1.5; }
1189
1190 let weekday = posting_date.weekday();
1192 match weekday {
1193 chrono::Weekday::Mon => {
1194 rate *= 1.2;
1196 }
1197 chrono::Weekday::Fri => {
1198 rate *= 1.3;
1200 }
1201 _ => {}
1202 }
1203
1204 rate.min(0.4)
1206 }
1207
1208 fn apply_human_variation(&mut self, amount: rust_decimal::Decimal) -> rust_decimal::Decimal {
1217 use rust_decimal::Decimal;
1218
1219 if amount < Decimal::from(10) {
1221 return amount;
1222 }
1223
1224 if self.rng.random::<f64>() > 0.70 {
1226 return amount;
1227 }
1228
1229 let variation_type: u8 = self.rng.random_range(0..4);
1231
1232 match variation_type {
1233 0 => {
1234 let variation_pct = self.rng.random_range(-0.02..0.02);
1236 let variation = amount * Decimal::try_from(variation_pct).unwrap_or_default();
1237 (amount + variation).round_dp(2)
1238 }
1239 1 => {
1240 let ten = Decimal::from(10);
1242 (amount / ten).round() * ten
1243 }
1244 2 => {
1245 if amount >= Decimal::from(500) {
1247 let hundred = Decimal::from(100);
1248 (amount / hundred).round() * hundred
1249 } else {
1250 amount
1251 }
1252 }
1253 3 => {
1254 let cents = Decimal::new(self.rng.random_range(-100..100), 2);
1256 (amount + cents).max(Decimal::ZERO).round_dp(2)
1257 }
1258 _ => amount,
1259 }
1260 }
1261
1262 fn rebalance_entry(entry: &mut JournalEntry, modified_was_debit: bool, impact: Decimal) {
1268 let balancing_idx = entry.lines.iter().position(|l| {
1270 if modified_was_debit {
1271 l.credit_amount > Decimal::ZERO
1272 } else {
1273 l.debit_amount > Decimal::ZERO
1274 }
1275 });
1276
1277 if let Some(idx) = balancing_idx {
1278 if modified_was_debit {
1279 entry.lines[idx].credit_amount += impact;
1280 } else {
1281 entry.lines[idx].debit_amount += impact;
1282 }
1283 }
1284 }
1285
1286 fn inject_human_error(&mut self, entry: &mut JournalEntry, persona: UserPersona) {
1291 use rust_decimal::Decimal;
1292
1293 let error_type: u8 = match persona {
1295 UserPersona::JuniorAccountant => {
1296 self.rng.random_range(0..5)
1298 }
1299 UserPersona::SeniorAccountant => {
1300 self.rng.random_range(0..3)
1302 }
1303 UserPersona::Controller | UserPersona::Manager => {
1304 self.rng.random_range(3..5)
1306 }
1307 _ => return,
1308 };
1309
1310 match error_type {
1311 0 => {
1312 if let Some(line) = entry.lines.get_mut(0) {
1314 let is_debit = line.debit_amount > Decimal::ZERO;
1315 let original_amount = if is_debit {
1316 line.debit_amount
1317 } else {
1318 line.credit_amount
1319 };
1320
1321 let s = original_amount.to_string();
1323 if s.len() >= 2 {
1324 let chars: Vec<char> = s.chars().collect();
1325 let pos = self.rng.random_range(0..chars.len().saturating_sub(1));
1326 if chars[pos].is_ascii_digit()
1327 && chars.get(pos + 1).is_some_and(|c| c.is_ascii_digit())
1328 {
1329 let mut new_chars = chars;
1330 new_chars.swap(pos, pos + 1);
1331 if let Ok(new_amount) =
1332 new_chars.into_iter().collect::<String>().parse::<Decimal>()
1333 {
1334 let impact = new_amount - original_amount;
1335
1336 if is_debit {
1338 entry.lines[0].debit_amount = new_amount;
1339 } else {
1340 entry.lines[0].credit_amount = new_amount;
1341 }
1342
1343 Self::rebalance_entry(entry, is_debit, impact);
1345
1346 entry.header.header_text = Some(
1347 entry.header.header_text.clone().unwrap_or_default()
1348 + " [HUMAN_ERROR:TRANSPOSITION]",
1349 );
1350 }
1351 }
1352 }
1353 }
1354 }
1355 1 => {
1356 if let Some(line) = entry.lines.get_mut(0) {
1358 let is_debit = line.debit_amount > Decimal::ZERO;
1359 let original_amount = if is_debit {
1360 line.debit_amount
1361 } else {
1362 line.credit_amount
1363 };
1364
1365 let new_amount = original_amount * Decimal::new(10, 0);
1366 let impact = new_amount - original_amount;
1367
1368 if is_debit {
1370 entry.lines[0].debit_amount = new_amount;
1371 } else {
1372 entry.lines[0].credit_amount = new_amount;
1373 }
1374
1375 Self::rebalance_entry(entry, is_debit, impact);
1377
1378 entry.header.header_text = Some(
1379 entry.header.header_text.clone().unwrap_or_default()
1380 + " [HUMAN_ERROR:DECIMAL_SHIFT]",
1381 );
1382 }
1383 }
1384 2 => {
1385 if let Some(ref mut text) = entry.header.header_text {
1387 let typos = ["teh", "adn", "wiht", "taht", "recieve"];
1388 let correct = ["the", "and", "with", "that", "receive"];
1389 let idx = self.rng.random_range(0..typos.len());
1390 if text.to_lowercase().contains(correct[idx]) {
1391 *text = text.replace(correct[idx], typos[idx]);
1392 *text = format!("{} [HUMAN_ERROR:TYPO]", text);
1393 }
1394 }
1395 }
1396 3 => {
1397 if let Some(line) = entry.lines.get_mut(0) {
1399 let is_debit = line.debit_amount > Decimal::ZERO;
1400 let original_amount = if is_debit {
1401 line.debit_amount
1402 } else {
1403 line.credit_amount
1404 };
1405
1406 let new_amount =
1407 (original_amount / Decimal::new(100, 0)).round() * Decimal::new(100, 0);
1408 let impact = new_amount - original_amount;
1409
1410 if is_debit {
1412 entry.lines[0].debit_amount = new_amount;
1413 } else {
1414 entry.lines[0].credit_amount = new_amount;
1415 }
1416
1417 Self::rebalance_entry(entry, is_debit, impact);
1419
1420 entry.header.header_text = Some(
1421 entry.header.header_text.clone().unwrap_or_default()
1422 + " [HUMAN_ERROR:ROUNDED]",
1423 );
1424 }
1425 }
1426 4 => {
1427 if entry.header.document_date == entry.header.posting_date {
1430 let days_late = self.rng.random_range(5..15);
1431 entry.header.document_date =
1432 entry.header.posting_date - chrono::Duration::days(days_late);
1433 entry.header.header_text = Some(
1434 entry.header.header_text.clone().unwrap_or_default()
1435 + " [HUMAN_ERROR:LATE_POSTING]",
1436 );
1437 }
1438 }
1439 _ => {}
1440 }
1441 }
1442
1443 fn maybe_apply_approval_workflow(
1448 &mut self,
1449 entry: &mut JournalEntry,
1450 _posting_date: NaiveDate,
1451 ) {
1452 use rust_decimal::Decimal;
1453
1454 let amount = entry.total_debit();
1455
1456 if amount <= self.approval_threshold {
1458 let workflow = ApprovalWorkflow::auto_approved(
1460 entry.header.created_by.clone(),
1461 entry.header.user_persona.clone(),
1462 amount,
1463 entry.header.created_at,
1464 );
1465 entry.header.approval_workflow = Some(workflow);
1466 return;
1467 }
1468
1469 entry.header.sox_relevant = true;
1471
1472 let required_levels = if amount > Decimal::new(100000, 0) {
1474 3 } else if amount > Decimal::new(50000, 0) {
1476 2 } else {
1478 1 };
1480
1481 let mut workflow = ApprovalWorkflow::new(
1483 entry.header.created_by.clone(),
1484 entry.header.user_persona.clone(),
1485 amount,
1486 );
1487 workflow.required_levels = required_levels;
1488
1489 let submit_time = entry.header.created_at;
1491 let submit_action = ApprovalAction::new(
1492 entry.header.created_by.clone(),
1493 entry.header.user_persona.clone(),
1494 self.parse_persona(&entry.header.user_persona),
1495 ApprovalActionType::Submit,
1496 0,
1497 )
1498 .with_timestamp(submit_time);
1499
1500 workflow.actions.push(submit_action);
1501 workflow.status = ApprovalStatus::Pending;
1502 workflow.submitted_at = Some(submit_time);
1503
1504 let mut current_time = submit_time;
1506 for level in 1..=required_levels {
1507 let delay_hours = self.rng.random_range(1..4);
1509 current_time += chrono::Duration::hours(delay_hours);
1510
1511 while current_time.weekday() == chrono::Weekday::Sat
1513 || current_time.weekday() == chrono::Weekday::Sun
1514 {
1515 current_time += chrono::Duration::days(1);
1516 }
1517
1518 let (approver_id, approver_role) = self.select_approver(level);
1520
1521 let approve_action = ApprovalAction::new(
1522 approver_id.clone(),
1523 format!("{:?}", approver_role),
1524 approver_role,
1525 ApprovalActionType::Approve,
1526 level,
1527 )
1528 .with_timestamp(current_time);
1529
1530 workflow.actions.push(approve_action);
1531 workflow.current_level = level;
1532 }
1533
1534 workflow.status = ApprovalStatus::Approved;
1536 workflow.approved_at = Some(current_time);
1537
1538 entry.header.approval_workflow = Some(workflow);
1539 }
1540
1541 fn select_approver(&mut self, level: u8) -> (String, UserPersona) {
1543 let persona = match level {
1544 1 => UserPersona::Manager,
1545 2 => UserPersona::Controller,
1546 _ => UserPersona::Executive,
1547 };
1548
1549 if let Some(ref pool) = self.user_pool {
1551 if let Some(user) = pool.get_random_user(persona, &mut self.rng) {
1552 return (user.user_id.clone(), persona);
1553 }
1554 }
1555
1556 let approver_id = match persona {
1558 UserPersona::Manager => format!("MGR{:04}", self.rng.random_range(1..100)),
1559 UserPersona::Controller => format!("CTRL{:04}", self.rng.random_range(1..20)),
1560 UserPersona::Executive => format!("EXEC{:04}", self.rng.random_range(1..10)),
1561 _ => format!("USR{:04}", self.rng.random_range(1..1000)),
1562 };
1563
1564 (approver_id, persona)
1565 }
1566
1567 fn parse_persona(&self, persona_str: &str) -> UserPersona {
1569 match persona_str.to_lowercase().as_str() {
1570 s if s.contains("junior") => UserPersona::JuniorAccountant,
1571 s if s.contains("senior") => UserPersona::SeniorAccountant,
1572 s if s.contains("controller") => UserPersona::Controller,
1573 s if s.contains("manager") => UserPersona::Manager,
1574 s if s.contains("executive") => UserPersona::Executive,
1575 s if s.contains("automated") || s.contains("system") => UserPersona::AutomatedSystem,
1576 _ => UserPersona::JuniorAccountant, }
1578 }
1579
1580 pub fn with_approval(mut self, enabled: bool) -> Self {
1582 self.approval_enabled = enabled;
1583 self
1584 }
1585
1586 pub fn with_approval_threshold(mut self, threshold: rust_decimal::Decimal) -> Self {
1588 self.approval_threshold = threshold;
1589 self
1590 }
1591
1592 pub fn with_drift_controller(mut self, controller: DriftController) -> Self {
1598 self.drift_controller = Some(controller);
1599 self
1600 }
1601
1602 pub fn with_drift_config(mut self, config: DriftConfig, seed: u64) -> Self {
1607 if config.enabled {
1608 let total_periods = self.calculate_total_periods();
1609 self.drift_controller = Some(DriftController::new(config, seed, total_periods));
1610 }
1611 self
1612 }
1613
1614 fn calculate_total_periods(&self) -> u32 {
1616 let start_year = self.start_date.year();
1617 let start_month = self.start_date.month();
1618 let end_year = self.end_date.year();
1619 let end_month = self.end_date.month();
1620
1621 ((end_year - start_year) * 12 + (end_month as i32 - start_month as i32) + 1).max(1) as u32
1622 }
1623
1624 fn date_to_period(&self, date: NaiveDate) -> u32 {
1626 let start_year = self.start_date.year();
1627 let start_month = self.start_date.month() as i32;
1628 let date_year = date.year();
1629 let date_month = date.month() as i32;
1630
1631 ((date_year - start_year) * 12 + (date_month - start_month)).max(0) as u32
1632 }
1633
1634 fn get_drift_adjustments(&self, date: NaiveDate) -> DriftAdjustments {
1636 if let Some(ref controller) = self.drift_controller {
1637 let period = self.date_to_period(date);
1638 controller.compute_adjustments(period)
1639 } else {
1640 DriftAdjustments::none()
1641 }
1642 }
1643
1644 #[inline]
1646 fn select_user(&mut self, is_automated: bool) -> (String, String) {
1647 if let Some(ref pool) = self.user_pool {
1648 let persona = if is_automated {
1649 UserPersona::AutomatedSystem
1650 } else {
1651 let roll: f64 = self.rng.random();
1653 if roll < 0.4 {
1654 UserPersona::JuniorAccountant
1655 } else if roll < 0.7 {
1656 UserPersona::SeniorAccountant
1657 } else if roll < 0.85 {
1658 UserPersona::Controller
1659 } else {
1660 UserPersona::Manager
1661 }
1662 };
1663
1664 if let Some(user) = pool.get_random_user(persona, &mut self.rng) {
1665 return (
1666 user.user_id.clone(),
1667 format!("{:?}", user.persona).to_lowercase(),
1668 );
1669 }
1670 }
1671
1672 if is_automated {
1674 (
1675 format!("BATCH{:04}", self.rng.random_range(1..=20)),
1676 "automated_system".to_string(),
1677 )
1678 } else {
1679 (
1680 format!("USER{:04}", self.rng.random_range(1..=40)),
1681 "senior_accountant".to_string(),
1682 )
1683 }
1684 }
1685
1686 #[inline]
1688 fn select_source(&mut self) -> TransactionSource {
1689 let roll: f64 = self.rng.random();
1690 let dist = &self.config.source_distribution;
1691
1692 if roll < dist.manual {
1693 TransactionSource::Manual
1694 } else if roll < dist.manual + dist.automated {
1695 TransactionSource::Automated
1696 } else if roll < dist.manual + dist.automated + dist.recurring {
1697 TransactionSource::Recurring
1698 } else {
1699 TransactionSource::Adjustment
1700 }
1701 }
1702
1703 #[inline]
1705 fn select_business_process(&mut self) -> BusinessProcess {
1706 let roll: f64 = self.rng.random();
1707
1708 if roll < 0.35 {
1710 BusinessProcess::O2C
1711 } else if roll < 0.65 {
1712 BusinessProcess::P2P
1713 } else if roll < 0.85 {
1714 BusinessProcess::R2R
1715 } else if roll < 0.95 {
1716 BusinessProcess::H2R
1717 } else {
1718 BusinessProcess::A2R
1719 }
1720 }
1721
1722 #[inline]
1723 fn select_debit_account(&mut self) -> &GLAccount {
1724 let accounts = self.coa.get_accounts_by_type(AccountType::Asset);
1725 let expense_accounts = self.coa.get_accounts_by_type(AccountType::Expense);
1726
1727 let all: Vec<_> = if self.rng.random::<f64>() < 0.6 {
1729 accounts
1730 } else {
1731 expense_accounts
1732 };
1733
1734 all.choose(&mut self.rng).copied().unwrap_or_else(|| {
1735 tracing::warn!(
1736 "Account selection returned empty list, falling back to first COA account"
1737 );
1738 &self.coa.accounts[0]
1739 })
1740 }
1741
1742 #[inline]
1743 fn select_credit_account(&mut self) -> &GLAccount {
1744 let liability_accounts = self.coa.get_accounts_by_type(AccountType::Liability);
1745 let revenue_accounts = self.coa.get_accounts_by_type(AccountType::Revenue);
1746
1747 let all: Vec<_> = if self.rng.random::<f64>() < 0.6 {
1749 liability_accounts
1750 } else {
1751 revenue_accounts
1752 };
1753
1754 all.choose(&mut self.rng).copied().unwrap_or_else(|| {
1755 tracing::warn!(
1756 "Account selection returned empty list, falling back to first COA account"
1757 );
1758 &self.coa.accounts[0]
1759 })
1760 }
1761}
1762
1763impl Generator for JournalEntryGenerator {
1764 type Item = JournalEntry;
1765 type Config = (
1766 TransactionConfig,
1767 Arc<ChartOfAccounts>,
1768 Vec<String>,
1769 NaiveDate,
1770 NaiveDate,
1771 );
1772
1773 fn new(config: Self::Config, seed: u64) -> Self {
1774 Self::new_with_params(config.0, config.1, config.2, config.3, config.4, seed)
1775 }
1776
1777 fn generate_one(&mut self) -> Self::Item {
1778 self.generate()
1779 }
1780
1781 fn reset(&mut self) {
1782 self.rng = seeded_rng(self.seed, 0);
1783 self.line_sampler.reset(self.seed + 1);
1784 self.amount_sampler.reset(self.seed + 2);
1785 self.temporal_sampler.reset(self.seed + 3);
1786 self.count = 0;
1787 self.uuid_factory.reset();
1788
1789 let mut ref_gen = ReferenceGenerator::new(
1791 self.start_date.year(),
1792 self.companies.first().map(|s| s.as_str()).unwrap_or("1000"),
1793 );
1794 ref_gen.set_prefix(
1795 ReferenceType::Invoice,
1796 &self.template_config.references.invoice_prefix,
1797 );
1798 ref_gen.set_prefix(
1799 ReferenceType::PurchaseOrder,
1800 &self.template_config.references.po_prefix,
1801 );
1802 ref_gen.set_prefix(
1803 ReferenceType::SalesOrder,
1804 &self.template_config.references.so_prefix,
1805 );
1806 self.reference_generator = ref_gen;
1807 }
1808
1809 fn count(&self) -> u64 {
1810 self.count
1811 }
1812
1813 fn seed(&self) -> u64 {
1814 self.seed
1815 }
1816}
1817
1818use datasynth_core::traits::ParallelGenerator;
1819
1820impl ParallelGenerator for JournalEntryGenerator {
1821 fn split(self, parts: usize) -> Vec<Self> {
1827 let parts = parts.max(1);
1828 (0..parts)
1829 .map(|i| {
1830 let sub_seed = self
1832 .seed
1833 .wrapping_add((i as u64).wrapping_mul(0x9E3779B97F4A7C15));
1834
1835 let mut gen = JournalEntryGenerator::new_with_full_config(
1836 self.config.clone(),
1837 Arc::clone(&self.coa),
1838 self.companies.clone(),
1839 self.start_date,
1840 self.end_date,
1841 sub_seed,
1842 self.template_config.clone(),
1843 self.user_pool.clone(),
1844 );
1845
1846 gen.company_selector = self.company_selector.clone();
1848 gen.vendor_pool = self.vendor_pool.clone();
1849 gen.customer_pool = self.customer_pool.clone();
1850 gen.material_pool = self.material_pool.clone();
1851 gen.using_real_master_data = self.using_real_master_data;
1852 gen.fraud_config = self.fraud_config.clone();
1853 gen.persona_errors_enabled = self.persona_errors_enabled;
1854 gen.approval_enabled = self.approval_enabled;
1855 gen.approval_threshold = self.approval_threshold;
1856
1857 gen.uuid_factory = DeterministicUuidFactory::for_partition(
1859 sub_seed,
1860 GeneratorType::JournalEntry,
1861 i as u8,
1862 );
1863
1864 if let Some(ref config) = self.temporal_patterns_config {
1866 gen.temporal_patterns_config = Some(config.clone());
1867 if config.business_days.enabled {
1869 if let Some(ref bdc) = self.business_day_calculator {
1870 gen.business_day_calculator = Some(bdc.clone());
1871 }
1872 }
1873 if config.processing_lags.enabled {
1875 let lag_config =
1876 Self::convert_processing_lag_config(&config.processing_lags);
1877 gen.processing_lag_calculator =
1878 Some(ProcessingLagCalculator::with_config(sub_seed, lag_config));
1879 }
1880 }
1881
1882 if let Some(ref dc) = self.drift_controller {
1884 gen.drift_controller = Some(dc.clone());
1885 }
1886
1887 gen
1888 })
1889 .collect()
1890 }
1891}
1892
1893#[cfg(test)]
1894#[allow(clippy::unwrap_used)]
1895mod tests {
1896 use super::*;
1897 use crate::ChartOfAccountsGenerator;
1898
/// Entries without a human-error marker must balance, every entry needs at
/// least two lines, and at least 80 of 100 entries must be marker-free.
#[test]
fn test_generate_balanced_entries() {
    let coa = Arc::new(
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42)
            .generate(),
    );

    let mut generator = JournalEntryGenerator::new_with_params(
        TransactionConfig::default(),
        coa,
        vec!["1000".to_string()],
        NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
        NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
        42,
    );

    let mut balanced = 0;
    for _ in 0..100 {
        let entry = generator.generate();

        // Entries tagged with a human error are exempt from the balance check.
        let tagged_as_error = entry
            .header
            .header_text
            .as_deref()
            .is_some_and(|text| text.contains("[HUMAN_ERROR:"));

        if !tagged_as_error {
            assert!(
                entry.is_balanced(),
                "Entry {:?} is not balanced",
                entry.header.document_id
            );
            balanced += 1;
        }
        assert!(entry.line_count() >= 2, "Entry has fewer than 2 lines");
    }

    assert!(
        balanced >= 80,
        "Expected at least 80 balanced entries, got {}",
        balanced
    );
}
1944
/// Two generators built with the same inputs and seed must emit the same
/// document ids and total debits.
#[test]
fn test_deterministic_generation() {
    let coa = Arc::new(
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42)
            .generate(),
    );

    let build = || {
        JournalEntryGenerator::new_with_params(
            TransactionConfig::default(),
            Arc::clone(&coa),
            vec!["1000".to_string()],
            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
            42,
        )
    };
    let mut first = build();
    let mut second = build();

    for _ in 0..50 {
        let left = first.generate();
        let right = second.generate();
        assert_eq!(left.header.document_id, right.header.document_id);
        assert_eq!(left.total_debit(), right.total_debit());
    }
}
1976
/// With all template features switched on and persona errors off, every
/// entry carries header text, a reference, a business process and line texts,
/// and is balanced.
#[test]
fn test_templates_generate_descriptions() {
    use datasynth_config::schema::{
        CultureDistribution, DescriptionTemplateConfig, NameTemplateConfig,
        ReferenceTemplateConfig,
    };

    let coa = Arc::new(
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42)
            .generate(),
    );

    let names = NameTemplateConfig {
        generate_realistic_names: true,
        email_domain: "test.com".to_string(),
        culture_distribution: CultureDistribution::default(),
    };
    let descriptions = DescriptionTemplateConfig {
        generate_header_text: true,
        generate_line_text: true,
    };
    let references = ReferenceTemplateConfig {
        generate_references: true,
        invoice_prefix: "TEST-INV".to_string(),
        po_prefix: "TEST-PO".to_string(),
        so_prefix: "TEST-SO".to_string(),
    };
    let template_config = TemplateConfig {
        names,
        descriptions,
        references,
    };

    let mut generator = JournalEntryGenerator::new_with_full_config(
        TransactionConfig::default(),
        coa,
        vec!["1000".to_string()],
        NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
        NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
        42,
        template_config,
        None,
    )
    .with_persona_errors(false);

    for _ in 0..10 {
        let entry = generator.generate();
        let header = &entry.header;

        assert!(
            header.header_text.is_some(),
            "Header text should be populated"
        );
        assert!(header.reference.is_some(), "Reference should be populated");
        assert!(
            header.business_process.is_some(),
            "Business process should be set"
        );

        for line in &entry.lines {
            assert!(line.line_text.is_some(), "Line text should be populated");
        }

        assert!(entry.is_balanced());
    }
}
2044
/// A generator backed by a standard user pool must fill `created_by` on
/// every entry.
#[test]
fn test_user_pool_integration() {
    let coa = Arc::new(
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42)
            .generate(),
    );

    let companies = vec!["1000".to_string()];
    let user_pool = crate::UserGenerator::new(42).generate_standard(&companies);

    let mut generator = JournalEntryGenerator::new_with_full_config(
        TransactionConfig::default(),
        coa,
        companies,
        NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
        NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
        42,
        TemplateConfig::default(),
        Some(user_pool),
    );

    for _ in 0..20 {
        let created_by = generator.generate().header.created_by;
        assert!(!created_by.is_empty());
    }
}
2077
/// `is_using_real_master_data` is false on a fresh generator and true after
/// vendors, customers and materials were attached.
#[test]
fn test_master_data_connection() {
    let coa = Arc::new(
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42)
            .generate(),
    );

    let vendors = vec![
        Vendor::new("V-TEST-001", "Test Vendor Alpha", VendorType::Supplier),
        Vendor::new("V-TEST-002", "Test Vendor Beta", VendorType::Technology),
    ];
    let customers = vec![
        Customer::new("C-TEST-001", "Test Customer One", CustomerType::Corporate),
        Customer::new(
            "C-TEST-002",
            "Test Customer Two",
            CustomerType::SmallBusiness,
        ),
    ];
    let materials = vec![Material::new(
        "MAT-TEST-001",
        "Test Material A",
        MaterialType::RawMaterial,
    )];

    let bare = JournalEntryGenerator::new_with_params(
        TransactionConfig::default(),
        coa,
        vec!["1000".to_string()],
        NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
        NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
        42,
    );
    assert!(!bare.is_using_real_master_data());

    let connected = bare
        .with_vendors(&vendors)
        .with_customers(&customers)
        .with_materials(&materials);
    assert!(connected.is_using_real_master_data());
}
2129
/// `with_master_data` attaches all three pools in one call and flips
/// `is_using_real_master_data` to true.
#[test]
fn test_with_master_data_convenience_method() {
    let coa = Arc::new(
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42)
            .generate(),
    );

    let vendors = vec![Vendor::new("V-001", "Vendor One", VendorType::Supplier)];
    let customers = vec![Customer::new(
        "C-001",
        "Customer One",
        CustomerType::Corporate,
    )];
    let materials = vec![Material::new(
        "MAT-001",
        "Material One",
        MaterialType::RawMaterial,
    )];

    let base = JournalEntryGenerator::new_with_params(
        TransactionConfig::default(),
        coa,
        vec!["1000".to_string()],
        NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
        NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
        42,
    );
    let connected = base.with_master_data(&vendors, &customers, &materials);

    assert!(connected.is_using_real_master_data());
}
2160
/// `apply_stress_factors` must leave the base rate almost unchanged on the
/// reference day and raise it for the month-end, year-end, Friday and Monday
/// probes.
#[test]
fn test_stress_factors_increase_error_rate() {
    let coa = Arc::new(
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42)
            .generate(),
    );

    let generator = JournalEntryGenerator::new_with_params(
        TransactionConfig::default(),
        coa,
        vec!["1000".to_string()],
        NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
        NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
        42,
    );

    let base_rate = 0.1;

    // Probe dates. NOTE(review): 2024-06-15 and 2024-06-29 are both
    // Saturdays, 2024-12-30 is a Monday.
    let regular_day = NaiveDate::from_ymd_opt(2024, 6, 15).unwrap();
    let month_end = NaiveDate::from_ymd_opt(2024, 6, 29).unwrap();
    let year_end = NaiveDate::from_ymd_opt(2024, 12, 30).unwrap();
    let friday = NaiveDate::from_ymd_opt(2024, 6, 14).unwrap();
    let monday = NaiveDate::from_ymd_opt(2024, 6, 17).unwrap();

    let regular_rate = generator.apply_stress_factors(base_rate, regular_day);
    assert!(
        (regular_rate - base_rate).abs() < 0.01,
        "Regular day should have minimal stress factor adjustment"
    );

    let month_end_rate = generator.apply_stress_factors(base_rate, month_end);
    assert!(
        month_end_rate > regular_rate,
        "Month end should have higher error rate than regular day"
    );

    let year_end_rate = generator.apply_stress_factors(base_rate, year_end);
    assert!(
        year_end_rate > month_end_rate,
        "Year end should have highest error rate"
    );

    let friday_rate = generator.apply_stress_factors(base_rate, friday);
    assert!(
        friday_rate > regular_rate,
        "Friday should have higher error rate than mid-week"
    );

    let monday_rate = generator.apply_stress_factors(base_rate, monday);
    assert!(
        monday_rate > regular_rate,
        "Monday should have higher error rate than mid-week"
    );
}
2218
/// With persona errors off, all 200 generated entries must balance and at
/// least one posting date must be shared by several entries.
#[test]
fn test_batching_produces_similar_entries() {
    use std::collections::HashMap;

    let coa = Arc::new(
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42)
            .generate(),
    );

    let mut generator = JournalEntryGenerator::new_with_params(
        TransactionConfig::default(),
        coa,
        vec!["1000".to_string()],
        NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
        NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
        123,
    )
    .with_persona_errors(false);

    let mut entries: Vec<JournalEntry> = Vec::with_capacity(200);
    for _ in 0..200 {
        entries.push(generator.generate());
    }

    for entry in &entries {
        assert!(
            entry.is_balanced(),
            "All entries including batched should be balanced"
        );
    }

    // Count how many entries were posted on each date.
    let mut entries_per_date: HashMap<NaiveDate, usize> = HashMap::new();
    for entry in &entries {
        *entries_per_date
            .entry(entry.header.posting_date)
            .or_default() += 1;
    }

    let dates_with_multiple = entries_per_date
        .values()
        .filter(|&&count| count > 1)
        .count();
    assert!(
        dates_with_multiple > 0,
        "With batching, should see some dates with multiple entries"
    );
}
2261
/// With business-day handling enabled for the US calendar, no entry may be
/// posted on a Saturday or Sunday.
#[test]
fn test_temporal_patterns_business_days() {
    use datasynth_config::schema::{
        BusinessDaySchemaConfig, CalendarSchemaConfig, TemporalPatternsConfig,
    };

    let coa = Arc::new(
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42)
            .generate(),
    );

    let business_days = BusinessDaySchemaConfig {
        enabled: true,
        ..Default::default()
    };
    let calendars = CalendarSchemaConfig {
        regions: vec!["US".to_string()],
        custom_holidays: vec![],
    };
    let temporal_config = TemporalPatternsConfig {
        enabled: true,
        business_days,
        calendars,
        ..Default::default()
    };

    let mut generator = JournalEntryGenerator::new_with_params(
        TransactionConfig::default(),
        coa,
        vec!["1000".to_string()],
        NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
        NaiveDate::from_ymd_opt(2024, 3, 31).unwrap(),
        42,
    )
    .with_temporal_patterns(temporal_config, 42)
    .with_persona_errors(false);

    let mut entries: Vec<JournalEntry> = Vec::with_capacity(100);
    for _ in 0..100 {
        entries.push(generator.generate());
    }

    for entry in &entries {
        let posting_date = entry.header.posting_date;
        let on_weekend = matches!(
            posting_date.weekday(),
            chrono::Weekday::Sat | chrono::Weekday::Sun
        );
        assert!(
            !on_weekend,
            "Posting date {:?} should not be a weekend",
            entry.header.posting_date
        );
    }
}
2309}