1use chrono::{Datelike, NaiveDate};
4use datasynth_core::utils::seeded_rng;
5use rand::prelude::*;
6use rand_chacha::ChaCha8Rng;
7use rust_decimal::prelude::*;
8use rust_decimal::Decimal;
9use std::sync::Arc;
10
11use tracing::debug;
12
13use datasynth_config::schema::{
14 FraudConfig, GeneratorConfig, TemplateConfig, TemporalPatternsConfig, TransactionConfig,
15};
16use datasynth_core::distributions::{
17 BusinessDayCalculator, CrossDayConfig, DriftAdjustments, DriftConfig, DriftController,
18 EventType, LagDistribution, PeriodEndConfig, PeriodEndDynamics, PeriodEndModel,
19 ProcessingLagCalculator, ProcessingLagConfig, *,
20};
21use datasynth_core::models::*;
22use datasynth_core::templates::{
23 descriptions::DescriptionContext, DescriptionGenerator, ReferenceGenerator, ReferenceType,
24};
25use datasynth_core::traits::Generator;
26use datasynth_core::uuid_factory::{DeterministicUuidFactory, GeneratorType};
27use datasynth_core::CountryPack;
28
29use crate::company_selector::WeightedCompanySelector;
30use crate::user_generator::{UserGenerator, UserGeneratorConfig};
31
/// Generator that produces synthetic [`JournalEntry`] values.
///
/// Bundles the random source, the samplers, the master-data pools and the
/// optional temporal components that the `generate*` methods draw from.
pub struct JournalEntryGenerator {
    /// Generator-level RNG (company selection, fraud rolls, batching, variation).
    rng: ChaCha8Rng,
    /// Seed passed to the constructor; reused to derive the user-generator seed.
    seed: u64,
    /// Transaction configuration the samplers were built from.
    config: TransactionConfig,
    /// Shared chart of accounts.
    coa: Arc<ChartOfAccounts>,
    /// Company codes entries may be generated for.
    companies: Vec<String>,
    /// Picks the company code for each entry.
    company_selector: WeightedCompanySelector,
    /// Samples the debit/credit line layout of an entry.
    line_sampler: LineItemSampler,
    /// Samples entry amounts and splits them across lines.
    amount_sampler: AmountSampler,
    /// Samples posting dates and times.
    temporal_sampler: TemporalSampler,
    /// First date passed to the posting-date sampler.
    start_date: NaiveDate,
    /// Last date passed to the posting-date sampler.
    end_date: NaiveDate,
    /// Number of entries generated so far.
    count: u64,
    /// Source of deterministic document ids.
    uuid_factory: DeterministicUuidFactory,
    /// Optional pool of users to attribute entries to.
    user_pool: Option<UserPool>,
    /// Produces header and line texts.
    description_generator: DescriptionGenerator,
    /// Produces document references.
    reference_generator: ReferenceGenerator,
    /// Template settings (names, descriptions, references).
    template_config: TemplateConfig,
    /// Vendors used to enrich P2P descriptions.
    vendor_pool: VendorPool,
    /// Customers used to enrich O2C descriptions.
    customer_pool: CustomerPool,
    /// Materials supplied via `with_materials`, if any.
    material_pool: Option<MaterialPool>,
    /// Set once any non-empty master data has been supplied.
    using_real_master_data: bool,
    /// Fraud injection settings.
    fraud_config: FraudConfig,
    /// Whether persona-based human errors may be injected.
    persona_errors_enabled: bool,
    /// Whether the approval workflow step runs for each entry.
    approval_enabled: bool,
    /// Approval threshold; the constructor initialises it to 10000.
    approval_threshold: rust_decimal::Decimal,
    /// State of the batch of similar entries currently being emitted, if any.
    batch_state: Option<BatchState>,
    /// Optional drift controller; the constructor leaves it unset.
    drift_controller: Option<DriftController>,
    /// Optional business-day calculator used to move posting dates.
    business_day_calculator: Option<BusinessDayCalculator>,
    /// Optional processing-lag calculator.
    processing_lag_calculator: Option<ProcessingLagCalculator>,
    /// Temporal pattern configuration, stored when temporal patterns are applied.
    temporal_patterns_config: Option<TemporalPatternsConfig>,
}
74
/// Snapshot of the entry that started a batch, plus the number of follow-up
/// entries still to be produced from it.
#[derive(Clone)]
struct BatchState {
    /// GL account of the first line of the entry that started the batch.
    base_account_number: String,
    /// Total debit amount of the entry that started the batch.
    base_amount: rust_decimal::Decimal,
    /// Business process of the entry that started the batch, if it had one.
    base_business_process: Option<BusinessProcess>,
    /// Posting date reused by every entry in the batch.
    base_posting_date: NaiveDate,
    /// Countdown of batched entries left to generate.
    remaining: u8,
}
89
90impl JournalEntryGenerator {
91 pub fn new_with_params(
93 config: TransactionConfig,
94 coa: Arc<ChartOfAccounts>,
95 companies: Vec<String>,
96 start_date: NaiveDate,
97 end_date: NaiveDate,
98 seed: u64,
99 ) -> Self {
100 Self::new_with_full_config(
101 config,
102 coa,
103 companies,
104 start_date,
105 end_date,
106 seed,
107 TemplateConfig::default(),
108 None,
109 )
110 }
111
112 #[allow(clippy::too_many_arguments)]
114 pub fn new_with_full_config(
115 config: TransactionConfig,
116 coa: Arc<ChartOfAccounts>,
117 companies: Vec<String>,
118 start_date: NaiveDate,
119 end_date: NaiveDate,
120 seed: u64,
121 template_config: TemplateConfig,
122 user_pool: Option<UserPool>,
123 ) -> Self {
124 let user_pool = user_pool.or_else(|| {
126 if template_config.names.generate_realistic_names {
127 let user_gen_config = UserGeneratorConfig {
128 culture_distribution: vec![
129 (
130 datasynth_core::templates::NameCulture::WesternUs,
131 template_config.names.culture_distribution.western_us,
132 ),
133 (
134 datasynth_core::templates::NameCulture::Hispanic,
135 template_config.names.culture_distribution.hispanic,
136 ),
137 (
138 datasynth_core::templates::NameCulture::German,
139 template_config.names.culture_distribution.german,
140 ),
141 (
142 datasynth_core::templates::NameCulture::French,
143 template_config.names.culture_distribution.french,
144 ),
145 (
146 datasynth_core::templates::NameCulture::Chinese,
147 template_config.names.culture_distribution.chinese,
148 ),
149 (
150 datasynth_core::templates::NameCulture::Japanese,
151 template_config.names.culture_distribution.japanese,
152 ),
153 (
154 datasynth_core::templates::NameCulture::Indian,
155 template_config.names.culture_distribution.indian,
156 ),
157 ],
158 email_domain: template_config.names.email_domain.clone(),
159 generate_realistic_names: true,
160 };
161 let mut user_gen = UserGenerator::with_config(seed + 100, user_gen_config);
162 Some(user_gen.generate_standard(&companies))
163 } else {
164 None
165 }
166 });
167
168 let mut ref_gen = ReferenceGenerator::new(
170 start_date.year(),
171 companies.first().map(|s| s.as_str()).unwrap_or("1000"),
172 );
173 ref_gen.set_prefix(
174 ReferenceType::Invoice,
175 &template_config.references.invoice_prefix,
176 );
177 ref_gen.set_prefix(
178 ReferenceType::PurchaseOrder,
179 &template_config.references.po_prefix,
180 );
181 ref_gen.set_prefix(
182 ReferenceType::SalesOrder,
183 &template_config.references.so_prefix,
184 );
185
186 let company_selector = WeightedCompanySelector::uniform(companies.clone());
188
189 Self {
190 rng: seeded_rng(seed, 0),
191 seed,
192 config: config.clone(),
193 coa,
194 companies,
195 company_selector,
196 line_sampler: LineItemSampler::with_config(
197 seed + 1,
198 config.line_item_distribution.clone(),
199 config.even_odd_distribution.clone(),
200 config.debit_credit_distribution.clone(),
201 ),
202 amount_sampler: AmountSampler::with_config(seed + 2, config.amounts.clone()),
203 temporal_sampler: TemporalSampler::with_config(
204 seed + 3,
205 config.seasonality.clone(),
206 WorkingHoursConfig::default(),
207 Vec::new(),
208 ),
209 start_date,
210 end_date,
211 count: 0,
212 uuid_factory: DeterministicUuidFactory::new(seed, GeneratorType::JournalEntry),
213 user_pool,
214 description_generator: DescriptionGenerator::new(),
215 reference_generator: ref_gen,
216 template_config,
217 vendor_pool: VendorPool::standard(),
218 customer_pool: CustomerPool::standard(),
219 material_pool: None,
220 using_real_master_data: false,
221 fraud_config: FraudConfig::default(),
222 persona_errors_enabled: true, approval_enabled: true, approval_threshold: rust_decimal::Decimal::new(10000, 0), batch_state: None,
226 drift_controller: None,
227 business_day_calculator: None,
228 processing_lag_calculator: None,
229 temporal_patterns_config: None,
230 }
231 }
232
233 pub fn from_generator_config(
238 full_config: &GeneratorConfig,
239 coa: Arc<ChartOfAccounts>,
240 start_date: NaiveDate,
241 end_date: NaiveDate,
242 seed: u64,
243 ) -> Self {
244 let companies: Vec<String> = full_config
245 .companies
246 .iter()
247 .map(|c| c.code.clone())
248 .collect();
249
250 let company_selector = WeightedCompanySelector::from_configs(&full_config.companies);
252
253 let mut generator = Self::new_with_full_config(
254 full_config.transactions.clone(),
255 coa,
256 companies,
257 start_date,
258 end_date,
259 seed,
260 full_config.templates.clone(),
261 None,
262 );
263
264 generator.company_selector = company_selector;
266
267 generator.fraud_config = full_config.fraud.clone();
269
270 let temporal_config = &full_config.temporal_patterns;
272 if temporal_config.enabled {
273 generator = generator.with_temporal_patterns(temporal_config.clone(), seed);
274 }
275
276 generator
277 }
278
279 pub fn with_temporal_patterns(mut self, config: TemporalPatternsConfig, seed: u64) -> Self {
286 if config.business_days.enabled {
288 let region = config
289 .calendars
290 .regions
291 .first()
292 .map(|r| Self::parse_region(r))
293 .unwrap_or(Region::US);
294
295 let calendar = HolidayCalendar::new(region, self.start_date.year());
296 self.business_day_calculator = Some(BusinessDayCalculator::new(calendar));
297 }
298
299 if config.processing_lags.enabled {
301 let lag_config = Self::convert_processing_lag_config(&config.processing_lags);
302 self.processing_lag_calculator =
303 Some(ProcessingLagCalculator::with_config(seed, lag_config));
304 }
305
306 let model = config.period_end.model.as_deref().unwrap_or("flat");
308 if model != "flat"
309 || config
310 .period_end
311 .month_end
312 .as_ref()
313 .is_some_and(|m| m.peak_multiplier.unwrap_or(1.0) != 1.0)
314 {
315 let dynamics = Self::convert_period_end_config(&config.period_end);
316 self.temporal_sampler.set_period_end_dynamics(dynamics);
317 }
318
319 self.temporal_patterns_config = Some(config);
320 self
321 }
322
323 pub fn with_country_pack_temporal(
331 mut self,
332 config: TemporalPatternsConfig,
333 seed: u64,
334 pack: &CountryPack,
335 ) -> Self {
336 if config.business_days.enabled {
338 let calendar = HolidayCalendar::from_country_pack(pack, self.start_date.year());
339 self.business_day_calculator = Some(BusinessDayCalculator::new(calendar));
340 }
341
342 if config.processing_lags.enabled {
344 let lag_config = Self::convert_processing_lag_config(&config.processing_lags);
345 self.processing_lag_calculator =
346 Some(ProcessingLagCalculator::with_config(seed, lag_config));
347 }
348
349 let model = config.period_end.model.as_deref().unwrap_or("flat");
351 if model != "flat"
352 || config
353 .period_end
354 .month_end
355 .as_ref()
356 .is_some_and(|m| m.peak_multiplier.unwrap_or(1.0) != 1.0)
357 {
358 let dynamics = Self::convert_period_end_config(&config.period_end);
359 self.temporal_sampler.set_period_end_dynamics(dynamics);
360 }
361
362 self.temporal_patterns_config = Some(config);
363 self
364 }
365
366 fn convert_processing_lag_config(
368 schema: &datasynth_config::schema::ProcessingLagSchemaConfig,
369 ) -> ProcessingLagConfig {
370 let mut config = ProcessingLagConfig {
371 enabled: schema.enabled,
372 ..Default::default()
373 };
374
375 let convert_lag = |lag: &datasynth_config::schema::LagDistributionSchemaConfig| {
377 let mut dist = LagDistribution::log_normal(lag.mu, lag.sigma);
378 if let Some(min) = lag.min_hours {
379 dist.min_lag_hours = min;
380 }
381 if let Some(max) = lag.max_hours {
382 dist.max_lag_hours = max;
383 }
384 dist
385 };
386
387 if let Some(ref lag) = schema.sales_order_lag {
389 config
390 .event_lags
391 .insert(EventType::SalesOrder, convert_lag(lag));
392 }
393 if let Some(ref lag) = schema.purchase_order_lag {
394 config
395 .event_lags
396 .insert(EventType::PurchaseOrder, convert_lag(lag));
397 }
398 if let Some(ref lag) = schema.goods_receipt_lag {
399 config
400 .event_lags
401 .insert(EventType::GoodsReceipt, convert_lag(lag));
402 }
403 if let Some(ref lag) = schema.invoice_receipt_lag {
404 config
405 .event_lags
406 .insert(EventType::InvoiceReceipt, convert_lag(lag));
407 }
408 if let Some(ref lag) = schema.invoice_issue_lag {
409 config
410 .event_lags
411 .insert(EventType::InvoiceIssue, convert_lag(lag));
412 }
413 if let Some(ref lag) = schema.payment_lag {
414 config
415 .event_lags
416 .insert(EventType::Payment, convert_lag(lag));
417 }
418 if let Some(ref lag) = schema.journal_entry_lag {
419 config
420 .event_lags
421 .insert(EventType::JournalEntry, convert_lag(lag));
422 }
423
424 if let Some(ref cross_day) = schema.cross_day_posting {
426 config.cross_day = CrossDayConfig {
427 enabled: cross_day.enabled,
428 probability_by_hour: cross_day.probability_by_hour.clone(),
429 ..Default::default()
430 };
431 }
432
433 config
434 }
435
436 fn convert_period_end_config(
438 schema: &datasynth_config::schema::PeriodEndSchemaConfig,
439 ) -> PeriodEndDynamics {
440 let model_type = schema.model.as_deref().unwrap_or("exponential");
441
442 let convert_period =
444 |period: Option<&datasynth_config::schema::PeriodEndModelSchemaConfig>,
445 default_peak: f64|
446 -> PeriodEndConfig {
447 if let Some(p) = period {
448 let model = match model_type {
449 "flat" => PeriodEndModel::FlatMultiplier {
450 multiplier: p.peak_multiplier.unwrap_or(default_peak),
451 },
452 "extended_crunch" => PeriodEndModel::ExtendedCrunch {
453 start_day: p.start_day.unwrap_or(-10),
454 sustained_high_days: p.sustained_high_days.unwrap_or(3),
455 peak_multiplier: p.peak_multiplier.unwrap_or(default_peak),
456 ramp_up_days: 3, },
458 _ => PeriodEndModel::ExponentialAcceleration {
459 start_day: p.start_day.unwrap_or(-10),
460 base_multiplier: p.base_multiplier.unwrap_or(1.0),
461 peak_multiplier: p.peak_multiplier.unwrap_or(default_peak),
462 decay_rate: p.decay_rate.unwrap_or(0.3),
463 },
464 };
465 PeriodEndConfig {
466 enabled: true,
467 model,
468 additional_multiplier: p.additional_multiplier.unwrap_or(1.0),
469 }
470 } else {
471 PeriodEndConfig {
472 enabled: true,
473 model: PeriodEndModel::ExponentialAcceleration {
474 start_day: -10,
475 base_multiplier: 1.0,
476 peak_multiplier: default_peak,
477 decay_rate: 0.3,
478 },
479 additional_multiplier: 1.0,
480 }
481 }
482 };
483
484 PeriodEndDynamics::new(
485 convert_period(schema.month_end.as_ref(), 2.0),
486 convert_period(schema.quarter_end.as_ref(), 3.5),
487 convert_period(schema.year_end.as_ref(), 5.0),
488 )
489 }
490
491 fn parse_region(region_str: &str) -> Region {
493 match region_str.to_uppercase().as_str() {
494 "US" => Region::US,
495 "DE" => Region::DE,
496 "GB" => Region::GB,
497 "CN" => Region::CN,
498 "JP" => Region::JP,
499 "IN" => Region::IN,
500 "BR" => Region::BR,
501 "MX" => Region::MX,
502 "AU" => Region::AU,
503 "SG" => Region::SG,
504 "KR" => Region::KR,
505 _ => Region::US,
506 }
507 }
508
/// Replaces the company selector used to pick a company code per entry.
pub fn set_company_selector(&mut self, selector: WeightedCompanySelector) {
    self.company_selector = selector;
}
513
/// Returns the company selector currently in use.
pub fn company_selector(&self) -> &WeightedCompanySelector {
    &self.company_selector
}
518
/// Replaces the fraud configuration in place.
pub fn set_fraud_config(&mut self, config: FraudConfig) {
    self.fraud_config = config;
}
523
524 pub fn with_vendors(mut self, vendors: &[Vendor]) -> Self {
529 if !vendors.is_empty() {
530 self.vendor_pool = VendorPool::from_vendors(vendors.to_vec());
531 self.using_real_master_data = true;
532 }
533 self
534 }
535
536 pub fn with_customers(mut self, customers: &[Customer]) -> Self {
541 if !customers.is_empty() {
542 self.customer_pool = CustomerPool::from_customers(customers.to_vec());
543 self.using_real_master_data = true;
544 }
545 self
546 }
547
548 pub fn with_materials(mut self, materials: &[Material]) -> Self {
552 if !materials.is_empty() {
553 self.material_pool = Some(MaterialPool::from_materials(materials.to_vec()));
554 self.using_real_master_data = true;
555 }
556 self
557 }
558
559 pub fn with_master_data(
564 self,
565 vendors: &[Vendor],
566 customers: &[Customer],
567 materials: &[Material],
568 ) -> Self {
569 self.with_vendors(vendors)
570 .with_customers(customers)
571 .with_materials(materials)
572 }
573
574 pub fn with_country_pack_names(mut self, pack: &CountryPack) -> Self {
581 let name_gen =
582 datasynth_core::templates::MultiCultureNameGenerator::from_country_pack(pack);
583 let config = UserGeneratorConfig {
584 culture_distribution: Vec::new(),
587 email_domain: name_gen.email_domain().to_string(),
588 generate_realistic_names: true,
589 };
590 let mut user_gen = UserGenerator::with_name_generator(self.seed + 100, config, name_gen);
591 self.user_pool = Some(user_gen.generate_standard(&self.companies));
592 self
593 }
594
/// Returns `true` once non-empty vendors, customers or materials have been
/// supplied through the `with_*` master-data builders.
pub fn is_using_real_master_data(&self) -> bool {
    self.using_real_master_data
}
599
600 fn determine_fraud(&mut self) -> Option<FraudType> {
602 if !self.fraud_config.enabled {
603 return None;
604 }
605
606 if self.rng.gen::<f64>() >= self.fraud_config.fraud_rate {
608 return None;
609 }
610
611 Some(self.select_fraud_type())
613 }
614
615 fn select_fraud_type(&mut self) -> FraudType {
617 let dist = &self.fraud_config.fraud_type_distribution;
618 let roll: f64 = self.rng.gen();
619
620 let mut cumulative = 0.0;
621
622 cumulative += dist.suspense_account_abuse;
623 if roll < cumulative {
624 return FraudType::SuspenseAccountAbuse;
625 }
626
627 cumulative += dist.fictitious_transaction;
628 if roll < cumulative {
629 return FraudType::FictitiousTransaction;
630 }
631
632 cumulative += dist.revenue_manipulation;
633 if roll < cumulative {
634 return FraudType::RevenueManipulation;
635 }
636
637 cumulative += dist.expense_capitalization;
638 if roll < cumulative {
639 return FraudType::ExpenseCapitalization;
640 }
641
642 cumulative += dist.split_transaction;
643 if roll < cumulative {
644 return FraudType::SplitTransaction;
645 }
646
647 cumulative += dist.timing_anomaly;
648 if roll < cumulative {
649 return FraudType::TimingAnomaly;
650 }
651
652 cumulative += dist.unauthorized_access;
653 if roll < cumulative {
654 return FraudType::UnauthorizedAccess;
655 }
656
657 FraudType::DuplicatePayment
659 }
660
661 fn fraud_type_to_amount_pattern(&self, fraud_type: FraudType) -> FraudAmountPattern {
663 match fraud_type {
664 FraudType::SplitTransaction | FraudType::JustBelowThreshold => {
665 FraudAmountPattern::ThresholdAdjacent
666 }
667 FraudType::FictitiousTransaction
668 | FraudType::FictitiousEntry
669 | FraudType::SuspenseAccountAbuse
670 | FraudType::RoundDollarManipulation => FraudAmountPattern::ObviousRoundNumbers,
671 FraudType::RevenueManipulation
672 | FraudType::ExpenseCapitalization
673 | FraudType::ImproperCapitalization
674 | FraudType::ReserveManipulation
675 | FraudType::UnauthorizedAccess
676 | FraudType::PrematureRevenue
677 | FraudType::UnderstatedLiabilities
678 | FraudType::OverstatedAssets
679 | FraudType::ChannelStuffing => FraudAmountPattern::StatisticallyImprobable,
680 FraudType::DuplicatePayment
681 | FraudType::TimingAnomaly
682 | FraudType::SelfApproval
683 | FraudType::ExceededApprovalLimit
684 | FraudType::SegregationOfDutiesViolation
685 | FraudType::UnauthorizedApproval
686 | FraudType::CollusiveApproval
687 | FraudType::FictitiousVendor
688 | FraudType::ShellCompanyPayment
689 | FraudType::Kickback
690 | FraudType::KickbackScheme
691 | FraudType::InvoiceManipulation
692 | FraudType::AssetMisappropriation
693 | FraudType::InventoryTheft
694 | FraudType::GhostEmployee => FraudAmountPattern::Normal,
695 FraudType::ImproperRevenueRecognition
697 | FraudType::ImproperPoAllocation
698 | FraudType::VariableConsiderationManipulation
699 | FraudType::ContractModificationMisstatement => {
700 FraudAmountPattern::StatisticallyImprobable
701 }
702 FraudType::LeaseClassificationManipulation
704 | FraudType::OffBalanceSheetLease
705 | FraudType::LeaseLiabilityUnderstatement
706 | FraudType::RouAssetMisstatement => FraudAmountPattern::StatisticallyImprobable,
707 FraudType::FairValueHierarchyManipulation
709 | FraudType::Level3InputManipulation
710 | FraudType::ValuationTechniqueManipulation => {
711 FraudAmountPattern::StatisticallyImprobable
712 }
713 FraudType::DelayedImpairment
715 | FraudType::ImpairmentTestAvoidance
716 | FraudType::CashFlowProjectionManipulation
717 | FraudType::ImproperImpairmentReversal => FraudAmountPattern::StatisticallyImprobable,
718 FraudType::BidRigging
720 | FraudType::PhantomVendorContract
721 | FraudType::ConflictOfInterestSourcing => FraudAmountPattern::Normal,
722 FraudType::SplitContractThreshold => FraudAmountPattern::ThresholdAdjacent,
723 FraudType::GhostEmployeePayroll
725 | FraudType::PayrollInflation
726 | FraudType::DuplicateExpenseReport
727 | FraudType::FictitiousExpense => FraudAmountPattern::Normal,
728 FraudType::SplitExpenseToAvoidApproval => FraudAmountPattern::ThresholdAdjacent,
729 FraudType::RevenueTimingManipulation => FraudAmountPattern::StatisticallyImprobable,
731 FraudType::QuotePriceOverride => FraudAmountPattern::Normal,
732 }
733 }
734
/// Returns the next document id from the generator's UUID factory.
fn generate_deterministic_uuid(&self) -> uuid::Uuid {
    self.uuid_factory.next()
}
739
/// Generates the next journal entry.
///
/// If a batch is in progress (`batch_state` has entries remaining) the call
/// is delegated to `generate_batched_entry`. Otherwise a fresh entry is
/// built: posting date, company, line layout, source, business process,
/// fraud flag, user, header texts, amounts and debit/credit lines, followed
/// by optional persona-error injection, the approval workflow and a chance
/// to start a new batch.
///
/// The order of the sampling calls below determines the random sequence;
/// reordering them changes the generated data for a given seed.
pub fn generate(&mut self) -> JournalEntry {
    debug!(
        count = self.count,
        companies = self.companies.len(),
        start_date = %self.start_date,
        end_date = %self.end_date,
        "Generating journal entry"
    );

    // Continue an in-progress batch before generating anything new.
    if let Some(ref state) = self.batch_state {
        if state.remaining > 0 {
            return self.generate_batched_entry();
        }
    }

    self.count += 1;

    let document_id = self.generate_deterministic_uuid();

    let mut posting_date = self
        .temporal_sampler
        .sample_date(self.start_date, self.end_date);

    // With a business-day calculator, move non-business days forward; if
    // that overshoots the end date, fall back to a business day found from
    // the end date via `prev_business_day`.
    if let Some(ref calc) = self.business_day_calculator {
        if !calc.is_business_day(posting_date) {
            posting_date = calc.next_business_day(posting_date, false);
            if posting_date > self.end_date {
                posting_date = calc.prev_business_day(self.end_date, true);
            }
        }
    }

    let company_code = self.company_selector.select(&mut self.rng).to_string();

    let line_spec = self.line_sampler.sample();

    // Automated and recurring sources are both treated as non-human here.
    let source = self.select_source();
    let is_automated = matches!(
        source,
        TransactionSource::Automated | TransactionSource::Recurring
    );

    let business_process = self.select_business_process();

    let fraud_type = self.determine_fraud();
    let is_fraud = fraud_type.is_some();

    let time = self.temporal_sampler.sample_time(!is_automated);
    let created_at = posting_date.and_time(time).and_utc();

    let (created_by, user_persona) = self.select_user(is_automated);

    let mut header =
        JournalEntryHeader::with_deterministic_id(company_code, posting_date, document_id);
    header.created_at = created_at;
    header.source = source;
    header.created_by = created_by;
    header.user_persona = user_persona;
    header.business_process = Some(business_process);
    header.is_fraud = is_fraud;
    header.fraud_type = fraud_type;

    let mut context =
        DescriptionContext::with_period(posting_date.month(), posting_date.year());

    // Only P2P and O2C entries get a counterparty name in the description context.
    match business_process {
        BusinessProcess::P2P => {
            if let Some(vendor) = self.vendor_pool.random_vendor(&mut self.rng) {
                context.vendor_name = Some(vendor.name.clone());
            }
        }
        BusinessProcess::O2C => {
            if let Some(customer) = self.customer_pool.random_customer(&mut self.rng) {
                context.customer_name = Some(customer.name.clone());
            }
        }
        _ => {}
    }

    if self.template_config.descriptions.generate_header_text {
        header.header_text = Some(self.description_generator.generate_header_text(
            business_process,
            &context,
            &mut self.rng,
        ));
    }

    if self.template_config.references.generate_references {
        header.reference = Some(
            self.reference_generator
                .generate_for_process_year(business_process, posting_date.year()),
        );
    }

    let mut entry = JournalEntry::new(header);

    // Fraudulent entries sample their amount from a fraud-specific pattern.
    let base_amount = if let Some(ft) = fraud_type {
        let pattern = self.fraud_type_to_amount_pattern(ft);
        self.amount_sampler.sample_fraud(pattern)
    } else {
        self.amount_sampler.sample()
    };

    // NOTE(review): `seasonal_factor` is only applied when
    // `amount_mean_multiplier` differs from 1.0, and the adjusted amount is
    // not rounded here — confirm both are intended.
    let drift_adjusted_amount = {
        let drift = self.get_drift_adjustments(posting_date);
        if drift.amount_mean_multiplier != 1.0 {
            let multiplier = drift.amount_mean_multiplier * drift.seasonal_factor;
            let adjusted = base_amount.to_f64().unwrap_or(1.0) * multiplier;
            Decimal::from_f64_retain(adjusted).unwrap_or(base_amount)
        } else {
            base_amount
        }
    };

    // Human-entered amounts get an extra variation step.
    let total_amount = if is_automated {
        drift_adjusted_amount
    } else {
        self.apply_human_variation(drift_adjusted_amount)
    };

    // Debit lines are numbered from 1.
    let debit_amounts = self
        .amount_sampler
        .sample_summing_to(line_spec.debit_count, total_amount);
    for (i, amount) in debit_amounts.into_iter().enumerate() {
        let account_number = self.select_debit_account().account_number.clone();
        let mut line = JournalEntryLine::debit(
            entry.header.document_id,
            (i + 1) as u32,
            account_number.clone(),
            amount,
        );

        if self.template_config.descriptions.generate_line_text {
            line.line_text = Some(self.description_generator.generate_line_text(
                &account_number,
                &context,
                &mut self.rng,
            ));
        }

        entry.add_line(line);
    }

    // Credit line numbers continue after the debit lines.
    let credit_amounts = self
        .amount_sampler
        .sample_summing_to(line_spec.credit_count, total_amount);
    for (i, amount) in credit_amounts.into_iter().enumerate() {
        let account_number = self.select_credit_account().account_number.clone();
        let mut line = JournalEntryLine::credit(
            entry.header.document_id,
            (line_spec.debit_count + i + 1) as u32,
            account_number.clone(),
            amount,
        );

        if self.template_config.descriptions.generate_line_text {
            line.line_text = Some(self.description_generator.generate_line_text(
                &account_number,
                &context,
                &mut self.rng,
            ));
        }

        entry.add_line(line);
    }

    // Persona errors are only considered for non-automated entries.
    if self.persona_errors_enabled && !is_automated {
        self.maybe_inject_persona_error(&mut entry);
    }

    if self.approval_enabled {
        self.maybe_apply_approval_workflow(&mut entry, posting_date);
    }

    // NOTE(review): `maybe_start_batch` excludes only `Automated` sources,
    // so a `Recurring` entry can still start a batch — confirm intended.
    self.maybe_start_batch(&entry);

    entry
}
949
/// Enables or disables persona-based human error injection.
pub fn with_persona_errors(mut self, enabled: bool) -> Self {
    self.persona_errors_enabled = enabled;
    self
}
958
/// Builder-style variant of `set_fraud_config`: replaces the fraud
/// configuration and returns the generator.
pub fn with_fraud_config(mut self, config: FraudConfig) -> Self {
    self.fraud_config = config;
    self
}
967
/// Returns whether persona-based human error injection is enabled.
pub fn persona_errors_enabled(&self) -> bool {
    self.persona_errors_enabled
}
972
/// Clears any batch currently in progress when `enabled` is `false`;
/// passing `true` changes nothing.
///
/// NOTE(review): no flag is stored, so this does not stop later entries
/// from starting new batches via `maybe_start_batch`. Actually disabling
/// batching would need a dedicated field on the generator.
pub fn with_batching(mut self, enabled: bool) -> Self {
    if !enabled {
        self.batch_state = None;
    }
    self
}
983
/// Reports whether batching is enabled.
///
/// NOTE(review): this always returns `true`, regardless of any earlier
/// `with_batching(false)` call.
pub fn batching_enabled(&self) -> bool {
    true
}
989
990 fn maybe_start_batch(&mut self, entry: &JournalEntry) {
995 if entry.header.source == TransactionSource::Automated || entry.header.is_fraud {
997 return;
998 }
999
1000 if self.rng.gen::<f64>() > 0.15 {
1002 return;
1003 }
1004
1005 let base_account = entry
1007 .lines
1008 .first()
1009 .map(|l| l.gl_account.clone())
1010 .unwrap_or_default();
1011
1012 let base_amount = entry.total_debit();
1013
1014 self.batch_state = Some(BatchState {
1015 base_account_number: base_account,
1016 base_amount,
1017 base_business_process: entry.header.business_process,
1018 base_posting_date: entry.header.posting_date,
1019 remaining: self.rng.gen_range(2..7), });
1021 }
1022
1023 fn generate_batched_entry(&mut self) -> JournalEntry {
1031 use rust_decimal::Decimal;
1032
1033 if let Some(ref mut state) = self.batch_state {
1035 state.remaining = state.remaining.saturating_sub(1);
1036 }
1037
1038 let batch = self
1039 .batch_state
1040 .clone()
1041 .expect("batch_state set before calling generate_batched_entry");
1042
1043 let posting_date = batch.base_posting_date;
1045
1046 self.count += 1;
1047 let document_id = self.generate_deterministic_uuid();
1048
1049 let company_code = self.company_selector.select(&mut self.rng).to_string();
1051
1052 let _line_spec = LineItemSpec {
1054 total_count: 2,
1055 debit_count: 1,
1056 credit_count: 1,
1057 split_type: DebitCreditSplit::Equal,
1058 };
1059
1060 let source = TransactionSource::Manual;
1062
1063 let business_process = batch.base_business_process.unwrap_or(BusinessProcess::R2R);
1065
1066 let time = self.temporal_sampler.sample_time(true);
1068 let created_at = posting_date.and_time(time).and_utc();
1069
1070 let (created_by, user_persona) = self.select_user(false);
1072
1073 let mut header =
1075 JournalEntryHeader::with_deterministic_id(company_code, posting_date, document_id);
1076 header.created_at = created_at;
1077 header.source = source;
1078 header.created_by = created_by;
1079 header.user_persona = user_persona;
1080 header.business_process = Some(business_process);
1081
1082 let variation = self.rng.gen_range(-0.15..0.15);
1084 let varied_amount =
1085 batch.base_amount * (Decimal::ONE + Decimal::try_from(variation).unwrap_or_default());
1086 let total_amount = varied_amount.round_dp(2).max(Decimal::from(1));
1087
1088 let mut entry = JournalEntry::new(header);
1090
1091 let debit_line = JournalEntryLine::debit(
1093 entry.header.document_id,
1094 1,
1095 batch.base_account_number.clone(),
1096 total_amount,
1097 );
1098 entry.add_line(debit_line);
1099
1100 let credit_account = self.select_credit_account().account_number.clone();
1102 let credit_line =
1103 JournalEntryLine::credit(entry.header.document_id, 2, credit_account, total_amount);
1104 entry.add_line(credit_line);
1105
1106 if self.persona_errors_enabled {
1108 self.maybe_inject_persona_error(&mut entry);
1109 }
1110
1111 if self.approval_enabled {
1113 self.maybe_apply_approval_workflow(&mut entry, posting_date);
1114 }
1115
1116 if batch.remaining <= 1 {
1118 self.batch_state = None;
1119 }
1120
1121 entry
1122 }
1123
1124 fn maybe_inject_persona_error(&mut self, entry: &mut JournalEntry) {
1126 let persona_str = &entry.header.user_persona;
1128 let persona = match persona_str.to_lowercase().as_str() {
1129 s if s.contains("junior") => UserPersona::JuniorAccountant,
1130 s if s.contains("senior") => UserPersona::SeniorAccountant,
1131 s if s.contains("controller") => UserPersona::Controller,
1132 s if s.contains("manager") => UserPersona::Manager,
1133 s if s.contains("executive") => UserPersona::Executive,
1134 _ => return, };
1136
1137 let base_error_rate = persona.error_rate();
1139
1140 let adjusted_rate = self.apply_stress_factors(base_error_rate, entry.header.posting_date);
1142
1143 if self.rng.gen::<f64>() >= adjusted_rate {
1145 return; }
1147
1148 self.inject_human_error(entry, persona);
1150 }
1151
1152 fn apply_stress_factors(&self, base_rate: f64, posting_date: chrono::NaiveDate) -> f64 {
1161 use chrono::Datelike;
1162
1163 let mut rate = base_rate;
1164 let day = posting_date.day();
1165 let month = posting_date.month();
1166
1167 if month == 12 && day >= 28 {
1169 rate *= 2.0;
1170 return rate.min(0.5); }
1172
1173 if matches!(month, 3 | 6 | 9 | 12) && day >= 28 {
1175 rate *= 1.75; return rate.min(0.4);
1177 }
1178
1179 if day >= 28 {
1181 rate *= 1.5; }
1183
1184 let weekday = posting_date.weekday();
1186 match weekday {
1187 chrono::Weekday::Mon => {
1188 rate *= 1.2;
1190 }
1191 chrono::Weekday::Fri => {
1192 rate *= 1.3;
1194 }
1195 _ => {}
1196 }
1197
1198 rate.min(0.4)
1200 }
1201
1202 fn apply_human_variation(&mut self, amount: rust_decimal::Decimal) -> rust_decimal::Decimal {
1211 use rust_decimal::Decimal;
1212
1213 if amount < Decimal::from(10) {
1215 return amount;
1216 }
1217
1218 if self.rng.gen::<f64>() > 0.70 {
1220 return amount;
1221 }
1222
1223 let variation_type: u8 = self.rng.gen_range(0..4);
1225
1226 match variation_type {
1227 0 => {
1228 let variation_pct = self.rng.gen_range(-0.02..0.02);
1230 let variation = amount * Decimal::try_from(variation_pct).unwrap_or_default();
1231 (amount + variation).round_dp(2)
1232 }
1233 1 => {
1234 let ten = Decimal::from(10);
1236 (amount / ten).round() * ten
1237 }
1238 2 => {
1239 if amount >= Decimal::from(500) {
1241 let hundred = Decimal::from(100);
1242 (amount / hundred).round() * hundred
1243 } else {
1244 amount
1245 }
1246 }
1247 3 => {
1248 let cents = Decimal::new(self.rng.gen_range(-100..100), 2);
1250 (amount + cents).max(Decimal::ZERO).round_dp(2)
1251 }
1252 _ => amount,
1253 }
1254 }
1255
1256 fn rebalance_entry(entry: &mut JournalEntry, modified_was_debit: bool, impact: Decimal) {
1262 let balancing_idx = entry.lines.iter().position(|l| {
1264 if modified_was_debit {
1265 l.credit_amount > Decimal::ZERO
1266 } else {
1267 l.debit_amount > Decimal::ZERO
1268 }
1269 });
1270
1271 if let Some(idx) = balancing_idx {
1272 if modified_was_debit {
1273 entry.lines[idx].credit_amount += impact;
1274 } else {
1275 entry.lines[idx].debit_amount += impact;
1276 }
1277 }
1278 }
1279
1280 fn inject_human_error(&mut self, entry: &mut JournalEntry, persona: UserPersona) {
1285 use rust_decimal::Decimal;
1286
1287 let error_type: u8 = match persona {
1289 UserPersona::JuniorAccountant => {
1290 self.rng.gen_range(0..5)
1292 }
1293 UserPersona::SeniorAccountant => {
1294 self.rng.gen_range(0..3)
1296 }
1297 UserPersona::Controller | UserPersona::Manager => {
1298 self.rng.gen_range(3..5)
1300 }
1301 _ => return,
1302 };
1303
1304 match error_type {
1305 0 => {
1306 if let Some(line) = entry.lines.get_mut(0) {
1308 let is_debit = line.debit_amount > Decimal::ZERO;
1309 let original_amount = if is_debit {
1310 line.debit_amount
1311 } else {
1312 line.credit_amount
1313 };
1314
1315 let s = original_amount.to_string();
1317 if s.len() >= 2 {
1318 let chars: Vec<char> = s.chars().collect();
1319 let pos = self.rng.gen_range(0..chars.len().saturating_sub(1));
1320 if chars[pos].is_ascii_digit()
1321 && chars.get(pos + 1).is_some_and(|c| c.is_ascii_digit())
1322 {
1323 let mut new_chars = chars;
1324 new_chars.swap(pos, pos + 1);
1325 if let Ok(new_amount) =
1326 new_chars.into_iter().collect::<String>().parse::<Decimal>()
1327 {
1328 let impact = new_amount - original_amount;
1329
1330 if is_debit {
1332 entry.lines[0].debit_amount = new_amount;
1333 } else {
1334 entry.lines[0].credit_amount = new_amount;
1335 }
1336
1337 Self::rebalance_entry(entry, is_debit, impact);
1339
1340 entry.header.header_text = Some(
1341 entry.header.header_text.clone().unwrap_or_default()
1342 + " [HUMAN_ERROR:TRANSPOSITION]",
1343 );
1344 }
1345 }
1346 }
1347 }
1348 }
1349 1 => {
1350 if let Some(line) = entry.lines.get_mut(0) {
1352 let is_debit = line.debit_amount > Decimal::ZERO;
1353 let original_amount = if is_debit {
1354 line.debit_amount
1355 } else {
1356 line.credit_amount
1357 };
1358
1359 let new_amount = original_amount * Decimal::new(10, 0);
1360 let impact = new_amount - original_amount;
1361
1362 if is_debit {
1364 entry.lines[0].debit_amount = new_amount;
1365 } else {
1366 entry.lines[0].credit_amount = new_amount;
1367 }
1368
1369 Self::rebalance_entry(entry, is_debit, impact);
1371
1372 entry.header.header_text = Some(
1373 entry.header.header_text.clone().unwrap_or_default()
1374 + " [HUMAN_ERROR:DECIMAL_SHIFT]",
1375 );
1376 }
1377 }
1378 2 => {
1379 if let Some(ref mut text) = entry.header.header_text {
1381 let typos = ["teh", "adn", "wiht", "taht", "recieve"];
1382 let correct = ["the", "and", "with", "that", "receive"];
1383 let idx = self.rng.gen_range(0..typos.len());
1384 if text.to_lowercase().contains(correct[idx]) {
1385 *text = text.replace(correct[idx], typos[idx]);
1386 *text = format!("{} [HUMAN_ERROR:TYPO]", text);
1387 }
1388 }
1389 }
1390 3 => {
1391 if let Some(line) = entry.lines.get_mut(0) {
1393 let is_debit = line.debit_amount > Decimal::ZERO;
1394 let original_amount = if is_debit {
1395 line.debit_amount
1396 } else {
1397 line.credit_amount
1398 };
1399
1400 let new_amount =
1401 (original_amount / Decimal::new(100, 0)).round() * Decimal::new(100, 0);
1402 let impact = new_amount - original_amount;
1403
1404 if is_debit {
1406 entry.lines[0].debit_amount = new_amount;
1407 } else {
1408 entry.lines[0].credit_amount = new_amount;
1409 }
1410
1411 Self::rebalance_entry(entry, is_debit, impact);
1413
1414 entry.header.header_text = Some(
1415 entry.header.header_text.clone().unwrap_or_default()
1416 + " [HUMAN_ERROR:ROUNDED]",
1417 );
1418 }
1419 }
1420 4 => {
1421 if entry.header.document_date == entry.header.posting_date {
1424 let days_late = self.rng.gen_range(5..15);
1425 entry.header.document_date =
1426 entry.header.posting_date - chrono::Duration::days(days_late);
1427 entry.header.header_text = Some(
1428 entry.header.header_text.clone().unwrap_or_default()
1429 + " [HUMAN_ERROR:LATE_POSTING]",
1430 );
1431 }
1432 }
1433 _ => {}
1434 }
1435 }
1436
1437 fn maybe_apply_approval_workflow(
1442 &mut self,
1443 entry: &mut JournalEntry,
1444 _posting_date: NaiveDate,
1445 ) {
1446 use rust_decimal::Decimal;
1447
1448 let amount = entry.total_debit();
1449
1450 if amount <= self.approval_threshold {
1452 let workflow = ApprovalWorkflow::auto_approved(
1454 entry.header.created_by.clone(),
1455 entry.header.user_persona.clone(),
1456 amount,
1457 entry.header.created_at,
1458 );
1459 entry.header.approval_workflow = Some(workflow);
1460 return;
1461 }
1462
1463 entry.header.sox_relevant = true;
1465
1466 let required_levels = if amount > Decimal::new(100000, 0) {
1468 3 } else if amount > Decimal::new(50000, 0) {
1470 2 } else {
1472 1 };
1474
1475 let mut workflow = ApprovalWorkflow::new(
1477 entry.header.created_by.clone(),
1478 entry.header.user_persona.clone(),
1479 amount,
1480 );
1481 workflow.required_levels = required_levels;
1482
1483 let submit_time = entry.header.created_at;
1485 let submit_action = ApprovalAction::new(
1486 entry.header.created_by.clone(),
1487 entry.header.user_persona.clone(),
1488 self.parse_persona(&entry.header.user_persona),
1489 ApprovalActionType::Submit,
1490 0,
1491 )
1492 .with_timestamp(submit_time);
1493
1494 workflow.actions.push(submit_action);
1495 workflow.status = ApprovalStatus::Pending;
1496 workflow.submitted_at = Some(submit_time);
1497
1498 let mut current_time = submit_time;
1500 for level in 1..=required_levels {
1501 let delay_hours = self.rng.gen_range(1..4);
1503 current_time += chrono::Duration::hours(delay_hours);
1504
1505 while current_time.weekday() == chrono::Weekday::Sat
1507 || current_time.weekday() == chrono::Weekday::Sun
1508 {
1509 current_time += chrono::Duration::days(1);
1510 }
1511
1512 let (approver_id, approver_role) = self.select_approver(level);
1514
1515 let approve_action = ApprovalAction::new(
1516 approver_id.clone(),
1517 format!("{:?}", approver_role),
1518 approver_role,
1519 ApprovalActionType::Approve,
1520 level,
1521 )
1522 .with_timestamp(current_time);
1523
1524 workflow.actions.push(approve_action);
1525 workflow.current_level = level;
1526 }
1527
1528 workflow.status = ApprovalStatus::Approved;
1530 workflow.approved_at = Some(current_time);
1531
1532 entry.header.approval_workflow = Some(workflow);
1533 }
1534
1535 fn select_approver(&mut self, level: u8) -> (String, UserPersona) {
1537 let persona = match level {
1538 1 => UserPersona::Manager,
1539 2 => UserPersona::Controller,
1540 _ => UserPersona::Executive,
1541 };
1542
1543 if let Some(ref pool) = self.user_pool {
1545 if let Some(user) = pool.get_random_user(persona, &mut self.rng) {
1546 return (user.user_id.clone(), persona);
1547 }
1548 }
1549
1550 let approver_id = match persona {
1552 UserPersona::Manager => format!("MGR{:04}", self.rng.gen_range(1..100)),
1553 UserPersona::Controller => format!("CTRL{:04}", self.rng.gen_range(1..20)),
1554 UserPersona::Executive => format!("EXEC{:04}", self.rng.gen_range(1..10)),
1555 _ => format!("USR{:04}", self.rng.gen_range(1..1000)),
1556 };
1557
1558 (approver_id, persona)
1559 }
1560
1561 fn parse_persona(&self, persona_str: &str) -> UserPersona {
1563 match persona_str.to_lowercase().as_str() {
1564 s if s.contains("junior") => UserPersona::JuniorAccountant,
1565 s if s.contains("senior") => UserPersona::SeniorAccountant,
1566 s if s.contains("controller") => UserPersona::Controller,
1567 s if s.contains("manager") => UserPersona::Manager,
1568 s if s.contains("executive") => UserPersona::Executive,
1569 s if s.contains("automated") || s.contains("system") => UserPersona::AutomatedSystem,
1570 _ => UserPersona::JuniorAccountant, }
1572 }
1573
1574 pub fn with_approval(mut self, enabled: bool) -> Self {
1576 self.approval_enabled = enabled;
1577 self
1578 }
1579
1580 pub fn with_approval_threshold(mut self, threshold: rust_decimal::Decimal) -> Self {
1582 self.approval_threshold = threshold;
1583 self
1584 }
1585
1586 pub fn with_drift_controller(mut self, controller: DriftController) -> Self {
1592 self.drift_controller = Some(controller);
1593 self
1594 }
1595
1596 pub fn with_drift_config(mut self, config: DriftConfig, seed: u64) -> Self {
1601 if config.enabled {
1602 let total_periods = self.calculate_total_periods();
1603 self.drift_controller = Some(DriftController::new(config, seed, total_periods));
1604 }
1605 self
1606 }
1607
1608 fn calculate_total_periods(&self) -> u32 {
1610 let start_year = self.start_date.year();
1611 let start_month = self.start_date.month();
1612 let end_year = self.end_date.year();
1613 let end_month = self.end_date.month();
1614
1615 ((end_year - start_year) * 12 + (end_month as i32 - start_month as i32) + 1).max(1) as u32
1616 }
1617
1618 fn date_to_period(&self, date: NaiveDate) -> u32 {
1620 let start_year = self.start_date.year();
1621 let start_month = self.start_date.month() as i32;
1622 let date_year = date.year();
1623 let date_month = date.month() as i32;
1624
1625 ((date_year - start_year) * 12 + (date_month - start_month)).max(0) as u32
1626 }
1627
1628 fn get_drift_adjustments(&self, date: NaiveDate) -> DriftAdjustments {
1630 if let Some(ref controller) = self.drift_controller {
1631 let period = self.date_to_period(date);
1632 controller.compute_adjustments(period)
1633 } else {
1634 DriftAdjustments::none()
1635 }
1636 }
1637
1638 fn select_user(&mut self, is_automated: bool) -> (String, String) {
1640 if let Some(ref pool) = self.user_pool {
1641 let persona = if is_automated {
1642 UserPersona::AutomatedSystem
1643 } else {
1644 let roll: f64 = self.rng.gen();
1646 if roll < 0.4 {
1647 UserPersona::JuniorAccountant
1648 } else if roll < 0.7 {
1649 UserPersona::SeniorAccountant
1650 } else if roll < 0.85 {
1651 UserPersona::Controller
1652 } else {
1653 UserPersona::Manager
1654 }
1655 };
1656
1657 if let Some(user) = pool.get_random_user(persona, &mut self.rng) {
1658 return (
1659 user.user_id.clone(),
1660 format!("{:?}", user.persona).to_lowercase(),
1661 );
1662 }
1663 }
1664
1665 if is_automated {
1667 (
1668 format!("BATCH{:04}", self.rng.gen_range(1..=20)),
1669 "automated_system".to_string(),
1670 )
1671 } else {
1672 (
1673 format!("USER{:04}", self.rng.gen_range(1..=40)),
1674 "senior_accountant".to_string(),
1675 )
1676 }
1677 }
1678
1679 fn select_source(&mut self) -> TransactionSource {
1681 let roll: f64 = self.rng.gen();
1682 let dist = &self.config.source_distribution;
1683
1684 if roll < dist.manual {
1685 TransactionSource::Manual
1686 } else if roll < dist.manual + dist.automated {
1687 TransactionSource::Automated
1688 } else if roll < dist.manual + dist.automated + dist.recurring {
1689 TransactionSource::Recurring
1690 } else {
1691 TransactionSource::Adjustment
1692 }
1693 }
1694
1695 fn select_business_process(&mut self) -> BusinessProcess {
1697 let roll: f64 = self.rng.gen();
1698
1699 if roll < 0.35 {
1701 BusinessProcess::O2C
1702 } else if roll < 0.65 {
1703 BusinessProcess::P2P
1704 } else if roll < 0.85 {
1705 BusinessProcess::R2R
1706 } else if roll < 0.95 {
1707 BusinessProcess::H2R
1708 } else {
1709 BusinessProcess::A2R
1710 }
1711 }
1712
1713 fn select_debit_account(&mut self) -> &GLAccount {
1714 let accounts = self.coa.get_accounts_by_type(AccountType::Asset);
1715 let expense_accounts = self.coa.get_accounts_by_type(AccountType::Expense);
1716
1717 let all: Vec<_> = if self.rng.gen::<f64>() < 0.6 {
1719 accounts
1720 } else {
1721 expense_accounts
1722 };
1723
1724 all.choose(&mut self.rng)
1725 .copied()
1726 .unwrap_or_else(|| &self.coa.accounts[0])
1727 }
1728
1729 fn select_credit_account(&mut self) -> &GLAccount {
1730 let liability_accounts = self.coa.get_accounts_by_type(AccountType::Liability);
1731 let revenue_accounts = self.coa.get_accounts_by_type(AccountType::Revenue);
1732
1733 let all: Vec<_> = if self.rng.gen::<f64>() < 0.6 {
1735 liability_accounts
1736 } else {
1737 revenue_accounts
1738 };
1739
1740 all.choose(&mut self.rng)
1741 .copied()
1742 .unwrap_or_else(|| &self.coa.accounts[0])
1743 }
1744}
1745
1746impl Generator for JournalEntryGenerator {
1747 type Item = JournalEntry;
1748 type Config = (
1749 TransactionConfig,
1750 Arc<ChartOfAccounts>,
1751 Vec<String>,
1752 NaiveDate,
1753 NaiveDate,
1754 );
1755
1756 fn new(config: Self::Config, seed: u64) -> Self {
1757 Self::new_with_params(config.0, config.1, config.2, config.3, config.4, seed)
1758 }
1759
1760 fn generate_one(&mut self) -> Self::Item {
1761 self.generate()
1762 }
1763
1764 fn reset(&mut self) {
1765 self.rng = seeded_rng(self.seed, 0);
1766 self.line_sampler.reset(self.seed + 1);
1767 self.amount_sampler.reset(self.seed + 2);
1768 self.temporal_sampler.reset(self.seed + 3);
1769 self.count = 0;
1770 self.uuid_factory.reset();
1771
1772 let mut ref_gen = ReferenceGenerator::new(
1774 self.start_date.year(),
1775 self.companies.first().map(|s| s.as_str()).unwrap_or("1000"),
1776 );
1777 ref_gen.set_prefix(
1778 ReferenceType::Invoice,
1779 &self.template_config.references.invoice_prefix,
1780 );
1781 ref_gen.set_prefix(
1782 ReferenceType::PurchaseOrder,
1783 &self.template_config.references.po_prefix,
1784 );
1785 ref_gen.set_prefix(
1786 ReferenceType::SalesOrder,
1787 &self.template_config.references.so_prefix,
1788 );
1789 self.reference_generator = ref_gen;
1790 }
1791
1792 fn count(&self) -> u64 {
1793 self.count
1794 }
1795
1796 fn seed(&self) -> u64 {
1797 self.seed
1798 }
1799}
1800
#[cfg(test)]
#[allow(clippy::unwrap_used)]
mod tests {
    use super::*;
    use crate::ChartOfAccountsGenerator;

    /// Small manufacturing chart of accounts (seed 42) used by every test.
    fn small_coa() -> Arc<ChartOfAccounts> {
        let mut builder =
            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
        Arc::new(builder.generate())
    }

    /// Shorthand for a date that is known to be valid.
    fn ymd(year: i32, month: u32, day: u32) -> NaiveDate {
        NaiveDate::from_ymd_opt(year, month, day).unwrap()
    }

    /// The single company code used by the tests.
    fn single_company() -> Vec<String> {
        vec!["1000".to_string()]
    }

    /// Default-config generator for company 1000 covering calendar year 2024.
    fn generator_2024(coa: Arc<ChartOfAccounts>, seed: u64) -> JournalEntryGenerator {
        JournalEntryGenerator::new_with_params(
            TransactionConfig::default(),
            coa,
            single_company(),
            ymd(2024, 1, 1),
            ymd(2024, 12, 31),
            seed,
        )
    }

    #[test]
    fn test_generate_balanced_entries() {
        let mut je_gen = generator_2024(small_coa(), 42);

        let mut balanced_count = 0;
        for _ in 0..100 {
            let entry = je_gen.generate();

            // Entries carrying an injected human error are excluded from the balance check.
            let has_human_error = entry
                .header
                .header_text
                .as_deref()
                .is_some_and(|text| text.contains("[HUMAN_ERROR:"));

            if !has_human_error {
                assert!(
                    entry.is_balanced(),
                    "Entry {:?} is not balanced",
                    entry.header.document_id
                );
                balanced_count += 1;
            }
            assert!(entry.line_count() >= 2, "Entry has fewer than 2 lines");
        }

        assert!(
            balanced_count >= 80,
            "Expected at least 80 balanced entries, got {}",
            balanced_count
        );
    }

    #[test]
    fn test_deterministic_generation() {
        let coa = small_coa();
        let mut gen1 = generator_2024(Arc::clone(&coa), 42);
        let mut gen2 = generator_2024(coa, 42);

        for _ in 0..50 {
            let e1 = gen1.generate();
            let e2 = gen2.generate();
            assert_eq!(e1.header.document_id, e2.header.document_id);
            assert_eq!(e1.total_debit(), e2.total_debit());
        }
    }

    #[test]
    fn test_templates_generate_descriptions() {
        use datasynth_config::schema::{
            CultureDistribution, DescriptionTemplateConfig, NameTemplateConfig,
            ReferenceTemplateConfig,
        };

        let template_config = TemplateConfig {
            names: NameTemplateConfig {
                generate_realistic_names: true,
                email_domain: "test.com".to_string(),
                culture_distribution: CultureDistribution::default(),
            },
            descriptions: DescriptionTemplateConfig {
                generate_header_text: true,
                generate_line_text: true,
            },
            references: ReferenceTemplateConfig {
                generate_references: true,
                invoice_prefix: "TEST-INV".to_string(),
                po_prefix: "TEST-PO".to_string(),
                so_prefix: "TEST-SO".to_string(),
            },
        };

        let mut je_gen = JournalEntryGenerator::new_with_full_config(
            TransactionConfig::default(),
            small_coa(),
            single_company(),
            ymd(2024, 1, 1),
            ymd(2024, 12, 31),
            42,
            template_config,
            None,
        )
        .with_persona_errors(false);

        for _ in 0..10 {
            let entry = je_gen.generate();

            assert!(
                entry.header.header_text.is_some(),
                "Header text should be populated"
            );
            assert!(
                entry.header.reference.is_some(),
                "Reference should be populated"
            );
            assert!(
                entry.header.business_process.is_some(),
                "Business process should be set"
            );
            for line in &entry.lines {
                assert!(line.line_text.is_some(), "Line text should be populated");
            }
            assert!(entry.is_balanced());
        }
    }

    #[test]
    fn test_user_pool_integration() {
        let companies = single_company();

        let mut user_gen = crate::UserGenerator::new(42);
        let user_pool = user_gen.generate_standard(&companies);

        let mut je_gen = JournalEntryGenerator::new_with_full_config(
            TransactionConfig::default(),
            small_coa(),
            companies,
            ymd(2024, 1, 1),
            ymd(2024, 12, 31),
            42,
            TemplateConfig::default(),
            Some(user_pool),
        );

        for _ in 0..20 {
            let entry = je_gen.generate();
            assert!(!entry.header.created_by.is_empty());
        }
    }

    #[test]
    fn test_master_data_connection() {
        let vendors = vec![
            Vendor::new("V-TEST-001", "Test Vendor Alpha", VendorType::Supplier),
            Vendor::new("V-TEST-002", "Test Vendor Beta", VendorType::Technology),
        ];
        let customers = vec![
            Customer::new("C-TEST-001", "Test Customer One", CustomerType::Corporate),
            Customer::new(
                "C-TEST-002",
                "Test Customer Two",
                CustomerType::SmallBusiness,
            ),
        ];
        let materials = vec![Material::new(
            "MAT-TEST-001",
            "Test Material A",
            MaterialType::RawMaterial,
        )];

        let generator = generator_2024(small_coa(), 42);
        assert!(!generator.is_using_real_master_data());

        let generator_with_data = generator
            .with_vendors(&vendors)
            .with_customers(&customers)
            .with_materials(&materials);
        assert!(generator_with_data.is_using_real_master_data());
    }

    #[test]
    fn test_with_master_data_convenience_method() {
        let vendors = vec![Vendor::new("V-001", "Vendor One", VendorType::Supplier)];
        let customers = vec![Customer::new(
            "C-001",
            "Customer One",
            CustomerType::Corporate,
        )];
        let materials = vec![Material::new(
            "MAT-001",
            "Material One",
            MaterialType::RawMaterial,
        )];

        let generator =
            generator_2024(small_coa(), 42).with_master_data(&vendors, &customers, &materials);

        assert!(generator.is_using_real_master_data());
    }

    #[test]
    fn test_stress_factors_increase_error_rate() {
        let generator = generator_2024(small_coa(), 42);

        let base_rate = 0.1;
        let rate_on =
            |year, month, day| generator.apply_stress_factors(base_rate, ymd(year, month, day));

        let regular_rate = rate_on(2024, 6, 15);
        assert!(
            (regular_rate - base_rate).abs() < 0.01,
            "Regular day should have minimal stress factor adjustment"
        );

        let month_end_rate = rate_on(2024, 6, 29);
        assert!(
            month_end_rate > regular_rate,
            "Month end should have higher error rate than regular day"
        );

        let year_end_rate = rate_on(2024, 12, 30);
        assert!(
            year_end_rate > month_end_rate,
            "Year end should have highest error rate"
        );

        let friday_rate = rate_on(2024, 6, 14);
        assert!(
            friday_rate > regular_rate,
            "Friday should have higher error rate than mid-week"
        );

        let monday_rate = rate_on(2024, 6, 17);
        assert!(
            monday_rate > regular_rate,
            "Monday should have higher error rate than mid-week"
        );
    }

    #[test]
    fn test_batching_produces_similar_entries() {
        let mut je_gen = generator_2024(small_coa(), 123).with_persona_errors(false);

        let entries: Vec<JournalEntry> = (0..200).map(|_| je_gen.generate()).collect();

        assert!(
            entries.iter().all(|entry| entry.is_balanced()),
            "All entries including batched should be balanced"
        );

        let mut entries_per_date: std::collections::HashMap<NaiveDate, usize> =
            std::collections::HashMap::new();
        for entry in &entries {
            *entries_per_date
                .entry(entry.header.posting_date)
                .or_default() += 1;
        }

        assert!(
            entries_per_date.values().any(|&n| n > 1),
            "With batching, should see some dates with multiple entries"
        );
    }

    #[test]
    fn test_temporal_patterns_business_days() {
        use datasynth_config::schema::{
            BusinessDaySchemaConfig, CalendarSchemaConfig, TemporalPatternsConfig,
        };

        let temporal_config = TemporalPatternsConfig {
            enabled: true,
            business_days: BusinessDaySchemaConfig {
                enabled: true,
                ..Default::default()
            },
            calendars: CalendarSchemaConfig {
                regions: vec!["US".to_string()],
                custom_holidays: vec![],
            },
            ..Default::default()
        };

        // First quarter of 2024 only.
        let mut je_gen = JournalEntryGenerator::new_with_params(
            TransactionConfig::default(),
            small_coa(),
            single_company(),
            ymd(2024, 1, 1),
            ymd(2024, 3, 31),
            42,
        )
        .with_temporal_patterns(temporal_config, 42)
        .with_persona_errors(false);

        let entries: Vec<JournalEntry> = (0..100).map(|_| je_gen.generate()).collect();

        for entry in &entries {
            let is_weekend = matches!(
                entry.header.posting_date.weekday(),
                chrono::Weekday::Sat | chrono::Weekday::Sun
            );
            assert!(
                !is_weekend,
                "Posting date {:?} should not be a weekend",
                entry.header.posting_date
            );
        }
    }
}