1use chrono::{Datelike, NaiveDate, Timelike};
4use datasynth_core::utils::seeded_rng;
5use rand::prelude::*;
6use rand_chacha::ChaCha8Rng;
7use rust_decimal::prelude::*;
8use rust_decimal::Decimal;
9use std::sync::Arc;
10
11use tracing::debug;
12
13use datasynth_config::schema::{
14 AdvancedDistributionConfig, FraudConfig, GeneratorConfig, MixtureDistributionType,
15 TemplateConfig, TemporalPatternsConfig, TransactionConfig,
16};
17use datasynth_core::distributions::{
18 AdvancedAmountSampler, BusinessDayCalculator, CrossDayConfig, DriftAdjustments, DriftConfig,
19 DriftController, EventType, IndustryAmountProfile, IndustryType, LagDistribution,
20 PeriodEndConfig, PeriodEndDynamics, PeriodEndModel, ProcessingLagCalculator,
21 ProcessingLagConfig, *,
22};
23use datasynth_core::models::*;
24use datasynth_core::templates::{
25 descriptions::DescriptionContext, DescriptionGenerator, ReferenceGenerator, ReferenceType,
26};
27use datasynth_core::traits::Generator;
28use datasynth_core::uuid_factory::{DeterministicUuidFactory, GeneratorType};
29use datasynth_core::CountryPack;
30
31use crate::company_selector::WeightedCompanySelector;
32use crate::user_generator::{UserGenerator, UserGeneratorConfig};
33
/// Generator that produces synthetic [`JournalEntry`] values.
///
/// Holds the random sources, samplers, master-data pools and optional
/// temporal/distribution extensions that `generate` consults for each entry.
pub struct JournalEntryGenerator {
    /// Primary RNG; the constructor creates it with `seeded_rng(seed, 0)`.
    rng: ChaCha8Rng,
    /// Seed passed to the constructor; reused to derive the user-generator seed.
    seed: u64,
    /// Transaction configuration the samplers were built from.
    config: TransactionConfig,
    /// Shared chart of accounts, used to look up account descriptions.
    coa: Arc<ChartOfAccounts>,
    /// Company codes this generator was constructed for.
    companies: Vec<String>,
    /// Picks the company code for each generated entry.
    company_selector: WeightedCompanySelector,
    /// Samples the debit/credit line-count specification of an entry.
    line_sampler: LineItemSampler,
    /// Default amount sampler (also splits a total across lines).
    amount_sampler: AmountSampler,
    /// Samples posting dates and times.
    temporal_sampler: TemporalSampler,
    /// First date passed to the posting-date sampler.
    start_date: NaiveDate,
    /// Last date passed to the posting-date sampler.
    end_date: NaiveDate,
    /// Entry counter; starts at 0 and is incremented by `generate`.
    count: u64,
    /// Source of deterministic document ids.
    uuid_factory: DeterministicUuidFactory,
    /// Optional user pool; supplied by the caller or synthesised from name settings.
    user_pool: Option<UserPool>,
    /// Produces header and line texts.
    description_generator: DescriptionGenerator,
    /// Produces document reference numbers.
    reference_generator: ReferenceGenerator,
    /// Template settings (names, descriptions, references).
    template_config: TemplateConfig,
    /// Vendor pool; `VendorPool::standard()` unless replaced via `with_vendors`.
    vendor_pool: VendorPool,
    /// Customer pool; `CustomerPool::standard()` unless replaced via `with_customers`.
    customer_pool: CustomerPool,
    /// Material pool; `None` unless set via `with_materials`.
    material_pool: Option<MaterialPool>,
    /// Set to `true` once any non-empty master-data slice has been supplied.
    using_real_master_data: bool,
    /// Fraud injection settings (enable flag, rate, type distribution).
    fraud_config: FraudConfig,
    /// Gate checked by `generate` for non-automated entries; initialised to `true`.
    persona_errors_enabled: bool,
    /// Initialised to `true`; NOTE(review): usage is outside the visible chunk.
    approval_enabled: bool,
    /// Initialised to 10000; NOTE(review): usage is outside the visible chunk.
    approval_threshold: rust_decimal::Decimal,
    /// Initialised to 0.10; NOTE(review): usage is outside the visible chunk.
    sod_violation_rate: f64,
    /// When present with `remaining > 0`, `generate` emits a batched entry instead.
    batch_state: Option<BatchState>,
    /// Initialised to `None`; NOTE(review): presumably feeds drift adjustments — confirm.
    drift_controller: Option<DriftController>,
    /// When present, `generate` moves non-business-day posting dates onto business days.
    business_day_calculator: Option<BusinessDayCalculator>,
    /// Set when processing lags are enabled in the temporal-patterns config.
    processing_lag_calculator: Option<ProcessingLagCalculator>,
    /// The temporal-patterns config last applied, if any.
    temporal_patterns_config: Option<TemporalPatternsConfig>,
    /// `(process, weight)` pairs; defaults to `DEFAULT_BUSINESS_PROCESS_WEIGHTS`.
    business_process_weights: [(BusinessProcess, f64); 5],
    /// When present, replaces `amount_sampler` for non-fraud base amounts.
    advanced_amount_sampler: Option<AdvancedAmountSampler>,
    /// When present, overrides the non-fraud base amount with a sample
    /// conditioned on the posting date (month or quarter).
    conditional_amount_override: Option<datasynth_core::distributions::ConditionalSampler>,
    /// When present, `generate` scales the non-fraud base amount by a factor
    /// derived from the first copula coordinate.
    correlation_copula: Option<datasynth_core::distributions::BivariateCopulaSampler>,
}
100
/// Business-process weights a freshly constructed generator starts with.
///
/// The five weights sum to 1.0; `set_business_process_weights` replaces them.
const DEFAULT_BUSINESS_PROCESS_WEIGHTS: [(BusinessProcess, f64); 5] = [
    (BusinessProcess::O2C, 0.35),
    (BusinessProcess::P2P, 0.30),
    (BusinessProcess::R2R, 0.20),
    (BusinessProcess::H2R, 0.10),
    (BusinessProcess::A2R, 0.05),
];
108
109impl JournalEntryGenerator {
123 fn supported_conditional_input(field: &str) -> bool {
124 matches!(field, "month" | "quarter" | "constant" | "")
125 }
126
127 fn conditional_input_value(&self, posting_date: chrono::NaiveDate) -> f64 {
128 match self
129 .conditional_amount_override
130 .as_ref()
131 .map(|s| s.config().input_field.as_str())
132 {
133 Some("month") => posting_date.month() as f64,
134 Some("quarter") => ((posting_date.month() - 1) / 3 + 1) as f64,
135 _ => 0.0,
136 }
137 }
138}
139
140fn industry_profile_to_log_normal(
141 p: datasynth_config::schema::IndustryProfileType,
142) -> datasynth_core::distributions::LogNormalMixtureConfig {
143 use datasynth_config::schema::IndustryProfileType as P;
144 let industry = match p {
145 P::Retail => IndustryType::Retail,
146 P::Manufacturing => IndustryType::Manufacturing,
147 P::FinancialServices => IndustryType::FinancialServices,
148 P::Healthcare => IndustryType::Healthcare,
149 P::Technology => IndustryType::Technology,
150 };
151 IndustryAmountProfile::for_industry(industry).sales_amounts
152}
153
/// Snapshot of the "base" entry that batched entries are derived from.
///
/// `generate` checks `remaining` and, while it is greater than zero, delegates
/// to the batched-entry path instead of sampling a fresh entry.
#[derive(Clone)]
struct BatchState {
    /// Account number of the base entry.
    base_account_number: String,
    /// Amount of the base entry.
    base_amount: rust_decimal::Decimal,
    /// Business process of the base entry, if any.
    base_business_process: Option<BusinessProcess>,
    /// Posting date of the base entry.
    base_posting_date: NaiveDate,
    /// Number of batched entries still to be emitted.
    remaining: u8,
}
168
169impl JournalEntryGenerator {
170 pub fn new_with_params(
172 config: TransactionConfig,
173 coa: Arc<ChartOfAccounts>,
174 companies: Vec<String>,
175 start_date: NaiveDate,
176 end_date: NaiveDate,
177 seed: u64,
178 ) -> Self {
179 Self::new_with_full_config(
180 config,
181 coa,
182 companies,
183 start_date,
184 end_date,
185 seed,
186 TemplateConfig::default(),
187 None,
188 )
189 }
190
191 #[allow(clippy::too_many_arguments)]
193 pub fn new_with_full_config(
194 config: TransactionConfig,
195 coa: Arc<ChartOfAccounts>,
196 companies: Vec<String>,
197 start_date: NaiveDate,
198 end_date: NaiveDate,
199 seed: u64,
200 template_config: TemplateConfig,
201 user_pool: Option<UserPool>,
202 ) -> Self {
203 let user_pool = user_pool.or_else(|| {
205 if template_config.names.generate_realistic_names {
206 let user_gen_config = UserGeneratorConfig {
207 culture_distribution: vec![
208 (
209 datasynth_core::templates::NameCulture::WesternUs,
210 template_config.names.culture_distribution.western_us,
211 ),
212 (
213 datasynth_core::templates::NameCulture::Hispanic,
214 template_config.names.culture_distribution.hispanic,
215 ),
216 (
217 datasynth_core::templates::NameCulture::German,
218 template_config.names.culture_distribution.german,
219 ),
220 (
221 datasynth_core::templates::NameCulture::French,
222 template_config.names.culture_distribution.french,
223 ),
224 (
225 datasynth_core::templates::NameCulture::Chinese,
226 template_config.names.culture_distribution.chinese,
227 ),
228 (
229 datasynth_core::templates::NameCulture::Japanese,
230 template_config.names.culture_distribution.japanese,
231 ),
232 (
233 datasynth_core::templates::NameCulture::Indian,
234 template_config.names.culture_distribution.indian,
235 ),
236 ],
237 email_domain: template_config.names.email_domain.clone(),
238 generate_realistic_names: true,
239 };
240 let mut user_gen = UserGenerator::with_config(seed + 100, user_gen_config);
241 Some(user_gen.generate_standard(&companies))
242 } else {
243 None
244 }
245 });
246
247 let mut ref_gen = ReferenceGenerator::new(
249 start_date.year(),
250 companies
251 .first()
252 .map(std::string::String::as_str)
253 .unwrap_or("1000"),
254 );
255 ref_gen.set_prefix(
256 ReferenceType::Invoice,
257 &template_config.references.invoice_prefix,
258 );
259 ref_gen.set_prefix(
260 ReferenceType::PurchaseOrder,
261 &template_config.references.po_prefix,
262 );
263 ref_gen.set_prefix(
264 ReferenceType::SalesOrder,
265 &template_config.references.so_prefix,
266 );
267
268 let company_selector = WeightedCompanySelector::uniform(companies.clone());
270
271 Self {
272 rng: seeded_rng(seed, 0),
273 seed,
274 config: config.clone(),
275 coa,
276 companies,
277 company_selector,
278 line_sampler: LineItemSampler::with_config(
279 seed + 1,
280 config.line_item_distribution.clone(),
281 config.even_odd_distribution.clone(),
282 config.debit_credit_distribution.clone(),
283 ),
284 amount_sampler: AmountSampler::with_config(seed + 2, config.amounts.clone()),
285 temporal_sampler: TemporalSampler::with_config(
286 seed + 3,
287 config.seasonality.clone(),
288 WorkingHoursConfig::default(),
289 Vec::new(),
290 ),
291 start_date,
292 end_date,
293 count: 0,
294 uuid_factory: DeterministicUuidFactory::new(seed, GeneratorType::JournalEntry),
295 user_pool,
296 description_generator: DescriptionGenerator::new(),
297 reference_generator: ref_gen,
298 template_config,
299 vendor_pool: VendorPool::standard(),
300 customer_pool: CustomerPool::standard(),
301 material_pool: None,
302 using_real_master_data: false,
303 fraud_config: FraudConfig::default(),
304 persona_errors_enabled: true, approval_enabled: true, approval_threshold: rust_decimal::Decimal::new(10000, 0), sod_violation_rate: 0.10, batch_state: None,
309 drift_controller: None,
310 business_day_calculator: Some(BusinessDayCalculator::new(HolidayCalendar::new(
313 Region::US,
314 start_date.year(),
315 ))),
316 processing_lag_calculator: None,
317 temporal_patterns_config: None,
318 business_process_weights: DEFAULT_BUSINESS_PROCESS_WEIGHTS,
319 advanced_amount_sampler: None,
320 conditional_amount_override: None,
321 correlation_copula: None,
322 }
323 }
324
325 pub fn set_advanced_distributions(
344 &mut self,
345 config: &AdvancedDistributionConfig,
346 seed: u64,
347 ) -> Result<(), String> {
348 if !config.enabled {
349 return Ok(());
350 }
351
352 self.conditional_amount_override = config
358 .conditional
359 .iter()
360 .find(|c| {
361 c.output_field == "amount" && Self::supported_conditional_input(&c.input_field)
362 })
363 .and_then(|c| {
364 datasynth_core::distributions::ConditionalSampler::new(
365 seed.wrapping_add(17),
366 c.to_core_config(),
367 )
368 .ok()
369 });
370
371 self.correlation_copula = config
376 .correlations
377 .to_core_config_for_pair("amount", "line_count")
378 .filter(|c| {
379 matches!(
380 c.copula_type,
381 datasynth_core::distributions::CopulaType::Gaussian
382 )
383 })
384 .and_then(|copula_cfg| {
385 datasynth_core::distributions::BivariateCopulaSampler::new(
386 seed.wrapping_add(31),
387 copula_cfg,
388 )
389 .ok()
390 });
391
392 if let Some(pareto) = &config.pareto {
397 if pareto.enabled {
398 let core_cfg = pareto.to_core_config();
399 self.advanced_amount_sampler =
400 Some(AdvancedAmountSampler::new_pareto(seed, core_cfg)?);
401 return Ok(());
402 }
403 }
404
405 if !config.amounts.enabled {
406 return Ok(());
407 }
408
409 match config.amounts.distribution_type {
410 MixtureDistributionType::LogNormal => {
411 let lognormal_cfg = config
412 .amounts
413 .to_log_normal_config()
414 .or_else(|| config.industry_profile.map(industry_profile_to_log_normal));
415 if let Some(cfg) = lognormal_cfg {
416 self.advanced_amount_sampler =
417 Some(AdvancedAmountSampler::new_log_normal(seed, cfg)?);
418 }
419 }
420 MixtureDistributionType::Gaussian => {
421 if let Some(cfg) = config.amounts.to_gaussian_config() {
422 self.advanced_amount_sampler =
423 Some(AdvancedAmountSampler::new_gaussian(seed, cfg)?);
424 }
425 }
426 }
427
428 Ok(())
429 }
430
431 pub fn set_business_process_weights(
435 &mut self,
436 o2c: f64,
437 p2p: f64,
438 r2r: f64,
439 h2r: f64,
440 a2r: f64,
441 ) {
442 self.business_process_weights = [
443 (BusinessProcess::O2C, o2c),
444 (BusinessProcess::P2P, p2p),
445 (BusinessProcess::R2R, r2r),
446 (BusinessProcess::H2R, h2r),
447 (BusinessProcess::A2R, a2r),
448 ];
449 }
450
451 pub fn from_generator_config(
456 full_config: &GeneratorConfig,
457 coa: Arc<ChartOfAccounts>,
458 start_date: NaiveDate,
459 end_date: NaiveDate,
460 seed: u64,
461 ) -> Self {
462 let companies: Vec<String> = full_config
463 .companies
464 .iter()
465 .map(|c| c.code.clone())
466 .collect();
467
468 let company_selector = WeightedCompanySelector::from_configs(&full_config.companies);
470
471 let mut generator = Self::new_with_full_config(
472 full_config.transactions.clone(),
473 coa,
474 companies,
475 start_date,
476 end_date,
477 seed,
478 full_config.templates.clone(),
479 None,
480 );
481
482 generator.company_selector = company_selector;
484
485 generator.fraud_config = full_config.fraud.clone();
487
488 let temporal_config = &full_config.temporal_patterns;
490 if temporal_config.enabled {
491 generator = generator.with_temporal_patterns(temporal_config.clone(), seed);
492 }
493
494 generator
495 }
496
497 pub fn with_temporal_patterns(mut self, config: TemporalPatternsConfig, seed: u64) -> Self {
504 if config.business_days.enabled {
506 let region = config
507 .calendars
508 .regions
509 .first()
510 .map(|r| Self::parse_region(r))
511 .unwrap_or(Region::US);
512
513 let calendar = HolidayCalendar::new(region, self.start_date.year());
514 self.business_day_calculator = Some(BusinessDayCalculator::new(calendar));
515 }
516
517 if config.processing_lags.enabled {
519 let lag_config = Self::convert_processing_lag_config(&config.processing_lags);
520 self.processing_lag_calculator =
521 Some(ProcessingLagCalculator::with_config(seed, lag_config));
522 }
523
524 let model = config.period_end.model.as_deref().unwrap_or("flat");
526 if model != "flat"
527 || config
528 .period_end
529 .month_end
530 .as_ref()
531 .is_some_and(|m| m.peak_multiplier.unwrap_or(1.0) != 1.0)
532 {
533 let dynamics = Self::convert_period_end_config(&config.period_end);
534 self.temporal_sampler.set_period_end_dynamics(dynamics);
535 }
536
537 self.temporal_patterns_config = Some(config);
538 self
539 }
540
541 pub fn with_country_pack_temporal(
549 mut self,
550 config: TemporalPatternsConfig,
551 seed: u64,
552 pack: &CountryPack,
553 ) -> Self {
554 if config.business_days.enabled {
556 let calendar = HolidayCalendar::from_country_pack(pack, self.start_date.year());
557 self.business_day_calculator = Some(BusinessDayCalculator::new(calendar));
558 }
559
560 if config.processing_lags.enabled {
562 let lag_config = Self::convert_processing_lag_config(&config.processing_lags);
563 self.processing_lag_calculator =
564 Some(ProcessingLagCalculator::with_config(seed, lag_config));
565 }
566
567 let model = config.period_end.model.as_deref().unwrap_or("flat");
569 if model != "flat"
570 || config
571 .period_end
572 .month_end
573 .as_ref()
574 .is_some_and(|m| m.peak_multiplier.unwrap_or(1.0) != 1.0)
575 {
576 let dynamics = Self::convert_period_end_config(&config.period_end);
577 self.temporal_sampler.set_period_end_dynamics(dynamics);
578 }
579
580 self.temporal_patterns_config = Some(config);
581 self
582 }
583
584 fn convert_processing_lag_config(
586 schema: &datasynth_config::schema::ProcessingLagSchemaConfig,
587 ) -> ProcessingLagConfig {
588 let mut config = ProcessingLagConfig {
589 enabled: schema.enabled,
590 ..Default::default()
591 };
592
593 let convert_lag = |lag: &datasynth_config::schema::LagDistributionSchemaConfig| {
595 let mut dist = LagDistribution::log_normal(lag.mu, lag.sigma);
596 if let Some(min) = lag.min_hours {
597 dist.min_lag_hours = min;
598 }
599 if let Some(max) = lag.max_hours {
600 dist.max_lag_hours = max;
601 }
602 dist
603 };
604
605 if let Some(ref lag) = schema.sales_order_lag {
607 config
608 .event_lags
609 .insert(EventType::SalesOrder, convert_lag(lag));
610 }
611 if let Some(ref lag) = schema.purchase_order_lag {
612 config
613 .event_lags
614 .insert(EventType::PurchaseOrder, convert_lag(lag));
615 }
616 if let Some(ref lag) = schema.goods_receipt_lag {
617 config
618 .event_lags
619 .insert(EventType::GoodsReceipt, convert_lag(lag));
620 }
621 if let Some(ref lag) = schema.invoice_receipt_lag {
622 config
623 .event_lags
624 .insert(EventType::InvoiceReceipt, convert_lag(lag));
625 }
626 if let Some(ref lag) = schema.invoice_issue_lag {
627 config
628 .event_lags
629 .insert(EventType::InvoiceIssue, convert_lag(lag));
630 }
631 if let Some(ref lag) = schema.payment_lag {
632 config
633 .event_lags
634 .insert(EventType::Payment, convert_lag(lag));
635 }
636 if let Some(ref lag) = schema.journal_entry_lag {
637 config
638 .event_lags
639 .insert(EventType::JournalEntry, convert_lag(lag));
640 }
641
642 if let Some(ref cross_day) = schema.cross_day_posting {
644 config.cross_day = CrossDayConfig {
645 enabled: cross_day.enabled,
646 probability_by_hour: cross_day.probability_by_hour.clone(),
647 ..Default::default()
648 };
649 }
650
651 config
652 }
653
654 fn convert_period_end_config(
656 schema: &datasynth_config::schema::PeriodEndSchemaConfig,
657 ) -> PeriodEndDynamics {
658 let model_type = schema.model.as_deref().unwrap_or("exponential");
659
660 let convert_period =
662 |period: Option<&datasynth_config::schema::PeriodEndModelSchemaConfig>,
663 default_peak: f64|
664 -> PeriodEndConfig {
665 if let Some(p) = period {
666 let model = match model_type {
667 "flat" => PeriodEndModel::FlatMultiplier {
668 multiplier: p.peak_multiplier.unwrap_or(default_peak),
669 },
670 "extended_crunch" => PeriodEndModel::ExtendedCrunch {
671 start_day: p.start_day.unwrap_or(-10),
672 sustained_high_days: p.sustained_high_days.unwrap_or(3),
673 peak_multiplier: p.peak_multiplier.unwrap_or(default_peak),
674 ramp_up_days: 3, },
676 _ => PeriodEndModel::ExponentialAcceleration {
677 start_day: p.start_day.unwrap_or(-10),
678 base_multiplier: p.base_multiplier.unwrap_or(1.0),
679 peak_multiplier: p.peak_multiplier.unwrap_or(default_peak),
680 decay_rate: p.decay_rate.unwrap_or(0.3),
681 },
682 };
683 PeriodEndConfig {
684 enabled: true,
685 model,
686 additional_multiplier: p.additional_multiplier.unwrap_or(1.0),
687 }
688 } else {
689 PeriodEndConfig {
690 enabled: true,
691 model: PeriodEndModel::ExponentialAcceleration {
692 start_day: -10,
693 base_multiplier: 1.0,
694 peak_multiplier: default_peak,
695 decay_rate: 0.3,
696 },
697 additional_multiplier: 1.0,
698 }
699 }
700 };
701
702 PeriodEndDynamics::new(
703 convert_period(schema.month_end.as_ref(), 2.0),
704 convert_period(schema.quarter_end.as_ref(), 3.5),
705 convert_period(schema.year_end.as_ref(), 5.0),
706 )
707 }
708
709 fn parse_region(region_str: &str) -> Region {
711 match region_str.to_uppercase().as_str() {
712 "US" => Region::US,
713 "DE" => Region::DE,
714 "GB" => Region::GB,
715 "CN" => Region::CN,
716 "JP" => Region::JP,
717 "IN" => Region::IN,
718 "BR" => Region::BR,
719 "MX" => Region::MX,
720 "AU" => Region::AU,
721 "SG" => Region::SG,
722 "KR" => Region::KR,
723 "FR" => Region::FR,
724 "IT" => Region::IT,
725 "ES" => Region::ES,
726 "CA" => Region::CA,
727 _ => Region::US,
728 }
729 }
730
    /// Replaces the selector used to pick the company code of each entry.
    pub fn set_company_selector(&mut self, selector: WeightedCompanySelector) {
        self.company_selector = selector;
    }
735
    /// Returns the selector currently used to pick company codes.
    pub fn company_selector(&self) -> &WeightedCompanySelector {
        &self.company_selector
    }
740
    /// Replaces the fraud injection settings.
    pub fn set_fraud_config(&mut self, config: FraudConfig) {
        self.fraud_config = config;
    }
745
746 pub fn with_vendors(mut self, vendors: &[Vendor]) -> Self {
751 if !vendors.is_empty() {
752 self.vendor_pool = VendorPool::from_vendors(vendors.to_vec());
753 self.using_real_master_data = true;
754 }
755 self
756 }
757
758 pub fn with_customers(mut self, customers: &[Customer]) -> Self {
763 if !customers.is_empty() {
764 self.customer_pool = CustomerPool::from_customers(customers.to_vec());
765 self.using_real_master_data = true;
766 }
767 self
768 }
769
770 pub fn with_materials(mut self, materials: &[Material]) -> Self {
774 if !materials.is_empty() {
775 self.material_pool = Some(MaterialPool::from_materials(materials.to_vec()));
776 self.using_real_master_data = true;
777 }
778 self
779 }
780
781 pub fn with_master_data(
786 self,
787 vendors: &[Vendor],
788 customers: &[Customer],
789 materials: &[Material],
790 ) -> Self {
791 self.with_vendors(vendors)
792 .with_customers(customers)
793 .with_materials(materials)
794 }
795
796 pub fn with_country_pack_names(mut self, pack: &CountryPack) -> Self {
803 let name_gen =
804 datasynth_core::templates::MultiCultureNameGenerator::from_country_pack(pack);
805 let config = UserGeneratorConfig {
806 culture_distribution: Vec::new(),
809 email_domain: name_gen.email_domain().to_string(),
810 generate_realistic_names: true,
811 };
812 let mut user_gen = UserGenerator::with_name_generator(self.seed + 100, config, name_gen);
813 self.user_pool = Some(user_gen.generate_standard(&self.companies));
814 self
815 }
816
    /// Returns `true` once a non-empty vendor, customer or material list has
    /// been supplied through the `with_*` builders.
    pub fn is_using_real_master_data(&self) -> bool {
        self.using_real_master_data
    }
821
822 fn determine_fraud(&mut self) -> Option<FraudType> {
824 if !self.fraud_config.enabled {
825 return None;
826 }
827
828 if self.rng.random::<f64>() >= self.fraud_config.fraud_rate {
830 return None;
831 }
832
833 Some(self.select_fraud_type())
835 }
836
837 fn select_fraud_type(&mut self) -> FraudType {
839 let dist = &self.fraud_config.fraud_type_distribution;
840 let roll: f64 = self.rng.random();
841
842 let mut cumulative = 0.0;
843
844 cumulative += dist.suspense_account_abuse;
845 if roll < cumulative {
846 return FraudType::SuspenseAccountAbuse;
847 }
848
849 cumulative += dist.fictitious_transaction;
850 if roll < cumulative {
851 return FraudType::FictitiousTransaction;
852 }
853
854 cumulative += dist.revenue_manipulation;
855 if roll < cumulative {
856 return FraudType::RevenueManipulation;
857 }
858
859 cumulative += dist.expense_capitalization;
860 if roll < cumulative {
861 return FraudType::ExpenseCapitalization;
862 }
863
864 cumulative += dist.split_transaction;
865 if roll < cumulative {
866 return FraudType::SplitTransaction;
867 }
868
869 cumulative += dist.timing_anomaly;
870 if roll < cumulative {
871 return FraudType::TimingAnomaly;
872 }
873
874 cumulative += dist.unauthorized_access;
875 if roll < cumulative {
876 return FraudType::UnauthorizedAccess;
877 }
878
879 FraudType::DuplicatePayment
881 }
882
883 fn fraud_type_to_amount_pattern(&self, fraud_type: FraudType) -> FraudAmountPattern {
885 match fraud_type {
886 FraudType::SplitTransaction | FraudType::JustBelowThreshold => {
887 FraudAmountPattern::ThresholdAdjacent
888 }
889 FraudType::FictitiousTransaction
890 | FraudType::FictitiousEntry
891 | FraudType::SuspenseAccountAbuse
892 | FraudType::RoundDollarManipulation => FraudAmountPattern::ObviousRoundNumbers,
893 FraudType::RevenueManipulation
894 | FraudType::ExpenseCapitalization
895 | FraudType::ImproperCapitalization
896 | FraudType::ReserveManipulation
897 | FraudType::UnauthorizedAccess
898 | FraudType::PrematureRevenue
899 | FraudType::UnderstatedLiabilities
900 | FraudType::OverstatedAssets
901 | FraudType::ChannelStuffing => FraudAmountPattern::StatisticallyImprobable,
902 FraudType::DuplicatePayment
903 | FraudType::TimingAnomaly
904 | FraudType::SelfApproval
905 | FraudType::ExceededApprovalLimit
906 | FraudType::SegregationOfDutiesViolation
907 | FraudType::UnauthorizedApproval
908 | FraudType::CollusiveApproval
909 | FraudType::FictitiousVendor
910 | FraudType::ShellCompanyPayment
911 | FraudType::Kickback
912 | FraudType::KickbackScheme
913 | FraudType::InvoiceManipulation
914 | FraudType::AssetMisappropriation
915 | FraudType::InventoryTheft
916 | FraudType::GhostEmployee => FraudAmountPattern::Normal,
917 FraudType::ImproperRevenueRecognition
919 | FraudType::ImproperPoAllocation
920 | FraudType::VariableConsiderationManipulation
921 | FraudType::ContractModificationMisstatement => {
922 FraudAmountPattern::StatisticallyImprobable
923 }
924 FraudType::LeaseClassificationManipulation
926 | FraudType::OffBalanceSheetLease
927 | FraudType::LeaseLiabilityUnderstatement
928 | FraudType::RouAssetMisstatement => FraudAmountPattern::StatisticallyImprobable,
929 FraudType::FairValueHierarchyManipulation
931 | FraudType::Level3InputManipulation
932 | FraudType::ValuationTechniqueManipulation => {
933 FraudAmountPattern::StatisticallyImprobable
934 }
935 FraudType::DelayedImpairment
937 | FraudType::ImpairmentTestAvoidance
938 | FraudType::CashFlowProjectionManipulation
939 | FraudType::ImproperImpairmentReversal => FraudAmountPattern::StatisticallyImprobable,
940 FraudType::BidRigging
942 | FraudType::PhantomVendorContract
943 | FraudType::ConflictOfInterestSourcing => FraudAmountPattern::Normal,
944 FraudType::SplitContractThreshold => FraudAmountPattern::ThresholdAdjacent,
945 FraudType::GhostEmployeePayroll
947 | FraudType::PayrollInflation
948 | FraudType::DuplicateExpenseReport
949 | FraudType::FictitiousExpense => FraudAmountPattern::Normal,
950 FraudType::SplitExpenseToAvoidApproval => FraudAmountPattern::ThresholdAdjacent,
951 FraudType::RevenueTimingManipulation => FraudAmountPattern::StatisticallyImprobable,
953 FraudType::QuotePriceOverride => FraudAmountPattern::Normal,
954 }
955 }
956
    /// Returns the next document id from the deterministic UUID factory.
    // NOTE(review): takes `&self`, so the factory presumably advances its
    // counter through interior mutability — confirm against the factory.
    #[inline]
    fn generate_deterministic_uuid(&self) -> uuid::Uuid {
        self.uuid_factory.next()
    }
962
    /// Cost centers that `enrich_line_items` assigns to lines whose GL account
    /// starts with `5` or `6`.
    const COST_CENTER_POOL: &'static [&'static str] =
        &["CC1000", "CC2000", "CC3000", "CC4000", "CC5000"];
966
967 fn enrich_line_items(&self, entry: &mut JournalEntry) {
973 let posting_date = entry.header.posting_date;
974 let company_code = &entry.header.company_code;
975 let header_text = entry.header.header_text.clone();
976 let business_process = entry.header.business_process;
977
978 let doc_id_bytes = entry.header.document_id.as_bytes();
980 let mut cc_seed: usize = 0;
981 for &b in doc_id_bytes {
982 cc_seed = cc_seed.wrapping_add(b as usize);
983 }
984
985 for (i, line) in entry.lines.iter_mut().enumerate() {
986 if line.account_description.is_none() {
988 line.account_description = self
989 .coa
990 .get_account(&line.gl_account)
991 .map(|a| a.short_description.clone());
992 }
993
994 if line.cost_center.is_none() {
996 let first_char = line.gl_account.chars().next().unwrap_or('0');
997 if first_char == '5' || first_char == '6' {
998 let idx = cc_seed.wrapping_add(i) % Self::COST_CENTER_POOL.len();
999 line.cost_center = Some(Self::COST_CENTER_POOL[idx].to_string());
1000 }
1001 }
1002
1003 if line.profit_center.is_none() {
1005 let suffix = match business_process {
1006 Some(BusinessProcess::P2P) => "-P2P",
1007 Some(BusinessProcess::O2C) => "-O2C",
1008 Some(BusinessProcess::R2R) => "-R2R",
1009 Some(BusinessProcess::H2R) => "-H2R",
1010 _ => "",
1011 };
1012 line.profit_center = Some(format!("PC-{company_code}{suffix}"));
1013 }
1014
1015 if line.line_text.is_none() {
1017 line.line_text = header_text.clone();
1018 }
1019
1020 if line.value_date.is_none()
1022 && (line.gl_account.starts_with("1100") || line.gl_account.starts_with("2000"))
1023 {
1024 line.value_date = Some(posting_date);
1025 }
1026
1027 if line.assignment.is_none() {
1029 if line.gl_account.starts_with("2000") {
1030 if let Some(ref ht) = header_text {
1032 if let Some(vendor_part) = ht.rsplit(" - ").next() {
1034 if vendor_part.starts_with("V-")
1035 || vendor_part.starts_with("VENDOR")
1036 || vendor_part.starts_with("Vendor")
1037 {
1038 line.assignment = Some(vendor_part.to_string());
1039 }
1040 }
1041 }
1042 } else if line.gl_account.starts_with("1100") {
1043 if let Some(ref ht) = header_text {
1045 if let Some(customer_part) = ht.rsplit(" - ").next() {
1046 if customer_part.starts_with("C-")
1047 || customer_part.starts_with("CUST")
1048 || customer_part.starts_with("Customer")
1049 {
1050 line.assignment = Some(customer_part.to_string());
1051 }
1052 }
1053 }
1054 }
1055 }
1056 }
1057 }
1058
1059 pub fn generate(&mut self) -> JournalEntry {
1061 debug!(
1062 count = self.count,
1063 companies = self.companies.len(),
1064 start_date = %self.start_date,
1065 end_date = %self.end_date,
1066 "Generating journal entry"
1067 );
1068
1069 if let Some(ref state) = self.batch_state {
1071 if state.remaining > 0 {
1072 return self.generate_batched_entry();
1073 }
1074 }
1075
1076 self.count += 1;
1077
1078 let document_id = self.generate_deterministic_uuid();
1080
1081 let mut posting_date = self
1083 .temporal_sampler
1084 .sample_date(self.start_date, self.end_date);
1085
1086 if let Some(ref calc) = self.business_day_calculator {
1088 if !calc.is_business_day(posting_date) {
1089 posting_date = calc.next_business_day(posting_date, false);
1091 if posting_date > self.end_date {
1093 posting_date = calc.prev_business_day(self.end_date, true);
1094 }
1095 }
1096 }
1097
1098 let company_code = self.company_selector.select(&mut self.rng).to_string();
1100
1101 let line_spec = self.line_sampler.sample();
1103
1104 let source = self.select_source();
1106 let is_automated = matches!(
1107 source,
1108 TransactionSource::Automated | TransactionSource::Recurring
1109 );
1110
1111 let business_process = self.select_business_process();
1113
1114 let fraud_type = self.determine_fraud();
1116 let is_fraud = fraud_type.is_some();
1117
1118 let time = self.temporal_sampler.sample_time(!is_automated);
1120 let created_at = posting_date.and_time(time).and_utc();
1121
1122 let (created_by, user_persona) = self.select_user(is_automated);
1124
1125 let mut header =
1127 JournalEntryHeader::with_deterministic_id(company_code, posting_date, document_id);
1128 header.created_at = created_at;
1129 header.source = source;
1130 header.created_by = created_by;
1131 header.user_persona = user_persona;
1132 header.business_process = Some(business_process);
1133 header.document_type = Self::document_type_for_process(business_process).to_string();
1134 header.is_fraud = is_fraud;
1135 header.fraud_type = fraud_type;
1136
1137 let is_manual = matches!(source, TransactionSource::Manual);
1139 header.is_manual = is_manual;
1140
1141 header.source_system = if is_manual {
1143 if self.rng.random::<f64>() < 0.70 {
1144 "manual".to_string()
1145 } else {
1146 "spreadsheet".to_string()
1147 }
1148 } else {
1149 let roll: f64 = self.rng.random();
1150 if roll < 0.40 {
1151 "SAP-FI".to_string()
1152 } else if roll < 0.60 {
1153 "SAP-MM".to_string()
1154 } else if roll < 0.80 {
1155 "SAP-SD".to_string()
1156 } else if roll < 0.95 {
1157 "interface".to_string()
1158 } else {
1159 "SAP-HR".to_string()
1160 }
1161 };
1162
1163 let is_post_close = posting_date.month() == self.end_date.month()
1166 && posting_date.year() == self.end_date.year()
1167 && posting_date.day() > 25;
1168 header.is_post_close = is_post_close;
1169
1170 let created_date = if is_manual {
1173 posting_date.and_hms_opt(time.hour().min(23), time.minute(), time.second())
1174 } else {
1175 let lag_days = self.rng.random_range(0i64..=3);
1176 let created_naive_date = posting_date
1177 .checked_sub_signed(chrono::Duration::days(lag_days))
1178 .unwrap_or(posting_date);
1179 created_naive_date.and_hms_opt(
1180 self.rng.random_range(8u32..=17),
1181 self.rng.random_range(0u32..=59),
1182 self.rng.random_range(0u32..=59),
1183 )
1184 };
1185 header.created_date = created_date;
1186
1187 let mut context =
1189 DescriptionContext::with_period(posting_date.month(), posting_date.year());
1190
1191 match business_process {
1193 BusinessProcess::P2P => {
1194 if let Some(vendor) = self.vendor_pool.random_vendor(&mut self.rng) {
1195 context.vendor_name = Some(vendor.name.clone());
1196 }
1197 }
1198 BusinessProcess::O2C => {
1199 if let Some(customer) = self.customer_pool.random_customer(&mut self.rng) {
1200 context.customer_name = Some(customer.name.clone());
1201 }
1202 }
1203 _ => {}
1204 }
1205
1206 if self.template_config.descriptions.generate_header_text {
1208 header.header_text = Some(self.description_generator.generate_header_text(
1209 business_process,
1210 &context,
1211 &mut self.rng,
1212 ));
1213 }
1214
1215 if self.template_config.references.generate_references {
1217 header.reference = Some(
1218 self.reference_generator
1219 .generate_for_process_year(business_process, posting_date.year()),
1220 );
1221 }
1222
1223 header.source_document = header
1225 .reference
1226 .as_deref()
1227 .and_then(DocumentRef::parse)
1228 .or_else(|| {
1229 if header.source == TransactionSource::Manual {
1230 Some(DocumentRef::Manual)
1231 } else {
1232 None
1233 }
1234 });
1235
1236 let mut entry = JournalEntry::new(header);
1238
1239 let base_amount = if let Some(ft) = fraud_type {
1245 let pattern = self.fraud_type_to_amount_pattern(ft);
1246 self.amount_sampler.sample_fraud(pattern)
1247 } else if let Some(ref mut adv) = self.advanced_amount_sampler {
1248 adv.sample_decimal()
1249 } else {
1250 self.amount_sampler.sample()
1251 };
1252 let base_amount = if fraud_type.is_none() {
1258 let input = self.conditional_input_value(posting_date);
1262 if let Some(ref mut cond) = self.conditional_amount_override {
1263 cond.sample_decimal(input)
1264 } else {
1265 base_amount
1266 }
1267 } else {
1268 base_amount
1269 };
1270
1271 let base_amount = if fraud_type.is_none() {
1277 if let Some(ref mut cop) = self.correlation_copula {
1278 let (u, _v) = cop.sample();
1279 let multiplier = 0.7 + 0.6 * u;
1280 let adjusted = base_amount.to_f64().unwrap_or(1.0) * multiplier;
1281 Decimal::from_f64_retain(adjusted).unwrap_or(base_amount)
1282 } else {
1283 base_amount
1284 }
1285 } else {
1286 base_amount
1287 };
1288
1289 let drift_adjusted_amount = {
1291 let drift = self.get_drift_adjustments(posting_date);
1292 if drift.amount_mean_multiplier != 1.0 {
1293 let multiplier = drift.amount_mean_multiplier * drift.seasonal_factor;
1295 let adjusted = base_amount.to_f64().unwrap_or(1.0) * multiplier;
1296 Decimal::from_f64_retain(adjusted).unwrap_or(base_amount)
1297 } else {
1298 base_amount
1299 }
1300 };
1301
1302 let total_amount = if is_automated {
1304 drift_adjusted_amount } else {
1306 self.apply_human_variation(drift_adjusted_amount)
1307 };
1308
1309 let debit_amounts = self
1311 .amount_sampler
1312 .sample_summing_to(line_spec.debit_count, total_amount);
1313 for (i, amount) in debit_amounts.into_iter().enumerate() {
1314 let account_number = self.select_debit_account().account_number.clone();
1315 let mut line = JournalEntryLine::debit(
1316 entry.header.document_id,
1317 (i + 1) as u32,
1318 account_number.clone(),
1319 amount,
1320 );
1321
1322 if self.template_config.descriptions.generate_line_text {
1324 line.line_text = Some(self.description_generator.generate_line_text(
1325 &account_number,
1326 &context,
1327 &mut self.rng,
1328 ));
1329 }
1330
1331 entry.add_line(line);
1332 }
1333
1334 let credit_amounts = self
1336 .amount_sampler
1337 .sample_summing_to(line_spec.credit_count, total_amount);
1338 for (i, amount) in credit_amounts.into_iter().enumerate() {
1339 let account_number = self.select_credit_account().account_number.clone();
1340 let mut line = JournalEntryLine::credit(
1341 entry.header.document_id,
1342 (line_spec.debit_count + i + 1) as u32,
1343 account_number.clone(),
1344 amount,
1345 );
1346
1347 if self.template_config.descriptions.generate_line_text {
1349 line.line_text = Some(self.description_generator.generate_line_text(
1350 &account_number,
1351 &context,
1352 &mut self.rng,
1353 ));
1354 }
1355
1356 entry.add_line(line);
1357 }
1358
1359 self.enrich_line_items(&mut entry);
1361
1362 if self.persona_errors_enabled && !is_automated {
1364 self.maybe_inject_persona_error(&mut entry);
1365 }
1366
1367 if self.approval_enabled {
1369 self.maybe_apply_approval_workflow(&mut entry, posting_date);
1370 }
1371
1372 self.populate_approval_fields(&mut entry, posting_date);
1374
1375 self.maybe_start_batch(&entry);
1377
1378 entry
1379 }
1380
1381 pub fn with_persona_errors(mut self, enabled: bool) -> Self {
1386 self.persona_errors_enabled = enabled;
1387 self
1388 }
1389
1390 pub fn with_fraud_config(mut self, config: FraudConfig) -> Self {
1395 self.fraud_config = config;
1396 self
1397 }
1398
1399 pub fn persona_errors_enabled(&self) -> bool {
1401 self.persona_errors_enabled
1402 }
1403
1404 pub fn with_batching(mut self, enabled: bool) -> Self {
1409 if !enabled {
1410 self.batch_state = None;
1411 }
1412 self
1413 }
1414
1415 pub fn batching_enabled(&self) -> bool {
1417 true
1419 }
1420
1421 fn maybe_start_batch(&mut self, entry: &JournalEntry) {
1426 if entry.header.source == TransactionSource::Automated || entry.header.is_fraud {
1428 return;
1429 }
1430
1431 if self.rng.random::<f64>() > 0.15 {
1433 return;
1434 }
1435
1436 let base_account = entry
1438 .lines
1439 .first()
1440 .map(|l| l.gl_account.clone())
1441 .unwrap_or_default();
1442
1443 let base_amount = entry.total_debit();
1444
1445 self.batch_state = Some(BatchState {
1446 base_account_number: base_account,
1447 base_amount,
1448 base_business_process: entry.header.business_process,
1449 base_posting_date: entry.header.posting_date,
1450 remaining: self.rng.random_range(2..7), });
1452 }
1453
1454 fn generate_batched_entry(&mut self) -> JournalEntry {
1462 use rust_decimal::Decimal;
1463
1464 if let Some(ref mut state) = self.batch_state {
1466 state.remaining = state.remaining.saturating_sub(1);
1467 }
1468
1469 let Some(batch) = self.batch_state.clone() else {
1470 tracing::warn!(
1473 "generate_batched_entry called without batch_state; generating standard entry"
1474 );
1475 self.batch_state = None;
1476 return self.generate();
1477 };
1478
1479 let posting_date = batch.base_posting_date;
1481
1482 self.count += 1;
1483 let document_id = self.generate_deterministic_uuid();
1484
1485 let company_code = self.company_selector.select(&mut self.rng).to_string();
1487
1488 let _line_spec = LineItemSpec {
1490 total_count: 2,
1491 debit_count: 1,
1492 credit_count: 1,
1493 split_type: DebitCreditSplit::Equal,
1494 };
1495
1496 let source = TransactionSource::Manual;
1498
1499 let business_process = batch.base_business_process.unwrap_or(BusinessProcess::R2R);
1501
1502 let time = self.temporal_sampler.sample_time(true);
1504 let created_at = posting_date.and_time(time).and_utc();
1505
1506 let (created_by, user_persona) = self.select_user(false);
1508
1509 let mut header =
1511 JournalEntryHeader::with_deterministic_id(company_code, posting_date, document_id);
1512 header.created_at = created_at;
1513 header.source = source;
1514 header.created_by = created_by;
1515 header.user_persona = user_persona;
1516 header.business_process = Some(business_process);
1517 header.document_type = Self::document_type_for_process(business_process).to_string();
1518
1519 header.source_document = Some(DocumentRef::Manual);
1521
1522 header.is_manual = true;
1524 header.source_system = if self.rng.random::<f64>() < 0.70 {
1525 "manual".to_string()
1526 } else {
1527 "spreadsheet".to_string()
1528 };
1529 header.is_post_close = posting_date.month() == self.end_date.month()
1530 && posting_date.year() == self.end_date.year()
1531 && posting_date.day() > 25;
1532 header.created_date =
1533 posting_date.and_hms_opt(time.hour().min(23), time.minute(), time.second());
1534
1535 let variation = self.rng.random_range(-0.15..0.15);
1537 let varied_amount =
1538 batch.base_amount * (Decimal::ONE + Decimal::try_from(variation).unwrap_or_default());
1539 let total_amount = varied_amount.round_dp(2).max(Decimal::from(1));
1540
1541 let mut entry = JournalEntry::new(header);
1543
1544 let debit_line = JournalEntryLine::debit(
1546 entry.header.document_id,
1547 1,
1548 batch.base_account_number.clone(),
1549 total_amount,
1550 );
1551 entry.add_line(debit_line);
1552
1553 let credit_account = self.select_credit_account().account_number.clone();
1555 let credit_line =
1556 JournalEntryLine::credit(entry.header.document_id, 2, credit_account, total_amount);
1557 entry.add_line(credit_line);
1558
1559 self.enrich_line_items(&mut entry);
1561
1562 if self.persona_errors_enabled {
1564 self.maybe_inject_persona_error(&mut entry);
1565 }
1566
1567 if self.approval_enabled {
1569 self.maybe_apply_approval_workflow(&mut entry, posting_date);
1570 }
1571
1572 self.populate_approval_fields(&mut entry, posting_date);
1574
1575 if batch.remaining <= 1 {
1577 self.batch_state = None;
1578 }
1579
1580 entry
1581 }
1582
1583 fn maybe_inject_persona_error(&mut self, entry: &mut JournalEntry) {
1585 let persona_str = &entry.header.user_persona;
1587 let persona = match persona_str.to_lowercase().as_str() {
1588 s if s.contains("junior") => UserPersona::JuniorAccountant,
1589 s if s.contains("senior") => UserPersona::SeniorAccountant,
1590 s if s.contains("controller") => UserPersona::Controller,
1591 s if s.contains("manager") => UserPersona::Manager,
1592 s if s.contains("executive") => UserPersona::Executive,
1593 _ => return, };
1595
1596 let base_error_rate = persona.error_rate();
1598
1599 let adjusted_rate = self.apply_stress_factors(base_error_rate, entry.header.posting_date);
1601
1602 if self.rng.random::<f64>() >= adjusted_rate {
1604 return; }
1606
1607 self.inject_human_error(entry, persona);
1609 }
1610
1611 fn apply_stress_factors(&self, base_rate: f64, posting_date: chrono::NaiveDate) -> f64 {
1620 use chrono::Datelike;
1621
1622 let mut rate = base_rate;
1623 let day = posting_date.day();
1624 let month = posting_date.month();
1625
1626 if month == 12 && day >= 28 {
1628 rate *= 2.0;
1629 return rate.min(0.5); }
1631
1632 if matches!(month, 3 | 6 | 9 | 12) && day >= 28 {
1634 rate *= 1.75; return rate.min(0.4);
1636 }
1637
1638 if day >= 28 {
1640 rate *= 1.5; }
1642
1643 let weekday = posting_date.weekday();
1645 match weekday {
1646 chrono::Weekday::Mon => {
1647 rate *= 1.2;
1649 }
1650 chrono::Weekday::Fri => {
1651 rate *= 1.3;
1653 }
1654 _ => {}
1655 }
1656
1657 rate.min(0.4)
1659 }
1660
1661 fn apply_human_variation(&mut self, amount: rust_decimal::Decimal) -> rust_decimal::Decimal {
1670 use rust_decimal::Decimal;
1671
1672 if amount < Decimal::from(10) {
1674 return amount;
1675 }
1676
1677 if self.rng.random::<f64>() > 0.70 {
1679 return amount;
1680 }
1681
1682 let variation_type: u8 = self.rng.random_range(0..4);
1684
1685 match variation_type {
1686 0 => {
1687 let variation_pct = self.rng.random_range(-0.02..0.02);
1689 let variation = amount * Decimal::try_from(variation_pct).unwrap_or_default();
1690 (amount + variation).round_dp(2)
1691 }
1692 1 => {
1693 let ten = Decimal::from(10);
1695 (amount / ten).round() * ten
1696 }
1697 2 => {
1698 if amount >= Decimal::from(500) {
1700 let hundred = Decimal::from(100);
1701 (amount / hundred).round() * hundred
1702 } else {
1703 amount
1704 }
1705 }
1706 3 => {
1707 let cents = Decimal::new(self.rng.random_range(-100..100), 2);
1709 (amount + cents).max(Decimal::ZERO).round_dp(2)
1710 }
1711 _ => amount,
1712 }
1713 }
1714
1715 fn rebalance_entry(entry: &mut JournalEntry, modified_was_debit: bool, impact: Decimal) {
1721 let balancing_idx = entry.lines.iter().position(|l| {
1723 if modified_was_debit {
1724 l.credit_amount > Decimal::ZERO
1725 } else {
1726 l.debit_amount > Decimal::ZERO
1727 }
1728 });
1729
1730 if let Some(idx) = balancing_idx {
1731 if modified_was_debit {
1732 entry.lines[idx].credit_amount += impact;
1733 } else {
1734 entry.lines[idx].debit_amount += impact;
1735 }
1736 }
1737 }
1738
1739 fn inject_human_error(&mut self, entry: &mut JournalEntry, persona: UserPersona) {
1744 use rust_decimal::Decimal;
1745
1746 let error_type: u8 = match persona {
1748 UserPersona::JuniorAccountant => {
1749 self.rng.random_range(0..5)
1751 }
1752 UserPersona::SeniorAccountant => {
1753 self.rng.random_range(0..3)
1755 }
1756 UserPersona::Controller | UserPersona::Manager => {
1757 self.rng.random_range(3..5)
1759 }
1760 _ => return,
1761 };
1762
1763 match error_type {
1764 0 => {
1765 if let Some(line) = entry.lines.get_mut(0) {
1767 let is_debit = line.debit_amount > Decimal::ZERO;
1768 let original_amount = if is_debit {
1769 line.debit_amount
1770 } else {
1771 line.credit_amount
1772 };
1773
1774 let s = original_amount.to_string();
1776 if s.len() >= 2 {
1777 let chars: Vec<char> = s.chars().collect();
1778 let pos = self.rng.random_range(0..chars.len().saturating_sub(1));
1779 if chars[pos].is_ascii_digit()
1780 && chars.get(pos + 1).is_some_and(char::is_ascii_digit)
1781 {
1782 let mut new_chars = chars;
1783 new_chars.swap(pos, pos + 1);
1784 if let Ok(new_amount) =
1785 new_chars.into_iter().collect::<String>().parse::<Decimal>()
1786 {
1787 let impact = new_amount - original_amount;
1788
1789 if is_debit {
1791 entry.lines[0].debit_amount = new_amount;
1792 } else {
1793 entry.lines[0].credit_amount = new_amount;
1794 }
1795
1796 Self::rebalance_entry(entry, is_debit, impact);
1798
1799 entry.header.header_text = Some(
1800 entry.header.header_text.clone().unwrap_or_default()
1801 + " [HUMAN_ERROR:TRANSPOSITION]",
1802 );
1803 }
1804 }
1805 }
1806 }
1807 }
1808 1 => {
1809 if let Some(line) = entry.lines.get_mut(0) {
1811 let is_debit = line.debit_amount > Decimal::ZERO;
1812 let original_amount = if is_debit {
1813 line.debit_amount
1814 } else {
1815 line.credit_amount
1816 };
1817
1818 let new_amount = original_amount * Decimal::new(10, 0);
1819 let impact = new_amount - original_amount;
1820
1821 if is_debit {
1823 entry.lines[0].debit_amount = new_amount;
1824 } else {
1825 entry.lines[0].credit_amount = new_amount;
1826 }
1827
1828 Self::rebalance_entry(entry, is_debit, impact);
1830
1831 entry.header.header_text = Some(
1832 entry.header.header_text.clone().unwrap_or_default()
1833 + " [HUMAN_ERROR:DECIMAL_SHIFT]",
1834 );
1835 }
1836 }
1837 2 => {
1838 if let Some(ref mut text) = entry.header.header_text {
1840 let typos = ["teh", "adn", "wiht", "taht", "recieve"];
1841 let correct = ["the", "and", "with", "that", "receive"];
1842 let idx = self.rng.random_range(0..typos.len());
1843 if text.to_lowercase().contains(correct[idx]) {
1844 *text = text.replace(correct[idx], typos[idx]);
1845 *text = format!("{text} [HUMAN_ERROR:TYPO]");
1846 }
1847 }
1848 }
1849 3 => {
1850 if let Some(line) = entry.lines.get_mut(0) {
1852 let is_debit = line.debit_amount > Decimal::ZERO;
1853 let original_amount = if is_debit {
1854 line.debit_amount
1855 } else {
1856 line.credit_amount
1857 };
1858
1859 let new_amount =
1860 (original_amount / Decimal::new(100, 0)).round() * Decimal::new(100, 0);
1861 let impact = new_amount - original_amount;
1862
1863 if is_debit {
1865 entry.lines[0].debit_amount = new_amount;
1866 } else {
1867 entry.lines[0].credit_amount = new_amount;
1868 }
1869
1870 Self::rebalance_entry(entry, is_debit, impact);
1872
1873 entry.header.header_text = Some(
1874 entry.header.header_text.clone().unwrap_or_default()
1875 + " [HUMAN_ERROR:ROUNDED]",
1876 );
1877 }
1878 }
1879 4 if entry.header.document_date == entry.header.posting_date => {
1882 let days_late = self.rng.random_range(5..15);
1883 entry.header.document_date =
1884 entry.header.posting_date - chrono::Duration::days(days_late);
1885 entry.header.header_text = Some(
1886 entry.header.header_text.clone().unwrap_or_default()
1887 + " [HUMAN_ERROR:LATE_POSTING]",
1888 );
1889 }
1890 _ => {}
1891 }
1892 }
1893
1894 fn maybe_apply_approval_workflow(
1899 &mut self,
1900 entry: &mut JournalEntry,
1901 _posting_date: NaiveDate,
1902 ) {
1903 use rust_decimal::Decimal;
1904
1905 let amount = entry.total_debit();
1906
1907 if amount <= self.approval_threshold {
1909 let workflow = ApprovalWorkflow::auto_approved(
1911 entry.header.created_by.clone(),
1912 entry.header.user_persona.clone(),
1913 amount,
1914 entry.header.created_at,
1915 );
1916 entry.header.approval_workflow = Some(workflow);
1917 return;
1918 }
1919
1920 entry.header.sox_relevant = true;
1922
1923 let required_levels = if amount > Decimal::new(100000, 0) {
1925 3 } else if amount > Decimal::new(50000, 0) {
1927 2 } else {
1929 1 };
1931
1932 let mut workflow = ApprovalWorkflow::new(
1934 entry.header.created_by.clone(),
1935 entry.header.user_persona.clone(),
1936 amount,
1937 );
1938 workflow.required_levels = required_levels;
1939
1940 let submit_time = entry.header.created_at;
1942 let submit_action = ApprovalAction::new(
1943 entry.header.created_by.clone(),
1944 entry.header.user_persona.clone(),
1945 self.parse_persona(&entry.header.user_persona),
1946 ApprovalActionType::Submit,
1947 0,
1948 )
1949 .with_timestamp(submit_time);
1950
1951 workflow.actions.push(submit_action);
1952 workflow.status = ApprovalStatus::Pending;
1953 workflow.submitted_at = Some(submit_time);
1954
1955 let mut current_time = submit_time;
1957 for level in 1..=required_levels {
1958 let delay_hours = self.rng.random_range(1..4);
1960 current_time += chrono::Duration::hours(delay_hours);
1961
1962 while current_time.weekday() == chrono::Weekday::Sat
1964 || current_time.weekday() == chrono::Weekday::Sun
1965 {
1966 current_time += chrono::Duration::days(1);
1967 }
1968
1969 let (approver_id, approver_role) = self.select_approver(level);
1971
1972 let approve_action = ApprovalAction::new(
1973 approver_id.clone(),
1974 approver_role.to_string(),
1975 approver_role,
1976 ApprovalActionType::Approve,
1977 level,
1978 )
1979 .with_timestamp(current_time);
1980
1981 workflow.actions.push(approve_action);
1982 workflow.current_level = level;
1983 }
1984
1985 workflow.status = ApprovalStatus::Approved;
1987 workflow.approved_at = Some(current_time);
1988
1989 entry.header.approval_workflow = Some(workflow);
1990 }
1991
1992 fn select_approver(&mut self, level: u8) -> (String, UserPersona) {
1994 let persona = match level {
1995 1 => UserPersona::Manager,
1996 2 => UserPersona::Controller,
1997 _ => UserPersona::Executive,
1998 };
1999
2000 if let Some(ref pool) = self.user_pool {
2002 if let Some(user) = pool.get_random_user(persona, &mut self.rng) {
2003 return (user.user_id.clone(), persona);
2004 }
2005 }
2006
2007 let approver_id = match persona {
2009 UserPersona::Manager => format!("MGR{:04}", self.rng.random_range(1..100)),
2010 UserPersona::Controller => format!("CTRL{:04}", self.rng.random_range(1..20)),
2011 UserPersona::Executive => format!("EXEC{:04}", self.rng.random_range(1..10)),
2012 _ => format!("USR{:04}", self.rng.random_range(1..1000)),
2013 };
2014
2015 (approver_id, persona)
2016 }
2017
2018 fn parse_persona(&self, persona_str: &str) -> UserPersona {
2020 match persona_str.to_lowercase().as_str() {
2021 s if s.contains("junior") => UserPersona::JuniorAccountant,
2022 s if s.contains("senior") => UserPersona::SeniorAccountant,
2023 s if s.contains("controller") => UserPersona::Controller,
2024 s if s.contains("manager") => UserPersona::Manager,
2025 s if s.contains("executive") => UserPersona::Executive,
2026 s if s.contains("automated") || s.contains("system") => UserPersona::AutomatedSystem,
2027 _ => UserPersona::JuniorAccountant, }
2029 }
2030
2031 pub fn with_approval(mut self, enabled: bool) -> Self {
2033 self.approval_enabled = enabled;
2034 self
2035 }
2036
2037 pub fn with_approval_threshold(mut self, threshold: rust_decimal::Decimal) -> Self {
2039 self.approval_threshold = threshold;
2040 self
2041 }
2042
2043 pub fn with_sod_violation_rate(mut self, rate: f64) -> Self {
2049 self.sod_violation_rate = rate;
2050 self
2051 }
2052
2053 fn populate_approval_fields(&mut self, entry: &mut JournalEntry, posting_date: NaiveDate) {
2056 if let Some(ref workflow) = entry.header.approval_workflow {
2057 let last_approver = workflow
2059 .actions
2060 .iter()
2061 .rev()
2062 .find(|a| matches!(a.action, ApprovalActionType::Approve));
2063
2064 if let Some(approver_action) = last_approver {
2065 entry.header.approved_by = Some(approver_action.actor_id.clone());
2066 entry.header.approval_date = Some(approver_action.action_timestamp.date_naive());
2067 } else {
2068 entry.header.approved_by = Some(workflow.preparer_id.clone());
2070 entry.header.approval_date = Some(posting_date);
2071 }
2072
2073 if self.rng.random::<f64>() < self.sod_violation_rate {
2075 let creator = entry.header.created_by.clone();
2076 entry.header.approved_by = Some(creator);
2077 entry.header.sod_violation = true;
2078 entry.header.sod_conflict_type = Some(SodConflictType::PreparerApprover);
2079 }
2080 }
2081 }
2082
2083 pub fn with_drift_controller(mut self, controller: DriftController) -> Self {
2089 self.drift_controller = Some(controller);
2090 self
2091 }
2092
2093 pub fn with_drift_config(mut self, config: DriftConfig, seed: u64) -> Self {
2098 if config.enabled {
2099 let total_periods = self.calculate_total_periods();
2100 self.drift_controller = Some(DriftController::new(config, seed, total_periods));
2101 }
2102 self
2103 }
2104
2105 fn calculate_total_periods(&self) -> u32 {
2107 let start_year = self.start_date.year();
2108 let start_month = self.start_date.month();
2109 let end_year = self.end_date.year();
2110 let end_month = self.end_date.month();
2111
2112 ((end_year - start_year) * 12 + (end_month as i32 - start_month as i32) + 1).max(1) as u32
2113 }
2114
2115 fn date_to_period(&self, date: NaiveDate) -> u32 {
2117 let start_year = self.start_date.year();
2118 let start_month = self.start_date.month() as i32;
2119 let date_year = date.year();
2120 let date_month = date.month() as i32;
2121
2122 ((date_year - start_year) * 12 + (date_month - start_month)).max(0) as u32
2123 }
2124
2125 fn get_drift_adjustments(&self, date: NaiveDate) -> DriftAdjustments {
2127 if let Some(ref controller) = self.drift_controller {
2128 let period = self.date_to_period(date);
2129 controller.compute_adjustments(period)
2130 } else {
2131 DriftAdjustments::none()
2132 }
2133 }
2134
2135 #[inline]
2137 fn select_user(&mut self, is_automated: bool) -> (String, String) {
2138 if let Some(ref pool) = self.user_pool {
2139 let persona = if is_automated {
2140 UserPersona::AutomatedSystem
2141 } else {
2142 let roll: f64 = self.rng.random();
2144 if roll < 0.4 {
2145 UserPersona::JuniorAccountant
2146 } else if roll < 0.7 {
2147 UserPersona::SeniorAccountant
2148 } else if roll < 0.85 {
2149 UserPersona::Controller
2150 } else {
2151 UserPersona::Manager
2152 }
2153 };
2154
2155 if let Some(user) = pool.get_random_user(persona, &mut self.rng) {
2156 return (user.user_id.clone(), user.persona.to_string());
2157 }
2158 }
2159
2160 if is_automated {
2162 (
2163 format!("BATCH{:04}", self.rng.random_range(1..=20)),
2164 "automated_system".to_string(),
2165 )
2166 } else {
2167 (
2168 format!("USER{:04}", self.rng.random_range(1..=40)),
2169 "senior_accountant".to_string(),
2170 )
2171 }
2172 }
2173
2174 #[inline]
2176 fn select_source(&mut self) -> TransactionSource {
2177 let roll: f64 = self.rng.random();
2178 let dist = &self.config.source_distribution;
2179
2180 if roll < dist.manual {
2181 TransactionSource::Manual
2182 } else if roll < dist.manual + dist.automated {
2183 TransactionSource::Automated
2184 } else if roll < dist.manual + dist.automated + dist.recurring {
2185 TransactionSource::Recurring
2186 } else {
2187 TransactionSource::Adjustment
2188 }
2189 }
2190
2191 #[inline]
2193 fn document_type_for_process(process: BusinessProcess) -> &'static str {
2202 match process {
2203 BusinessProcess::P2P => "KR",
2204 BusinessProcess::O2C => "DR",
2205 BusinessProcess::R2R => "SA",
2206 BusinessProcess::H2R => "HR",
2207 BusinessProcess::A2R => "AA",
2208 _ => "SA",
2209 }
2210 }
2211
2212 fn select_business_process(&mut self) -> BusinessProcess {
2213 *datasynth_core::utils::weighted_select(&mut self.rng, &self.business_process_weights)
2214 }
2215
2216 #[inline]
2217 fn select_debit_account(&mut self) -> &GLAccount {
2218 let accounts = self.coa.get_accounts_by_type(AccountType::Asset);
2219 let expense_accounts = self.coa.get_accounts_by_type(AccountType::Expense);
2220
2221 let all: Vec<_> = if self.rng.random::<f64>() < 0.6 {
2223 accounts
2224 } else {
2225 expense_accounts
2226 };
2227
2228 all.choose(&mut self.rng).copied().unwrap_or_else(|| {
2229 tracing::warn!(
2230 "Account selection returned empty list, falling back to first COA account"
2231 );
2232 &self.coa.accounts[0]
2233 })
2234 }
2235
2236 #[inline]
2237 fn select_credit_account(&mut self) -> &GLAccount {
2238 let liability_accounts = self.coa.get_accounts_by_type(AccountType::Liability);
2239 let revenue_accounts = self.coa.get_accounts_by_type(AccountType::Revenue);
2240
2241 let all: Vec<_> = if self.rng.random::<f64>() < 0.6 {
2243 liability_accounts
2244 } else {
2245 revenue_accounts
2246 };
2247
2248 all.choose(&mut self.rng).copied().unwrap_or_else(|| {
2249 tracing::warn!(
2250 "Account selection returned empty list, falling back to first COA account"
2251 );
2252 &self.coa.accounts[0]
2253 })
2254 }
2255}
2256
2257impl Generator for JournalEntryGenerator {
2258 type Item = JournalEntry;
2259 type Config = (
2260 TransactionConfig,
2261 Arc<ChartOfAccounts>,
2262 Vec<String>,
2263 NaiveDate,
2264 NaiveDate,
2265 );
2266
2267 fn new(config: Self::Config, seed: u64) -> Self {
2268 Self::new_with_params(config.0, config.1, config.2, config.3, config.4, seed)
2269 }
2270
2271 fn generate_one(&mut self) -> Self::Item {
2272 self.generate()
2273 }
2274
2275 fn reset(&mut self) {
2276 self.rng = seeded_rng(self.seed, 0);
2277 self.line_sampler.reset(self.seed + 1);
2278 self.amount_sampler.reset(self.seed + 2);
2279 self.temporal_sampler.reset(self.seed + 3);
2280 if let Some(ref mut adv) = self.advanced_amount_sampler {
2281 adv.reset(self.seed + 2);
2282 }
2283 self.count = 0;
2284 self.uuid_factory.reset();
2285
2286 let mut ref_gen = ReferenceGenerator::new(
2288 self.start_date.year(),
2289 self.companies
2290 .first()
2291 .map(std::string::String::as_str)
2292 .unwrap_or("1000"),
2293 );
2294 ref_gen.set_prefix(
2295 ReferenceType::Invoice,
2296 &self.template_config.references.invoice_prefix,
2297 );
2298 ref_gen.set_prefix(
2299 ReferenceType::PurchaseOrder,
2300 &self.template_config.references.po_prefix,
2301 );
2302 ref_gen.set_prefix(
2303 ReferenceType::SalesOrder,
2304 &self.template_config.references.so_prefix,
2305 );
2306 self.reference_generator = ref_gen;
2307 }
2308
2309 fn count(&self) -> u64 {
2310 self.count
2311 }
2312
2313 fn seed(&self) -> u64 {
2314 self.seed
2315 }
2316}
2317
2318use datasynth_core::traits::ParallelGenerator;
2319
2320impl ParallelGenerator for JournalEntryGenerator {
2321 fn split(self, parts: usize) -> Vec<Self> {
2327 let parts = parts.max(1);
2328 (0..parts)
2329 .map(|i| {
2330 let sub_seed = self
2332 .seed
2333 .wrapping_add((i as u64).wrapping_mul(0x9E3779B97F4A7C15));
2334
2335 let mut gen = JournalEntryGenerator::new_with_full_config(
2336 self.config.clone(),
2337 Arc::clone(&self.coa),
2338 self.companies.clone(),
2339 self.start_date,
2340 self.end_date,
2341 sub_seed,
2342 self.template_config.clone(),
2343 self.user_pool.clone(),
2344 );
2345
2346 gen.company_selector = self.company_selector.clone();
2348 gen.vendor_pool = self.vendor_pool.clone();
2349 gen.customer_pool = self.customer_pool.clone();
2350 gen.material_pool = self.material_pool.clone();
2351 gen.using_real_master_data = self.using_real_master_data;
2352 gen.fraud_config = self.fraud_config.clone();
2353 gen.persona_errors_enabled = self.persona_errors_enabled;
2354 gen.approval_enabled = self.approval_enabled;
2355 gen.approval_threshold = self.approval_threshold;
2356 gen.sod_violation_rate = self.sod_violation_rate;
2357 if let Some(mut adv) = self.advanced_amount_sampler.clone() {
2362 adv.reset(sub_seed.wrapping_add(2));
2363 gen.advanced_amount_sampler = Some(adv);
2364 }
2365 if let Some(mut cond) = self.conditional_amount_override.clone() {
2368 cond.reset(sub_seed.wrapping_add(17));
2369 gen.conditional_amount_override = Some(cond);
2370 }
2371 if let Some(mut cop) = self.correlation_copula.clone() {
2373 cop.reset(sub_seed.wrapping_add(31));
2374 gen.correlation_copula = Some(cop);
2375 }
2376
2377 gen.uuid_factory = DeterministicUuidFactory::for_partition(
2379 sub_seed,
2380 GeneratorType::JournalEntry,
2381 i as u8,
2382 );
2383
2384 if let Some(ref config) = self.temporal_patterns_config {
2386 gen.temporal_patterns_config = Some(config.clone());
2387 if config.business_days.enabled {
2389 if let Some(ref bdc) = self.business_day_calculator {
2390 gen.business_day_calculator = Some(bdc.clone());
2391 }
2392 }
2393 if config.processing_lags.enabled {
2395 let lag_config =
2396 Self::convert_processing_lag_config(&config.processing_lags);
2397 gen.processing_lag_calculator =
2398 Some(ProcessingLagCalculator::with_config(sub_seed, lag_config));
2399 }
2400 }
2401
2402 if let Some(ref dc) = self.drift_controller {
2404 gen.drift_controller = Some(dc.clone());
2405 }
2406
2407 gen
2408 })
2409 .collect()
2410 }
2411}
2412
2413#[cfg(test)]
2414#[allow(clippy::unwrap_used)]
2415mod tests {
2416 use super::*;
2417 use crate::ChartOfAccountsGenerator;
2418
/// Every generated entry without an injected human error must balance, every
/// entry must have at least two lines, and at least 80 of 100 entries must be
/// free of human-error markers.
#[test]
fn test_generate_balanced_entries() {
    let mut chart_builder =
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
    let chart = Arc::new(chart_builder.generate());

    let period_start = NaiveDate::from_ymd_opt(2024, 1, 1).unwrap();
    let period_end = NaiveDate::from_ymd_opt(2024, 12, 31).unwrap();

    let mut je_gen = JournalEntryGenerator::new_with_params(
        TransactionConfig::default(),
        chart,
        vec!["1000".to_string()],
        period_start,
        period_end,
        42,
    );

    let mut balanced_count = 0;
    for _ in 0..100 {
        let entry = je_gen.generate();

        // Entries tagged with a human error are exempt from the balance check.
        let tagged_with_error = entry
            .header
            .header_text
            .as_deref()
            .is_some_and(|text| text.contains("[HUMAN_ERROR:"));

        if !tagged_with_error {
            assert!(
                entry.is_balanced(),
                "Entry {:?} is not balanced",
                entry.header.document_id
            );
            balanced_count += 1;
        }
        assert!(entry.line_count() >= 2, "Entry has fewer than 2 lines");
    }

    assert!(
        balanced_count >= 80,
        "Expected at least 80 balanced entries, got {}",
        balanced_count
    );
}
2464
/// Two generators built from identical inputs and the same seed must produce
/// the same document ids and total debits, entry by entry.
#[test]
fn test_deterministic_generation() {
    let mut chart_builder =
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
    let chart = Arc::new(chart_builder.generate());

    let build_generator = || {
        JournalEntryGenerator::new_with_params(
            TransactionConfig::default(),
            Arc::clone(&chart),
            vec!["1000".to_string()],
            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
            42,
        )
    };

    let mut first = build_generator();
    let mut second = build_generator();

    for _ in 0..50 {
        let from_first = first.generate();
        let from_second = second.generate();
        assert_eq!(from_first.header.document_id, from_second.header.document_id);
        assert_eq!(from_first.total_debit(), from_second.total_debit());
    }
}
2496
/// With header text, line text and references all enabled (and persona errors
/// disabled), every entry must carry header text, a reference, a business
/// process and line text on each line, and must balance.
#[test]
fn test_templates_generate_descriptions() {
    use datasynth_config::schema::{
        CultureDistribution, DescriptionTemplateConfig, NameTemplateConfig,
        ReferenceTemplateConfig,
    };

    let mut chart_builder =
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
    let chart = Arc::new(chart_builder.generate());

    let names = NameTemplateConfig {
        generate_realistic_names: true,
        email_domain: "test.com".to_string(),
        culture_distribution: CultureDistribution::default(),
    };
    let descriptions = DescriptionTemplateConfig {
        generate_header_text: true,
        generate_line_text: true,
    };
    let references = ReferenceTemplateConfig {
        generate_references: true,
        invoice_prefix: "TEST-INV".to_string(),
        po_prefix: "TEST-PO".to_string(),
        so_prefix: "TEST-SO".to_string(),
    };
    let template_config = TemplateConfig {
        names,
        descriptions,
        references,
        path: None,
        merge_strategy: datasynth_config::TemplateMergeStrategy::default(),
    };

    let mut je_gen = JournalEntryGenerator::new_with_full_config(
        TransactionConfig::default(),
        chart,
        vec!["1000".to_string()],
        NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
        NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
        42,
        template_config,
        None,
    )
    .with_persona_errors(false);

    for _ in 0..10 {
        let entry = je_gen.generate();
        let header = &entry.header;

        assert!(
            header.header_text.is_some(),
            "Header text should be populated"
        );
        assert!(header.reference.is_some(), "Reference should be populated");
        assert!(
            header.business_process.is_some(),
            "Business process should be set"
        );

        assert!(
            entry.lines.iter().all(|line| line.line_text.is_some()),
            "Line text should be populated"
        );

        assert!(entry.is_balanced());
    }
}
2566
/// With a user pool supplied, every generated entry must name a creator.
#[test]
fn test_user_pool_integration() {
    let mut chart_builder =
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
    let chart = Arc::new(chart_builder.generate());

    let companies = vec!["1000".to_string()];

    let mut user_builder = crate::UserGenerator::new(42);
    let pool = user_builder.generate_standard(&companies);

    let mut je_gen = JournalEntryGenerator::new_with_full_config(
        TransactionConfig::default(),
        chart,
        companies,
        NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
        NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
        42,
        TemplateConfig::default(),
        Some(pool),
    );

    for _ in 0..20 {
        let created_by = je_gen.generate().header.created_by;
        assert!(!created_by.is_empty());
    }
}
2599
/// Confirms that `is_using_real_master_data` is false on a freshly built generator and true
/// once vendors, customers and materials have been attached through the individual builders.
#[test]
fn test_master_data_connection() {
    let coa = Arc::new(
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42)
            .generate(),
    );

    // Hand-built master data records used as the "real" data set.
    let vendor_records = vec![
        Vendor::new("V-TEST-001", "Test Vendor Alpha", VendorType::Supplier),
        Vendor::new("V-TEST-002", "Test Vendor Beta", VendorType::Technology),
    ];
    let customer_records = vec![
        Customer::new("C-TEST-001", "Test Customer One", CustomerType::Corporate),
        Customer::new(
            "C-TEST-002",
            "Test Customer Two",
            CustomerType::SmallBusiness,
        ),
    ];
    let material_records = vec![Material::new(
        "MAT-TEST-001",
        "Test Material A",
        MaterialType::RawMaterial,
    )];

    let period_start = NaiveDate::from_ymd_opt(2024, 1, 1).unwrap();
    let period_end = NaiveDate::from_ymd_opt(2024, 12, 31).unwrap();
    let bare = JournalEntryGenerator::new_with_params(
        TransactionConfig::default(),
        coa,
        vec!["1000".to_string()],
        period_start,
        period_end,
        42,
    );

    // Before any master data is attached the flag must be off.
    assert!(!bare.is_using_real_master_data());

    let enriched = bare
        .with_vendors(&vendor_records)
        .with_customers(&customer_records)
        .with_materials(&material_records);

    // After attaching all three data sets the flag must be on.
    assert!(enriched.is_using_real_master_data());
}
2651
/// Confirms that the single-call `with_master_data` builder switches
/// `is_using_real_master_data` on.
#[test]
fn test_with_master_data_convenience_method() {
    let coa = Arc::new(
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42)
            .generate(),
    );

    // One record of each kind is enough for this check.
    let vendor_records = vec![Vendor::new("V-001", "Vendor One", VendorType::Supplier)];
    let customer_records = vec![Customer::new(
        "C-001",
        "Customer One",
        CustomerType::Corporate,
    )];
    let material_records = vec![Material::new(
        "MAT-001",
        "Material One",
        MaterialType::RawMaterial,
    )];

    let period_start = NaiveDate::from_ymd_opt(2024, 1, 1).unwrap();
    let period_end = NaiveDate::from_ymd_opt(2024, 12, 31).unwrap();
    let base = JournalEntryGenerator::new_with_params(
        TransactionConfig::default(),
        coa,
        vec!["1000".to_string()],
        period_start,
        period_end,
        42,
    );
    let generator = base.with_master_data(&vendor_records, &customer_records, &material_records);

    assert!(generator.is_using_real_master_data());
}
2682
/// Compares the rate returned by `apply_stress_factors` for a base rate of 0.1 across five
/// dates in 2024: a baseline day, a late-June day, a late-December day, a Friday and a Monday.
///
/// NOTE(review): in the 2024 calendar, 2024-06-15 and 2024-06-29 both fall on a Saturday,
/// while the failure messages describe the baseline as "mid-week" — confirm these dates are
/// the intended ones.
#[test]
fn test_stress_factors_increase_error_rate() {
    let coa = Arc::new(
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42)
            .generate(),
    );

    let period_start = NaiveDate::from_ymd_opt(2024, 1, 1).unwrap();
    let period_end = NaiveDate::from_ymd_opt(2024, 12, 31).unwrap();
    let generator = JournalEntryGenerator::new_with_params(
        TransactionConfig::default(),
        coa,
        vec!["1000".to_string()],
        period_start,
        period_end,
        42,
    );

    let base_rate = 0.1;
    // Adjusted rate for the given calendar date, always starting from `base_rate`.
    let rate_on = |year: i32, month: u32, day: u32| {
        generator.apply_stress_factors(
            base_rate,
            NaiveDate::from_ymd_opt(year, month, day).unwrap(),
        )
    };

    // Baseline: the adjustment must stay within 0.01 of the base rate.
    let regular_rate = rate_on(2024, 6, 15);
    assert!(
        (regular_rate - base_rate).abs() < 0.01,
        "Regular day should have minimal stress factor adjustment"
    );

    // Late June must exceed the baseline.
    let month_end_rate = rate_on(2024, 6, 29);
    assert!(
        month_end_rate > regular_rate,
        "Month end should have higher error rate than regular day"
    );

    // Late December must exceed late June.
    let year_end_rate = rate_on(2024, 12, 30);
    assert!(
        year_end_rate > month_end_rate,
        "Year end should have highest error rate"
    );

    // Friday must exceed the baseline.
    let friday_rate = rate_on(2024, 6, 14);
    assert!(
        friday_rate > regular_rate,
        "Friday should have higher error rate than mid-week"
    );

    // Monday must exceed the baseline.
    let monday_rate = rate_on(2024, 6, 17);
    assert!(
        monday_rate > regular_rate,
        "Monday should have higher error rate than mid-week"
    );
}
2740
/// Generates 200 entries (seed 123, persona errors off) and checks that every entry is
/// balanced and that at least one posting date is shared by more than one entry.
#[test]
fn test_batching_produces_similar_entries() {
    let coa = Arc::new(
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42)
            .generate(),
    );

    let period_start = NaiveDate::from_ymd_opt(2024, 1, 1).unwrap();
    let period_end = NaiveDate::from_ymd_opt(2024, 12, 31).unwrap();
    let mut je_gen = JournalEntryGenerator::new_with_params(
        TransactionConfig::default(),
        coa,
        vec!["1000".to_string()],
        period_start,
        period_end,
        123,
    )
    .with_persona_errors(false);

    let entries: Vec<JournalEntry> = std::iter::repeat_with(|| je_gen.generate())
        .take(200)
        .collect();

    assert!(
        entries.iter().all(|entry| entry.is_balanced()),
        "All entries including batched should be balanced"
    );

    // Tally how many entries landed on each posting date.
    let mut per_date: std::collections::HashMap<NaiveDate, usize> =
        std::collections::HashMap::new();
    for posting_date in entries.iter().map(|entry| entry.header.posting_date) {
        *per_date.entry(posting_date).or_default() += 1;
    }

    let any_shared_date = per_date.values().any(|&hits| hits > 1);
    assert!(
        any_shared_date,
        "With batching, should see some dates with multiple entries"
    );
}
2783
/// With temporal patterns and business-day handling enabled for the "US" calendar region,
/// none of 100 generated entries may have a Saturday or Sunday posting date.
#[test]
fn test_temporal_patterns_business_days() {
    use datasynth_config::schema::{
        BusinessDaySchemaConfig, CalendarSchemaConfig, TemporalPatternsConfig,
    };

    let coa = Arc::new(
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42)
            .generate(),
    );

    // Only the switches relevant to this test are set; everything else stays at its default.
    let business_days = BusinessDaySchemaConfig {
        enabled: true,
        ..Default::default()
    };
    let calendars = CalendarSchemaConfig {
        regions: vec!["US".to_string()],
        custom_holidays: Vec::new(),
    };
    let temporal_config = TemporalPatternsConfig {
        enabled: true,
        business_days,
        calendars,
        ..Default::default()
    };

    // The generation window covers the first quarter of 2024.
    let period_start = NaiveDate::from_ymd_opt(2024, 1, 1).unwrap();
    let period_end = NaiveDate::from_ymd_opt(2024, 3, 31).unwrap();
    let mut je_gen = JournalEntryGenerator::new_with_params(
        TransactionConfig::default(),
        coa,
        vec!["1000".to_string()],
        period_start,
        period_end,
        42,
    )
    .with_temporal_patterns(temporal_config, 42)
    .with_persona_errors(false);

    let entries: Vec<JournalEntry> = std::iter::repeat_with(|| je_gen.generate())
        .take(100)
        .collect();

    for entry in &entries {
        let on_weekend = matches!(
            entry.header.posting_date.weekday(),
            chrono::Weekday::Sat | chrono::Weekday::Sun
        );
        assert!(
            !on_weekend,
            "Posting date {:?} should not be a weekend",
            entry.header.posting_date
        );
    }
}
2831
/// Without calling `with_temporal_patterns`, fewer than 5% of 500 generated entries may have
/// a Saturday or Sunday posting date.
#[test]
fn test_default_generation_filters_weekends() {
    let coa = Arc::new(
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42)
            .generate(),
    );

    let period_start = NaiveDate::from_ymd_opt(2024, 1, 1).unwrap();
    let period_end = NaiveDate::from_ymd_opt(2024, 12, 31).unwrap();
    let mut je_gen = JournalEntryGenerator::new_with_params(
        TransactionConfig::default(),
        coa,
        vec!["1000".to_string()],
        period_start,
        period_end,
        42,
    )
    .with_persona_errors(false);

    let total = 500;
    let entries: Vec<JournalEntry> = (0..total).map(|_| je_gen.generate()).collect();

    // Count entries whose posting date falls on Saturday or Sunday.
    let weekend_count = entries
        .iter()
        .map(|entry| entry.header.posting_date.weekday())
        .filter(|day| matches!(day, chrono::Weekday::Sat | chrono::Weekday::Sun))
        .count();

    let weekend_pct = weekend_count as f64 / total as f64;
    assert!(
        weekend_pct < 0.05,
        "Expected weekend entries <5% of total without temporal_patterns enabled, \
         but got {:.1}% ({}/{})",
        weekend_pct * 100.0,
        weekend_count,
        total
    );
}
2872
/// Generates 200 entries (seed 99, persona errors and batching off) and checks that more than
/// three distinct document types occur and that "SA" makes up less than half of them.
#[test]
fn test_document_type_derived_from_business_process() {
    let coa = Arc::new(
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42)
            .generate(),
    );

    let period_start = NaiveDate::from_ymd_opt(2024, 1, 1).unwrap();
    let period_end = NaiveDate::from_ymd_opt(2024, 12, 31).unwrap();
    let mut je_gen = JournalEntryGenerator::new_with_params(
        TransactionConfig::default(),
        coa,
        vec!["1000".to_string()],
        period_start,
        period_end,
        99,
    )
    .with_persona_errors(false)
    .with_batching(false);

    let total = 200;

    // Record the document type of every generated entry, in generation order.
    let observed: Vec<_> = (0..total)
        .map(|_| je_gen.generate().header.document_type.clone())
        .collect();

    let sa_count = observed.iter().filter(|dt| *dt == "SA").count();
    let doc_types: std::collections::HashSet<_> = observed.into_iter().collect();

    assert!(
        doc_types.len() > 3,
        "Expected >3 distinct document types, got {} ({:?})",
        doc_types.len(),
        doc_types,
    );

    let sa_pct = sa_count as f64 / total as f64;
    assert!(
        sa_pct < 0.50,
        "Expected SA <50%, got {:.1}% ({}/{})",
        sa_pct * 100.0,
        sa_count,
        total,
    );
}
2921
/// More than 95% of all lines across 200 generated entries must have `account_description`
/// set.
#[test]
fn test_enrich_line_items_account_description() {
    let coa = Arc::new(
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42)
            .generate(),
    );

    let period_start = NaiveDate::from_ymd_opt(2024, 1, 1).unwrap();
    let period_end = NaiveDate::from_ymd_opt(2024, 12, 31).unwrap();
    let mut je_gen = JournalEntryGenerator::new_with_params(
        TransactionConfig::default(),
        coa,
        vec!["1000".to_string()],
        period_start,
        period_end,
        42,
    )
    .with_persona_errors(false);

    let entries: Vec<JournalEntry> = std::iter::repeat_with(|| je_gen.generate())
        .take(200)
        .collect();

    // Flatten every entry's lines into one list so both counts come from the same set.
    let all_lines: Vec<_> = entries.iter().flat_map(|e| e.lines.iter()).collect();
    let total_lines = all_lines.len();
    let lines_with_desc = all_lines
        .iter()
        .filter(|line| line.account_description.is_some())
        .count();

    let desc_pct = lines_with_desc as f64 / total_lines as f64;
    assert!(
        desc_pct > 0.95,
        "Expected >95% of lines to have account_description, got {:.1}% ({}/{})",
        desc_pct * 100.0,
        lines_with_desc,
        total_lines,
    );
}
2958
/// Among lines whose `gl_account` starts with '5' or '6', more than 80% must have a
/// `cost_center`. The percentage check is skipped entirely when 300 generated entries contain
/// no such line.
#[test]
fn test_enrich_line_items_cost_center_for_expense_accounts() {
    let coa = Arc::new(
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42)
            .generate(),
    );

    let period_start = NaiveDate::from_ymd_opt(2024, 1, 1).unwrap();
    let period_end = NaiveDate::from_ymd_opt(2024, 12, 31).unwrap();
    let mut je_gen = JournalEntryGenerator::new_with_params(
        TransactionConfig::default(),
        coa,
        vec!["1000".to_string()],
        period_start,
        period_end,
        42,
    )
    .with_persona_errors(false);

    let entries: Vec<JournalEntry> = std::iter::repeat_with(|| je_gen.generate())
        .take(300)
        .collect();

    // Select lines by the leading character of the account number.
    let expense_lines: Vec<_> = entries
        .iter()
        .flat_map(|e| e.lines.iter())
        .filter(|line| {
            matches!(
                line.gl_account.chars().next(),
                Some('5') | Some('6')
            )
        })
        .collect();

    // Nothing to measure: leave without asserting.
    if expense_lines.is_empty() {
        return;
    }

    let with_cc = expense_lines
        .iter()
        .filter(|line| line.cost_center.is_some())
        .count();
    let cc_pct = with_cc as f64 / expense_lines.len() as f64;
    assert!(
        cc_pct > 0.80,
        "Expected >80% of expense lines to have cost_center, got {:.1}% ({}/{})",
        cc_pct * 100.0,
        with_cc,
        expense_lines.len(),
    );
}
3003
/// More than 95% of all lines across 100 generated entries must have `profit_center` set,
/// and more than 95% must have `line_text` set.
#[test]
fn test_enrich_line_items_profit_center_and_line_text() {
    let coa = Arc::new(
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42)
            .generate(),
    );

    let period_start = NaiveDate::from_ymd_opt(2024, 1, 1).unwrap();
    let period_end = NaiveDate::from_ymd_opt(2024, 12, 31).unwrap();
    let mut je_gen = JournalEntryGenerator::new_with_params(
        TransactionConfig::default(),
        coa,
        vec!["1000".to_string()],
        period_start,
        period_end,
        42,
    )
    .with_persona_errors(false);

    let entries: Vec<JournalEntry> = std::iter::repeat_with(|| je_gen.generate())
        .take(100)
        .collect();

    // One flattened view of every line, shared by both coverage checks.
    let all_lines: Vec<_> = entries.iter().flat_map(|e| e.lines.iter()).collect();
    let total_lines = all_lines.len();

    // Profit-center coverage.
    let with_pc = all_lines
        .iter()
        .filter(|line| line.profit_center.is_some())
        .count();
    let pc_pct = with_pc as f64 / total_lines as f64;
    assert!(
        pc_pct > 0.95,
        "Expected >95% of lines to have profit_center, got {:.1}% ({}/{})",
        pc_pct * 100.0,
        with_pc,
        total_lines,
    );

    // Line-text coverage.
    let with_text = all_lines
        .iter()
        .filter(|line| line.line_text.is_some())
        .count();
    let text_pct = with_text as f64 / total_lines as f64;
    assert!(
        text_pct > 0.95,
        "Expected >95% of lines to have line_text, got {:.1}% ({}/{})",
        text_pct * 100.0,
        with_text,
        total_lines,
    );
}
3055
/// Every one of 100 generated entries must have a non-empty `source_system`, a non-empty
/// `created_by`, and a `created_date`.
#[test]
fn test_je_has_audit_flags() {
    let coa = Arc::new(
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42)
            .generate(),
    );

    let period_start = NaiveDate::from_ymd_opt(2024, 1, 1).unwrap();
    let period_end = NaiveDate::from_ymd_opt(2024, 12, 31).unwrap();
    let mut je_gen = JournalEntryGenerator::new_with_params(
        TransactionConfig::default(),
        coa,
        vec!["1000".to_string()],
        period_start,
        period_end,
        42,
    )
    .with_persona_errors(false);

    // Each entry is inspected right after it is generated.
    for entry in std::iter::repeat_with(|| je_gen.generate()).take(100) {
        let header = &entry.header;

        assert!(
            !header.source_system.is_empty(),
            "source_system should be populated, got empty string"
        );
        assert!(
            !header.created_by.is_empty(),
            "created_by should be populated"
        );
        assert!(
            header.created_date.is_some(),
            "created_date should be populated"
        );
    }
}
3096
/// Over 1000 entries (persona errors and batching off) the share flagged `is_manual` must lie
/// strictly between 1% and 50%, and `is_manual` must equal `source == Manual` on every entry.
#[test]
fn test_manual_entry_rate() {
    let coa = Arc::new(
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42)
            .generate(),
    );

    let period_start = NaiveDate::from_ymd_opt(2024, 1, 1).unwrap();
    let period_end = NaiveDate::from_ymd_opt(2024, 12, 31).unwrap();
    let mut je_gen = JournalEntryGenerator::new_with_params(
        TransactionConfig::default(),
        coa,
        vec!["1000".to_string()],
        period_start,
        period_end,
        42,
    )
    .with_persona_errors(false)
    .with_batching(false);

    let total = 1000;
    let entries: Vec<JournalEntry> = (0..total).map(|_| je_gen.generate()).collect();

    let manual_count = entries
        .iter()
        .filter(|entry| entry.header.is_manual)
        .count();
    let manual_rate = manual_count as f64 / total as f64;

    // Both bounds are exclusive.
    assert!(
        0.01 < manual_rate && manual_rate < 0.50,
        "Manual entry rate should be reasonable (1%-50%), got {:.1}% ({}/{})",
        manual_rate * 100.0,
        manual_count,
        total,
    );

    // The boolean flag and the source enum must tell the same story.
    for header in entries.iter().map(|entry| &entry.header) {
        let source_is_manual = header.source == TransactionSource::Manual;
        assert_eq!(
            header.is_manual, source_is_manual,
            "is_manual should match source == Manual"
        );
    }
}
3139
/// For 500 entries (persona errors and batching off): an entry flagged `is_manual` must have
/// `source_system` equal to "manual" or "spreadsheet", and any other entry must have neither.
#[test]
fn test_manual_source_consistency() {
    let coa = Arc::new(
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42)
            .generate(),
    );

    let period_start = NaiveDate::from_ymd_opt(2024, 1, 1).unwrap();
    let period_end = NaiveDate::from_ymd_opt(2024, 12, 31).unwrap();
    let mut je_gen = JournalEntryGenerator::new_with_params(
        TransactionConfig::default(),
        coa,
        vec!["1000".to_string()],
        period_start,
        period_end,
        42,
    )
    .with_persona_errors(false)
    .with_batching(false);

    for entry in std::iter::repeat_with(|| je_gen.generate()).take(500) {
        // True when the source system is one of the two manual-style values.
        let manual_style_system = entry.header.source_system == "manual"
            || entry.header.source_system == "spreadsheet";

        if entry.header.is_manual {
            assert!(
                manual_style_system,
                "Manual entry should have source_system 'manual' or 'spreadsheet', got '{}'",
                entry.header.source_system,
            );
        } else {
            assert!(
                !manual_style_system,
                "Non-manual entry should not have source_system 'manual' or 'spreadsheet', got '{}'",
                entry.header.source_system,
            );
        }
    }
}
3179
/// For 500 generated entries, whenever `created_date` is present its date component must not
/// be later than the entry's `posting_date`. Entries without a `created_date` are not checked.
#[test]
fn test_created_date_before_posting() {
    let coa = Arc::new(
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42)
            .generate(),
    );

    let period_start = NaiveDate::from_ymd_opt(2024, 1, 1).unwrap();
    let period_end = NaiveDate::from_ymd_opt(2024, 12, 31).unwrap();
    let mut je_gen = JournalEntryGenerator::new_with_params(
        TransactionConfig::default(),
        coa,
        vec!["1000".to_string()],
        period_start,
        period_end,
        42,
    )
    .with_persona_errors(false);

    for entry in std::iter::repeat_with(|| je_gen.generate()).take(500) {
        // Reduce the creation timestamp to its calendar date before comparing.
        let created_on = entry.header.created_date.map(|stamp| stamp.date());

        if let Some(created_naive_date) = created_on {
            assert!(
                created_naive_date <= entry.header.posting_date,
                "created_date ({}) should be <= posting_date ({})",
                created_naive_date,
                entry.header.posting_date,
            );
        }
    }
}
3210}