1use chrono::{Datelike, NaiveDate, Timelike};
4use datasynth_core::utils::seeded_rng;
5use rand::prelude::*;
6use rand_chacha::ChaCha8Rng;
7use rust_decimal::prelude::*;
8use rust_decimal::Decimal;
9use std::sync::{Arc, LazyLock};
10
11use tracing::debug;
12
13use datasynth_config::schema::{
14 AdvancedDistributionConfig, FraudConfig, GeneratorConfig, MixtureDistributionType,
15 TemplateConfig, TemporalPatternsConfig, TransactionConfig,
16};
17use datasynth_core::distributions::{
18 AdvancedAmountSampler, BusinessDayCalculator, CrossDayConfig, DriftAdjustments, DriftConfig,
19 DriftController, EventType, IndustryAmountProfile, IndustryType, LagDistribution,
20 PeriodEndConfig, PeriodEndDynamics, PeriodEndModel, ProcessingLagCalculator,
21 ProcessingLagConfig, *,
22};
23use datasynth_core::models::*;
24use datasynth_core::templates::{
25 descriptions::DescriptionContext, DescriptionGenerator, ReferenceGenerator, ReferenceType,
26};
27use datasynth_core::traits::Generator;
28use datasynth_core::uuid_factory::{DeterministicUuidFactory, GeneratorType};
29use datasynth_core::CountryPack;
30
31use crate::company_selector::WeightedCompanySelector;
32use crate::user_generator::{UserGenerator, UserGeneratorConfig};
33
34use datasynth_core::distributions::text_taxonomy::{PiiPlaceholderKind, PlaceholderResolver};
35
/// Built-in source-mix prior, created lazily from `SourceMixPrior::sap_default`.
///
/// `sample_sap_source_code` draws from it when no priors are loaded and the
/// `synthetic_source_codes` config option is not switched off.
static DEFAULT_SOURCE_MIX: LazyLock<
    datasynth_core::distributions::behavioral_priors::SourceMixPrior,
> = LazyLock::new(datasynth_core::distributions::behavioral_priors::SourceMixPrior::sap_default);

/// Reversal probability used when `TransactionConfig::reversal_rate` is unset.
const DEFAULT_REVERSAL_RATE: f64 = 0.10;

/// Allocation-batch probability used when
/// `TransactionConfig::allocation_batch_rate` is unset.
const DEFAULT_ALLOCATION_RATE: f64 = 0.008;
/// Candidate foreign currencies as `(currency code, rate)` pairs.
///
/// `maybe_apply_foreign_currency` picks one pair uniformly, stores the rate as
/// the header exchange rate and divides each line's ledger amount by it.
/// NOTE(review): the rates look like static USD-per-unit quotes — confirm.
const FOREIGN_CCYS: &[(&str, f64)] = &[
    ("EUR", 1.09),
    ("GBP", 1.27),
    ("CHF", 1.12),
    ("CAD", 0.74),
    ("JPY", 0.0068),
    ("AUD", 0.66),
    ("CNY", 0.14),
];
/// Inclusive lower bound on the number of parts an allocation batch splits
/// its source amount into.
const ALLOCATION_MIN_TARGETS: u32 = 30;
/// Inclusive upper bound on the number of parts an allocation batch splits
/// its source amount into.
const ALLOCATION_MAX_TARGETS: u32 = 80;
70
/// Exponent of the power law `1 / rank^ZIPF_ALPHA` tabulated in [`ZIPF_CUM`].
const ZIPF_ALPHA: f64 = 2.0;
/// Highest rank tabulated in [`ZIPF_CUM`].
const ZIPF_CAP: usize = 16_384;
/// Running sums of `1 / rank^ZIPF_ALPHA`.
///
/// Entry `0` is `0.0`; entry `k` is the sum over ranks `1..=k`, so the table
/// holds `ZIPF_CAP + 1` values. It is built once, on first access.
static ZIPF_CUM: LazyLock<Vec<f64>> = LazyLock::new(|| {
    let mut table = Vec::with_capacity(ZIPF_CAP + 1);
    table.push(0.0_f64);
    // `scan` carries the running total forward in rank order, so the
    // floating-point accumulation sequence is fixed.
    table.extend((1..=ZIPF_CAP).scan(0.0_f64, |running, rank| {
        *running += 1.0 / (rank as f64).powf(ZIPF_ALPHA);
        Some(*running)
    }));
    table
});
91
/// Pools of concrete values used to fill PII placeholders.
///
/// Each field backs one `PiiPlaceholderKind`. When a pool is empty, the
/// `PlaceholderResolver` implementation returns a fixed synthetic fallback
/// string for that kind instead.
#[derive(Debug, Default)]
pub struct MasterDataResolver {
    /// Candidate values for `PiiPlaceholderKind::Company`.
    pub companies: Vec<String>,
    /// Candidate values for `PiiPlaceholderKind::Person`.
    pub persons: Vec<String>,
    /// Candidate values for `PiiPlaceholderKind::Street`.
    pub streets: Vec<String>,
    /// Candidate values for `PiiPlaceholderKind::Patient`.
    pub patients: Vec<String>,
}
105
106impl PlaceholderResolver for MasterDataResolver {
107 fn resolve(&mut self, kind: PiiPlaceholderKind, rng: &mut dyn rand::Rng) -> String {
108 use rand::RngExt;
109 let (pool, fallback): (&Vec<String>, &str) = match kind {
110 PiiPlaceholderKind::Company => (&self.companies, "Synthetic Company AG"),
111 PiiPlaceholderKind::Person => (&self.persons, "Synthetic Person"),
112 PiiPlaceholderKind::Street => (&self.streets, "Synthetic Street 1"),
113 PiiPlaceholderKind::Patient => (&self.patients, "Synthetic Patient"),
114 };
115 if pool.is_empty() {
116 return fallback.to_string();
117 }
118 let idx = rng.random_range(0..pool.len());
119 pool[idx].clone()
120 }
121}
122
/// Returns the fixed list of obviously synthetic patient names.
///
/// `_locale` is accepted but currently ignored: the same eight names are
/// returned, in the same order, for every locale.
fn synthetic_patient_pool(_locale: &str) -> Vec<String> {
    const NAMES: [&str; 8] = [
        "Alex Beispiel",
        "Bea Muster",
        "Cleo Synthetic",
        "Demo Example",
        "Erik Probe",
        "Fred Testperson",
        "Gerda Platzhalter",
        "Hans Demo",
    ];
    let mut pool = Vec::with_capacity(NAMES.len());
    for name in NAMES {
        pool.push(String::from(name));
    }
    pool
}
149
/// Stateful generator of synthetic journal entries.
///
/// Holds the transaction configuration, master-data pools, samplers and a set
/// of independently seeded RNG streams. The constructors in this file give
/// each RNG its own `seeded_rng(seed, <stream id>)` stream.
pub struct JournalEntryGenerator {
    /// General-purpose RNG, created as `seeded_rng(seed, 0)`.
    rng: ChaCha8Rng,
    /// RNG used when sampling from `DEFAULT_SOURCE_MIX`.
    source_mix_rng: ChaCha8Rng,
    /// Cached archetypes keyed by `(company, doc_type)`; each archetype is a
    /// `(debit, credit)` pair of string lists. The number stored per key is
    /// capped by `cache_recurring_archetype`.
    recurring_archetypes:
        std::collections::HashMap<(String, String), Vec<(Vec<String>, Vec<String>)>>,
    /// RNG used by `pick_recurring_archetype`.
    template_rng: ChaCha8Rng,
    /// Bounded buffer of recently recorded entries from which reversals and
    /// allocation batches take their source entry.
    reversal_buffer: Vec<JournalEntry>,
    /// RNG used by `maybe_generate_reversal`.
    reversal_rng: ChaCha8Rng,
    /// Dedicated RNG stream; presumably for account selection — its use is
    /// outside this part of the file.
    account_rng: ChaCha8Rng,
    /// RNG used by `maybe_generate_allocation_batch`.
    allocation_rng: ChaCha8Rng,
    /// RNG used by `maybe_apply_foreign_currency`.
    fx_rng: ChaCha8Rng,
    /// Dedicated RNG stream; presumably paired with `cond_pair_sampler` — TODO confirm.
    cond_pair_rng: ChaCha8Rng,
    /// Optional source-conditional pair sampler; `None` after construction.
    cond_pair_sampler: Option<
        datasynth_core::distributions::source_conditional_pair::SourceConditionalPairSampler,
    >,
    /// `None` after construction; presumably the source code of the entry
    /// currently being built — TODO confirm.
    current_je_source: Option<String>,
    /// Base seed the generator was constructed with.
    seed: u64,
    /// Transaction configuration (rates, distributions, feature switches).
    config: TransactionConfig,
    /// Shared chart of accounts.
    coa: Arc<ChartOfAccounts>,
    /// Company codes entries are generated for.
    companies: Vec<String>,
    /// Company selector; uniform over `companies` unless replaced.
    company_selector: WeightedCompanySelector,
    /// Line-item sampler, seeded from `seed` offset by 1.
    line_sampler: LineItemSampler,
    /// Amount sampler, seeded from `seed` offset by 2.
    amount_sampler: AmountSampler,
    /// Temporal sampler, seeded from `seed` offset by 3.
    temporal_sampler: TemporalSampler,
    /// Start date passed to the constructor; its year seeds the reference
    /// generator and the holiday calendars.
    start_date: NaiveDate,
    /// End date passed to the constructor; reversal dates are not allowed to
    /// exceed it.
    end_date: NaiveDate,
    /// Counter initialised to 0; presumably the number of entries generated
    /// so far — TODO confirm.
    count: u64,
    /// Deterministic UUID factory for `GeneratorType::JournalEntry`.
    uuid_factory: DeterministicUuidFactory,
    /// Optional user pool; supplied by the caller or generated when realistic
    /// names are enabled in the template config.
    user_pool: Option<UserPool>,
    /// Generator for description text.
    description_generator: DescriptionGenerator,
    /// Generator for document references, with prefixes taken from the
    /// template config.
    reference_generator: ReferenceGenerator,
    /// Template configuration the generator was built with.
    template_config: TemplateConfig,
    /// Vendor pool; `VendorPool::standard()` until `with_vendors` replaces it.
    vendor_pool: VendorPool,
    /// Customer pool; `CustomerPool::standard()` until `with_customers`
    /// replaces it.
    customer_pool: CustomerPool,
    /// Material pool; `None` until `with_materials` supplies one.
    material_pool: Option<MaterialPool>,
    /// Cost-center ids supplied through `with_cost_center_pool`; empty by default.
    cost_center_pool: Vec<String>,
    /// Profit-center ids supplied through `with_profit_center_pool`; empty by default.
    profit_center_pool: Vec<String>,
    /// Set once any non-empty vendor, customer or material list was supplied.
    using_real_master_data: bool,
    /// Fraud configuration; `FraudConfig::default()` unless overridden.
    fraud_config: FraudConfig,
    /// Constructed as `true`.
    persona_errors_enabled: bool,
    /// Constructed as `true`.
    approval_enabled: bool,
    /// Constructed as `Decimal::new(10000, 0)`.
    approval_threshold: rust_decimal::Decimal,
    /// Constructed as `0.10`.
    sod_violation_rate: f64,
    /// State of an in-progress batch, if any; `None` after construction.
    batch_state: Option<BatchState>,
    /// Optional drift controller; `None` after construction.
    drift_controller: Option<DriftController>,
    /// Business-day calculator; defaults to a US holiday calendar for the
    /// start year.
    business_day_calculator: Option<BusinessDayCalculator>,
    /// Processing-lag calculator; set when processing lags are enabled in the
    /// temporal-patterns config.
    processing_lag_calculator: Option<ProcessingLagCalculator>,
    /// Temporal-patterns config most recently applied, if any.
    temporal_patterns_config: Option<TemporalPatternsConfig>,
    /// `(process, weight)` pairs; defaults to `DEFAULT_BUSINESS_PROCESS_WEIGHTS`.
    business_process_weights: [(BusinessProcess, f64); 5],
    /// Advanced amount sampler configured by `set_advanced_distributions`.
    advanced_amount_sampler: Option<AdvancedAmountSampler>,
    /// Conditional sampler for the `amount` output field, configured by
    /// `set_advanced_distributions`.
    conditional_amount_override: Option<datasynth_core::distributions::ConditionalSampler>,
    /// Copula sampler for the `("amount", "line_count")` pair, configured by
    /// `set_advanced_distributions`.
    correlation_copula: Option<datasynth_core::distributions::BivariateCopulaSampler>,
    /// Externally loaded priors; when present they take precedence over
    /// `DEFAULT_SOURCE_MIX` and disable archetype caching.
    pub loaded_priors: Option<crate::priors_loader::LoadedPriors>,
    /// Per-key `f64` accumulator; empty after construction.
    /// NOTE(review): the `iet_` prefix presumably means inter-event time — confirm.
    iet_day_accum: std::collections::HashMap<String, f64>,
    /// Per-key `u8` counter; empty after construction.
    /// NOTE(review): presumably the remaining entries of a burst — confirm.
    iet_burst_remaining: std::collections::HashMap<String, u8>,
    /// Per-source string map; empty after construction.
    /// NOTE(review): the meaning of "tp" is not visible here — confirm.
    last_tp_by_source: std::collections::HashMap<String, String>,
    /// Optional velocity calibrator; `None` after construction.
    pub velocity_calibrator: Option<crate::velocity_calibrator::VelocityCalibrator>,
    /// Placeholder resolver; default (all pools empty) until
    /// `refresh_md_resolver` rebuilds it.
    md_resolver: MasterDataResolver,
}
298
/// Default `(business process, weight)` table installed by the constructor.
///
/// The five weights sum to 1.0. `set_business_process_weights` replaces the
/// generator's copy of this table.
const DEFAULT_BUSINESS_PROCESS_WEIGHTS: [(BusinessProcess, f64); 5] = [
    (BusinessProcess::O2C, 0.35),
    (BusinessProcess::P2P, 0.30),
    (BusinessProcess::R2R, 0.20),
    (BusinessProcess::H2R, 0.10),
    (BusinessProcess::A2R, 0.05),
];
306
307impl JournalEntryGenerator {
333 fn supported_conditional_input(field: &str) -> bool {
334 matches!(
335 field,
336 "month"
337 | "quarter"
338 | "year"
339 | "day_of_week"
340 | "day_of_month"
341 | "day_of_year"
342 | "week_of_year"
343 | "is_period_end"
344 | "is_quarter_end"
345 | "is_year_end"
346 | "constant"
347 | ""
348 )
349 }
350
351 fn conditional_input_value(&self, posting_date: chrono::NaiveDate) -> f64 {
352 let input_field = match self
353 .conditional_amount_override
354 .as_ref()
355 .map(|s| s.config().input_field.as_str())
356 {
357 Some(f) => f,
358 None => return 0.0,
359 };
360
361 let is_last_business_day = |d: chrono::NaiveDate| -> bool {
362 let next = d.succ_opt();
366 match next {
367 Some(n) => n.month() != d.month(),
368 None => true,
369 }
370 };
371
372 match input_field {
373 "month" => posting_date.month() as f64,
374 "quarter" => ((posting_date.month() - 1) / 3 + 1) as f64,
375 "year" => posting_date.year() as f64,
376 "day_of_week" => posting_date.weekday().number_from_monday() as f64,
377 "day_of_month" => posting_date.day() as f64,
378 "day_of_year" => posting_date.ordinal() as f64,
379 "week_of_year" => posting_date.iso_week().week() as f64,
380 "is_period_end" => f64::from(u8::from(is_last_business_day(posting_date))),
381 "is_quarter_end" => {
382 let m = posting_date.month();
383 let is_q_month = matches!(m, 3 | 6 | 9 | 12);
384 f64::from(u8::from(is_q_month && is_last_business_day(posting_date)))
385 }
386 "is_year_end" => f64::from(u8::from(
387 posting_date.month() == 12 && is_last_business_day(posting_date),
388 )),
389 _ => 0.0,
390 }
391 }
392}
393
394fn industry_profile_to_log_normal(
395 p: datasynth_config::schema::IndustryProfileType,
396) -> datasynth_core::distributions::LogNormalMixtureConfig {
397 use datasynth_config::schema::IndustryProfileType as P;
398 let industry = match p {
399 P::Retail => IndustryType::Retail,
400 P::Manufacturing => IndustryType::Manufacturing,
401 P::FinancialServices => IndustryType::FinancialServices,
402 P::Healthcare => IndustryType::Healthcare,
403 P::Technology => IndustryType::Technology,
404 };
405 IndustryAmountProfile::for_industry(industry).sales_amounts
406}
407
/// Snapshot of the "base" entry of a batch, kept in
/// `JournalEntryGenerator::batch_state` while a batch is in progress.
///
/// NOTE(review): how these fields are consumed is outside this part of the
/// file; the field notes below are read off the names and types only.
#[derive(Clone)]
struct BatchState {
    /// Account number of the base entry.
    base_account_number: String,
    /// Amount of the base entry.
    base_amount: rust_decimal::Decimal,
    /// Business process of the base entry, if one was assigned.
    base_business_process: Option<BusinessProcess>,
    /// Posting date of the base entry.
    base_posting_date: NaiveDate,
    /// Presumably the number of batch entries still to be emitted — TODO confirm.
    remaining: u8,
}
422
423impl JournalEntryGenerator {
424 pub fn new_with_params(
426 config: TransactionConfig,
427 coa: Arc<ChartOfAccounts>,
428 companies: Vec<String>,
429 start_date: NaiveDate,
430 end_date: NaiveDate,
431 seed: u64,
432 ) -> Self {
433 Self::new_with_full_config(
434 config,
435 coa,
436 companies,
437 start_date,
438 end_date,
439 seed,
440 TemplateConfig::default(),
441 None,
442 )
443 }
444
445 #[allow(clippy::too_many_arguments)]
447 pub fn new_with_full_config(
448 config: TransactionConfig,
449 coa: Arc<ChartOfAccounts>,
450 companies: Vec<String>,
451 start_date: NaiveDate,
452 end_date: NaiveDate,
453 seed: u64,
454 template_config: TemplateConfig,
455 user_pool: Option<UserPool>,
456 ) -> Self {
457 let user_pool = user_pool.or_else(|| {
459 if template_config.names.generate_realistic_names {
460 let user_gen_config = UserGeneratorConfig {
461 culture_distribution: vec![
462 (
463 datasynth_core::templates::NameCulture::WesternUs,
464 template_config.names.culture_distribution.western_us,
465 ),
466 (
467 datasynth_core::templates::NameCulture::Hispanic,
468 template_config.names.culture_distribution.hispanic,
469 ),
470 (
471 datasynth_core::templates::NameCulture::German,
472 template_config.names.culture_distribution.german,
473 ),
474 (
475 datasynth_core::templates::NameCulture::French,
476 template_config.names.culture_distribution.french,
477 ),
478 (
479 datasynth_core::templates::NameCulture::Chinese,
480 template_config.names.culture_distribution.chinese,
481 ),
482 (
483 datasynth_core::templates::NameCulture::Japanese,
484 template_config.names.culture_distribution.japanese,
485 ),
486 (
487 datasynth_core::templates::NameCulture::Indian,
488 template_config.names.culture_distribution.indian,
489 ),
490 ],
491 email_domain: template_config.names.email_domain.clone(),
492 generate_realistic_names: true,
493 };
494 let mut user_gen = UserGenerator::with_config(seed + 100, user_gen_config);
495 Some(user_gen.generate_standard(&companies))
496 } else {
497 None
498 }
499 });
500
501 let mut ref_gen = ReferenceGenerator::new(
503 start_date.year(),
504 companies
505 .first()
506 .map(std::string::String::as_str)
507 .unwrap_or("1000"),
508 );
509 ref_gen.set_prefix(
510 ReferenceType::Invoice,
511 &template_config.references.invoice_prefix,
512 );
513 ref_gen.set_prefix(
514 ReferenceType::PurchaseOrder,
515 &template_config.references.po_prefix,
516 );
517 ref_gen.set_prefix(
518 ReferenceType::SalesOrder,
519 &template_config.references.so_prefix,
520 );
521
522 let company_selector = WeightedCompanySelector::uniform(companies.clone());
524
525 Self {
526 rng: seeded_rng(seed, 0),
527 source_mix_rng: seeded_rng(seed, 50_063),
528 recurring_archetypes: std::collections::HashMap::new(),
529 template_rng: seeded_rng(seed, 70_081),
530 reversal_buffer: Vec::new(),
531 reversal_rng: seeded_rng(seed, 90_017),
532 account_rng: seeded_rng(seed, 60_071),
533 allocation_rng: seeded_rng(seed, 80_023),
534 fx_rng: seeded_rng(seed, 70_093),
535 cond_pair_rng: seeded_rng(seed, 110_071),
536 cond_pair_sampler: None,
537 current_je_source: None,
538 seed,
539 config: config.clone(),
540 coa,
541 companies,
542 company_selector,
543 line_sampler: LineItemSampler::with_config(
544 seed + 1,
545 config.line_item_distribution.clone(),
546 config.even_odd_distribution.clone(),
547 config.debit_credit_distribution.clone(),
548 ),
549 amount_sampler: AmountSampler::with_config(seed + 2, config.amounts.clone()),
550 temporal_sampler: TemporalSampler::with_config(
551 seed + 3,
552 config.seasonality.clone(),
553 WorkingHoursConfig::default(),
554 Vec::new(),
555 ),
556 start_date,
557 end_date,
558 count: 0,
559 uuid_factory: DeterministicUuidFactory::new(seed, GeneratorType::JournalEntry),
560 user_pool,
561 description_generator: DescriptionGenerator::new(),
562 reference_generator: ref_gen,
563 template_config,
564 vendor_pool: VendorPool::standard(),
565 customer_pool: CustomerPool::standard(),
566 material_pool: None,
567 cost_center_pool: Vec::new(),
568 profit_center_pool: Vec::new(),
569 using_real_master_data: false,
570 fraud_config: FraudConfig::default(),
571 persona_errors_enabled: true, approval_enabled: true, approval_threshold: rust_decimal::Decimal::new(10000, 0), sod_violation_rate: 0.10, batch_state: None,
576 drift_controller: None,
577 business_day_calculator: Some(BusinessDayCalculator::new(HolidayCalendar::new(
580 Region::US,
581 start_date.year(),
582 ))),
583 processing_lag_calculator: None,
584 temporal_patterns_config: None,
585 business_process_weights: DEFAULT_BUSINESS_PROCESS_WEIGHTS,
586 advanced_amount_sampler: None,
587 conditional_amount_override: None,
588 correlation_copula: None,
589 loaded_priors: None,
590 iet_day_accum: std::collections::HashMap::new(),
591 iet_burst_remaining: std::collections::HashMap::new(),
592 last_tp_by_source: std::collections::HashMap::new(),
593 velocity_calibrator: None,
594 md_resolver: MasterDataResolver::default(),
595 }
596 }
597
598 pub fn set_advanced_distributions(
617 &mut self,
618 config: &AdvancedDistributionConfig,
619 seed: u64,
620 ) -> Result<(), String> {
621 if !config.enabled {
622 return Ok(());
623 }
624
625 self.conditional_amount_override = config
631 .conditional
632 .iter()
633 .find(|c| {
634 c.output_field == "amount" && Self::supported_conditional_input(&c.input_field)
635 })
636 .and_then(|c| {
637 datasynth_core::distributions::ConditionalSampler::new(
638 seed.wrapping_add(17),
639 c.to_core_config(),
640 )
641 .ok()
642 });
643
644 self.correlation_copula = config
650 .correlations
651 .to_core_config_for_pair("amount", "line_count")
652 .and_then(|copula_cfg| {
653 datasynth_core::distributions::BivariateCopulaSampler::new(
654 seed.wrapping_add(31),
655 copula_cfg,
656 )
657 .ok()
658 });
659
660 if let Some(pareto) = &config.pareto {
665 if pareto.enabled {
666 let core_cfg = pareto.to_core_config();
667 self.advanced_amount_sampler =
668 Some(AdvancedAmountSampler::new_pareto(seed, core_cfg)?);
669 return Ok(());
670 }
671 }
672
673 if !config.amounts.enabled {
674 return Ok(());
675 }
676
677 match config.amounts.distribution_type {
678 MixtureDistributionType::LogNormal => {
679 let lognormal_cfg = config.amounts.to_log_normal_config().or_else(|| {
680 config
681 .industry_profile
682 .as_ref()
683 .map(|p| industry_profile_to_log_normal(p.profile_type()))
684 });
685 if let Some(cfg) = lognormal_cfg {
686 self.advanced_amount_sampler =
687 Some(AdvancedAmountSampler::new_log_normal(seed, cfg)?);
688 }
689 }
690 MixtureDistributionType::Gaussian => {
691 if let Some(cfg) = config.amounts.to_gaussian_config() {
692 self.advanced_amount_sampler =
693 Some(AdvancedAmountSampler::new_gaussian(seed, cfg)?);
694 }
695 }
696 }
697
698 Ok(())
699 }
700
701 pub fn set_business_process_weights(
705 &mut self,
706 o2c: f64,
707 p2p: f64,
708 r2r: f64,
709 h2r: f64,
710 a2r: f64,
711 ) {
712 self.business_process_weights = [
713 (BusinessProcess::O2C, o2c),
714 (BusinessProcess::P2P, p2p),
715 (BusinessProcess::R2R, r2r),
716 (BusinessProcess::H2R, h2r),
717 (BusinessProcess::A2R, a2r),
718 ];
719 }
720
721 pub fn from_generator_config(
726 full_config: &GeneratorConfig,
727 coa: Arc<ChartOfAccounts>,
728 start_date: NaiveDate,
729 end_date: NaiveDate,
730 seed: u64,
731 ) -> Self {
732 let companies: Vec<String> = full_config
733 .companies
734 .iter()
735 .map(|c| c.code.clone())
736 .collect();
737
738 let company_selector = WeightedCompanySelector::from_configs(&full_config.companies);
740
741 let mut generator = Self::new_with_full_config(
742 full_config.transactions.clone(),
743 coa,
744 companies,
745 start_date,
746 end_date,
747 seed,
748 full_config.templates.clone(),
749 None,
750 );
751
752 generator.company_selector = company_selector;
754
755 generator.fraud_config = full_config.fraud.clone();
757
758 let temporal_config = &full_config.temporal_patterns;
760 if temporal_config.enabled {
761 generator = generator.with_temporal_patterns(temporal_config.clone(), seed);
762 }
763
764 generator
765 }
766
767 pub fn with_temporal_patterns(mut self, config: TemporalPatternsConfig, seed: u64) -> Self {
774 if config.business_days.enabled {
776 let region = config
777 .calendars
778 .regions
779 .first()
780 .map(|r| Self::parse_region(r))
781 .unwrap_or(Region::US);
782
783 let calendar = HolidayCalendar::new(region, self.start_date.year());
784 self.business_day_calculator = Some(BusinessDayCalculator::new(calendar));
785 }
786
787 if config.processing_lags.enabled {
789 let lag_config = Self::convert_processing_lag_config(&config.processing_lags);
790 self.processing_lag_calculator =
791 Some(ProcessingLagCalculator::with_config(seed, lag_config));
792 }
793
794 let model = config.period_end.model.as_deref().unwrap_or("flat");
796 if model != "flat"
797 || config
798 .period_end
799 .month_end
800 .as_ref()
801 .is_some_and(|m| m.peak_multiplier.unwrap_or(1.0) != 1.0)
802 {
803 let dynamics = Self::convert_period_end_config(&config.period_end);
804 self.temporal_sampler.set_period_end_dynamics(dynamics);
805 }
806
807 self.temporal_patterns_config = Some(config);
808 self
809 }
810
811 pub fn with_country_pack_temporal(
819 mut self,
820 config: TemporalPatternsConfig,
821 seed: u64,
822 pack: &CountryPack,
823 ) -> Self {
824 if config.business_days.enabled {
826 let calendar = HolidayCalendar::from_country_pack(pack, self.start_date.year());
827 self.business_day_calculator = Some(BusinessDayCalculator::new(calendar));
828 }
829
830 if config.processing_lags.enabled {
832 let lag_config = Self::convert_processing_lag_config(&config.processing_lags);
833 self.processing_lag_calculator =
834 Some(ProcessingLagCalculator::with_config(seed, lag_config));
835 }
836
837 let model = config.period_end.model.as_deref().unwrap_or("flat");
839 if model != "flat"
840 || config
841 .period_end
842 .month_end
843 .as_ref()
844 .is_some_and(|m| m.peak_multiplier.unwrap_or(1.0) != 1.0)
845 {
846 let dynamics = Self::convert_period_end_config(&config.period_end);
847 self.temporal_sampler.set_period_end_dynamics(dynamics);
848 }
849
850 self.temporal_patterns_config = Some(config);
851 self
852 }
853
854 fn convert_processing_lag_config(
856 schema: &datasynth_config::schema::ProcessingLagSchemaConfig,
857 ) -> ProcessingLagConfig {
858 let mut config = ProcessingLagConfig {
859 enabled: schema.enabled,
860 ..Default::default()
861 };
862
863 let convert_lag = |lag: &datasynth_config::schema::LagDistributionSchemaConfig| {
865 let mut dist = LagDistribution::log_normal(lag.mu, lag.sigma);
866 if let Some(min) = lag.min_hours {
867 dist.min_lag_hours = min;
868 }
869 if let Some(max) = lag.max_hours {
870 dist.max_lag_hours = max;
871 }
872 dist
873 };
874
875 if let Some(ref lag) = schema.sales_order_lag {
877 config
878 .event_lags
879 .insert(EventType::SalesOrder, convert_lag(lag));
880 }
881 if let Some(ref lag) = schema.purchase_order_lag {
882 config
883 .event_lags
884 .insert(EventType::PurchaseOrder, convert_lag(lag));
885 }
886 if let Some(ref lag) = schema.goods_receipt_lag {
887 config
888 .event_lags
889 .insert(EventType::GoodsReceipt, convert_lag(lag));
890 }
891 if let Some(ref lag) = schema.invoice_receipt_lag {
892 config
893 .event_lags
894 .insert(EventType::InvoiceReceipt, convert_lag(lag));
895 }
896 if let Some(ref lag) = schema.invoice_issue_lag {
897 config
898 .event_lags
899 .insert(EventType::InvoiceIssue, convert_lag(lag));
900 }
901 if let Some(ref lag) = schema.payment_lag {
902 config
903 .event_lags
904 .insert(EventType::Payment, convert_lag(lag));
905 }
906 if let Some(ref lag) = schema.journal_entry_lag {
907 config
908 .event_lags
909 .insert(EventType::JournalEntry, convert_lag(lag));
910 }
911
912 if let Some(ref cross_day) = schema.cross_day_posting {
914 config.cross_day = CrossDayConfig {
915 enabled: cross_day.enabled,
916 probability_by_hour: cross_day.probability_by_hour.clone(),
917 ..Default::default()
918 };
919 }
920
921 config
922 }
923
924 fn convert_period_end_config(
926 schema: &datasynth_config::schema::PeriodEndSchemaConfig,
927 ) -> PeriodEndDynamics {
928 let model_type = schema.model.as_deref().unwrap_or("exponential");
929
930 let convert_period =
932 |period: Option<&datasynth_config::schema::PeriodEndModelSchemaConfig>,
933 default_peak: f64|
934 -> PeriodEndConfig {
935 if let Some(p) = period {
936 let model = match model_type {
937 "flat" => PeriodEndModel::FlatMultiplier {
938 multiplier: p.peak_multiplier.unwrap_or(default_peak),
939 },
940 "extended_crunch" => PeriodEndModel::ExtendedCrunch {
941 start_day: p.start_day.unwrap_or(-10),
942 sustained_high_days: p.sustained_high_days.unwrap_or(3),
943 peak_multiplier: p.peak_multiplier.unwrap_or(default_peak),
944 ramp_up_days: 3, },
946 _ => PeriodEndModel::ExponentialAcceleration {
947 start_day: p.start_day.unwrap_or(-10),
948 base_multiplier: p.base_multiplier.unwrap_or(1.0),
949 peak_multiplier: p.peak_multiplier.unwrap_or(default_peak),
950 decay_rate: p.decay_rate.unwrap_or(0.3),
951 },
952 };
953 PeriodEndConfig {
954 enabled: true,
955 model,
956 additional_multiplier: p.additional_multiplier.unwrap_or(1.0),
957 }
958 } else {
959 PeriodEndConfig {
960 enabled: true,
961 model: PeriodEndModel::ExponentialAcceleration {
962 start_day: -10,
963 base_multiplier: 1.0,
964 peak_multiplier: default_peak,
965 decay_rate: 0.3,
966 },
967 additional_multiplier: 1.0,
968 }
969 }
970 };
971
972 PeriodEndDynamics::new(
973 convert_period(schema.month_end.as_ref(), 2.0),
974 convert_period(schema.quarter_end.as_ref(), 3.5),
975 convert_period(schema.year_end.as_ref(), 5.0),
976 )
977 }
978
979 fn parse_region(region_str: &str) -> Region {
981 match region_str.to_uppercase().as_str() {
982 "US" => Region::US,
983 "DE" => Region::DE,
984 "GB" => Region::GB,
985 "CN" => Region::CN,
986 "JP" => Region::JP,
987 "IN" => Region::IN,
988 "BR" => Region::BR,
989 "MX" => Region::MX,
990 "AU" => Region::AU,
991 "SG" => Region::SG,
992 "KR" => Region::KR,
993 "FR" => Region::FR,
994 "IT" => Region::IT,
995 "ES" => Region::ES,
996 "CA" => Region::CA,
997 _ => Region::US,
998 }
999 }
1000
    /// Replaces the company selector used by this generator.
    pub fn set_company_selector(&mut self, selector: WeightedCompanySelector) {
        self.company_selector = selector;
    }
1005
    /// Returns the company selector currently in use.
    pub fn company_selector(&self) -> &WeightedCompanySelector {
        &self.company_selector
    }
1010
    /// Replaces the fraud configuration used by this generator.
    pub fn set_fraud_config(&mut self, config: FraudConfig) {
        self.fraud_config = config;
    }
1015
1016 pub fn with_vendors(mut self, vendors: &[Vendor]) -> Self {
1021 if !vendors.is_empty() {
1022 self.vendor_pool = VendorPool::from_vendors(vendors.to_vec());
1023 self.using_real_master_data = true;
1024 }
1025 self
1026 }
1027
1028 pub fn with_customers(mut self, customers: &[Customer]) -> Self {
1033 if !customers.is_empty() {
1034 self.customer_pool = CustomerPool::from_customers(customers.to_vec());
1035 self.using_real_master_data = true;
1036 }
1037 self
1038 }
1039
1040 pub fn with_materials(mut self, materials: &[Material]) -> Self {
1044 if !materials.is_empty() {
1045 self.material_pool = Some(MaterialPool::from_materials(materials.to_vec()));
1046 self.using_real_master_data = true;
1047 }
1048 self
1049 }
1050
1051 pub fn with_master_data(
1056 self,
1057 vendors: &[Vendor],
1058 customers: &[Customer],
1059 materials: &[Material],
1060 ) -> Self {
1061 self.with_vendors(vendors)
1062 .with_customers(customers)
1063 .with_materials(materials)
1064 }
1065
1066 fn refresh_md_resolver(&mut self) {
1072 let companies: Vec<String> = self
1073 .vendor_pool
1074 .vendors
1075 .iter()
1076 .map(|v| v.name.clone())
1077 .chain(self.customer_pool.customers.iter().map(|c| c.name.clone()))
1078 .collect();
1079
1080 let persons: Vec<String> = self
1081 .user_pool
1082 .as_ref()
1083 .map(|p| p.users.iter().map(|u| u.display_name.clone()).collect())
1084 .unwrap_or_default();
1085
1086 let streets: Vec<String> = Vec::new(); let patients = synthetic_patient_pool("de_CH");
1088
1089 self.md_resolver = MasterDataResolver {
1090 companies,
1091 persons,
1092 streets,
1093 patients,
1094 };
1095 }
1096
    /// Replaces the cost-center id pool with `ids` and returns the generator.
    pub fn with_cost_center_pool(mut self, ids: Vec<String>) -> Self {
        self.cost_center_pool = ids;
        self
    }
1109
    /// Replaces the profit-center id pool with `ids` and returns the generator.
    pub fn with_profit_center_pool(mut self, ids: Vec<String>) -> Self {
        self.profit_center_pool = ids;
        self
    }
1121
    /// Replaces the user pool with `pool` and returns the generator.
    pub fn with_user_pool(mut self, pool: UserPool) -> Self {
        self.user_pool = Some(pool);
        self
    }
1134
1135 pub fn with_country_pack_names(mut self, pack: &CountryPack) -> Self {
1142 let name_gen =
1143 datasynth_core::templates::MultiCultureNameGenerator::from_country_pack(pack);
1144 let config = UserGeneratorConfig {
1145 culture_distribution: Vec::new(),
1148 email_domain: name_gen.email_domain().to_string(),
1149 generate_realistic_names: true,
1150 };
1151 let mut user_gen = UserGenerator::with_name_generator(self.seed + 100, config, name_gen);
1152 self.user_pool = Some(user_gen.generate_standard(&self.companies));
1153 self
1154 }
1155
    /// Returns `true` once a non-empty vendor, customer or material list has
    /// been supplied to the generator.
    pub fn is_using_real_master_data(&self) -> bool {
        self.using_real_master_data
    }
1160
1161 fn pick_source_system(rng: &mut ChaCha8Rng, is_manual: bool, bp: BusinessProcess) -> String {
1175 if is_manual {
1176 const MANUAL: &[&str] = &[
1179 "manual/standard",
1180 "manual/adjustment",
1181 "manual/reclassification",
1182 "manual/accrual",
1183 "manual/reversal",
1184 "manual/correction",
1185 "spreadsheet/upload",
1186 "spreadsheet/journal",
1187 ];
1188 let idx = (rng.random::<u32>() as usize) % MANUAL.len();
1189 return MANUAL[idx].to_string();
1190 }
1191
1192 let primary: &[&str] = match bp {
1196 BusinessProcess::P2P => &[
1197 "SAP-MM/PO",
1198 "SAP-MM/IV",
1199 "SAP-MM/IM",
1200 "SAP-FI/AP",
1201 "Interface/EDI",
1202 ],
1203 BusinessProcess::O2C => &[
1204 "SAP-SD/ORD",
1205 "SAP-SD/DEL",
1206 "SAP-SD/IV",
1207 "SAP-FI/AR",
1208 "Interface/Lockbox",
1209 ],
1210 BusinessProcess::H2R => &["SAP-HR/PR", "SAP-HR/TIME", "Interface/PayRun"],
1211 BusinessProcess::A2R => &["SAP-FI/AA", "SAP-FI/GL"],
1212 BusinessProcess::Treasury => &["Treasury/CM", "Treasury/HD", "Interface/Bank"],
1213 BusinessProcess::Tax => &["Tax/RPT", "SAP-FI/GL"],
1214 BusinessProcess::Mfg => &["SAP-MM/IM", "SAP-FI/GL"],
1215 _ => &[
1218 "SAP-FI/GL",
1219 "SAP-FI/AP",
1220 "SAP-FI/AR",
1221 "SAP-FI/AA",
1222 "External/SubL",
1223 ],
1224 };
1225
1226 const CROSS: &[&str] = &[
1229 "SAP-FI/GL",
1230 "SAP-FI/AP",
1231 "SAP-FI/AR",
1232 "Interface/EDI",
1233 "Interface/Bank",
1234 "External/SubL",
1235 ];
1236 let pool = if rng.random::<f64>() < 0.80 {
1237 primary
1238 } else {
1239 CROSS
1240 };
1241 let idx = (rng.random::<u32>() as usize) % pool.len();
1242 pool[idx].to_string()
1243 }
1244
1245 fn sample_sap_source_code(&mut self) -> Option<String> {
1252 if let Some(p) = self.loaded_priors.as_ref() {
1253 return Some(p.source_mix.sample(&mut self.rng));
1254 }
1255 if self.config.synthetic_source_codes.unwrap_or(true) {
1256 return Some(DEFAULT_SOURCE_MIX.sample(&mut self.source_mix_rng));
1259 }
1260 None
1261 }
1262
1263 fn pick_recurring_archetype(
1271 &mut self,
1272 company: &str,
1273 doc_type: &str,
1274 debit_count: usize,
1275 credit_count: usize,
1276 ) -> Option<(Vec<String>, Vec<String>)> {
1277 if !self.config.recurring_templates.unwrap_or(true) {
1278 return None;
1279 }
1280 let p_reuse_opt = self.config.archetype_reuse_probability;
1285 if p_reuse_opt.is_none() && self.loaded_priors.is_some() {
1286 return None;
1287 }
1288 let p_reuse = p_reuse_opt.unwrap_or(0.90);
1289 if self.template_rng.random::<f64>() >= p_reuse {
1290 return None;
1291 }
1292 let lib = self
1293 .recurring_archetypes
1294 .get(&(company.to_string(), doc_type.to_string()))?;
1295 let matching: Vec<&(Vec<String>, Vec<String>)> = lib
1296 .iter()
1297 .filter(|(d, c)| d.len() == debit_count && c.len() == credit_count)
1298 .collect();
1299 if matching.is_empty() {
1300 return None;
1301 }
1302 let idx = Self::power_law_index(matching.len(), &mut self.template_rng).unwrap_or(0);
1310 Some(matching[idx].clone())
1311 }
1312
1313 fn cache_recurring_archetype(
1316 &mut self,
1317 company: &str,
1318 doc_type: &str,
1319 debit: Vec<String>,
1320 credit: Vec<String>,
1321 ) {
1322 if self.loaded_priors.is_some() || !self.config.recurring_templates.unwrap_or(true) {
1323 return;
1324 }
1325 if debit.is_empty() && credit.is_empty() {
1326 return;
1327 }
1328 const CAP: usize = 24; let lib = self
1330 .recurring_archetypes
1331 .entry((company.to_string(), doc_type.to_string()))
1332 .or_default();
1333 if lib.len() < CAP {
1334 lib.push((debit, credit));
1335 }
1336 }
1337
    /// Possibly turns a buffered entry into its reversal.
    ///
    /// Returns `None` when the reversal rate is non-positive, the buffer is
    /// empty, or the probability draw fails. Otherwise one buffered entry is
    /// removed and returned with a new document id, a later posting date,
    /// reversal texts and debit/credit amounts swapped on every line.
    fn maybe_generate_reversal(&mut self) -> Option<JournalEntry> {
        let rate = self.config.reversal_rate.unwrap_or(DEFAULT_REVERSAL_RATE);
        if rate <= 0.0 || self.reversal_buffer.is_empty() {
            return None;
        }
        // Probability gate: one f64 draw per call that reaches this point.
        if self.reversal_rng.random::<f64>() >= rate {
            return None;
        }
        // The chosen entry leaves the buffer, so it cannot be picked again.
        let pick = (self.reversal_rng.random::<u32>() as usize) % self.reversal_buffer.len();
        let mut entry = self.reversal_buffer.remove(pick);
        let orig_id = entry.header.document_id;
        // Reverse 1 to 7 days after the original posting date.
        let offset = 1 + (self.reversal_rng.random::<u32>() % 7) as i64;
        let mut rev_date = entry.header.posting_date + chrono::Duration::days(offset);
        // Move on to the next business day if the date is not one.
        if let Some(ref calc) = self.business_day_calculator {
            if !calc.is_business_day(rev_date) {
                rev_date = calc.next_business_day(rev_date, false);
            }
        }
        // Never post past the end date; fall back to the original date.
        if rev_date > self.end_date {
            rev_date = entry.header.posting_date;
        }
        // Derive the reversal id from the original id by XOR with a constant
        // whose bytes spell ASCII "REVERSAL" twice.
        let rev_id =
            uuid::Uuid::from_u128(orig_id.as_u128() ^ 0x5245_5645_5253_414c_5245_5645_5253_414c);
        entry.header.document_id = rev_id;
        entry.header.posting_date = rev_date;
        entry.header.document_date = rev_date;
        // Fiscal year and period are taken directly from the calendar date.
        entry.header.fiscal_year = rev_date.year() as u16;
        entry.header.fiscal_period = rev_date.month() as u8;
        entry.header.header_text = Some(format!("Reversal of {orig_id}"));
        entry.header.reference = Some(format!("REV-{orig_id}"));
        entry.header.batch_id = None;
        // Swapping debit and credit on each line negates the original posting.
        for line in entry.lines.iter_mut() {
            std::mem::swap(&mut line.debit_amount, &mut line.credit_amount);
            line.document_id = rev_id;
        }
        Some(entry)
    }
1388
1389 fn record_for_reversal(&mut self, entry: &JournalEntry) {
1393 let reversal_on = self.config.reversal_rate.unwrap_or(DEFAULT_REVERSAL_RATE) > 0.0;
1394 let allocation_on = self
1395 .config
1396 .allocation_batch_rate
1397 .unwrap_or(DEFAULT_ALLOCATION_RATE)
1398 > 0.0;
1399 if (!reversal_on && !allocation_on) || entry.lines.is_empty() {
1400 return;
1401 }
1402 const CAP: usize = 64;
1403 if self.reversal_buffer.len() >= CAP {
1404 self.reversal_buffer.remove(0);
1405 }
1406 self.reversal_buffer.push(entry.clone());
1407 }
1408
1409 fn maybe_apply_foreign_currency(&mut self, entry: &mut JournalEntry) {
1417 let prob = self.config.foreign_currency_rate.unwrap_or(0.0);
1418 if prob <= 0.0 || self.fx_rng.random::<f64>() >= prob {
1419 return;
1420 }
1421 let (code, rate) = FOREIGN_CCYS[self.fx_rng.random_range(0..FOREIGN_CCYS.len())];
1422 let rate_dec = match Decimal::from_f64_retain(rate) {
1423 Some(r) if r > Decimal::ZERO => r,
1424 _ => return,
1425 };
1426 entry.header.currency = code.to_string();
1427 entry.header.exchange_rate = rate_dec;
1428 for line in entry.lines.iter_mut() {
1429 let ledger = line.debit_amount + line.credit_amount; line.transaction_amount = Some((ledger / rate_dec).round_dp(2));
1431 }
1432 }
1433
1434 fn split_amount(total: Decimal, n: usize, rng: &mut ChaCha8Rng) -> Vec<Decimal> {
1439 let n = n.max(1);
1440 let total_cents = (total.round_dp(2) * Decimal::from(100))
1441 .to_i64()
1442 .unwrap_or(0);
1443 if n == 1 || total_cents < n as i64 {
1444 return vec![total];
1445 }
1446 let weights: Vec<f64> = (0..n).map(|_| 0.5 + rng.random::<f64>()).collect();
1447 let sumw: f64 = weights.iter().sum::<f64>().max(f64::EPSILON);
1448 let spare = total_cents - n as i64; let mut cents: Vec<i64> = weights
1450 .iter()
1451 .map(|w| 1 + (spare as f64 * w / sumw).floor() as i64)
1452 .collect();
1453 let assigned: i64 = cents.iter().sum();
1455 let leftover = total_cents - assigned;
1456 if let Some(maxp) = cents.iter_mut().max_by_key(|c| **c) {
1457 *maxp += leftover;
1458 }
1459 cents.into_iter().map(|c| Decimal::new(c, 2)).collect()
1460 }
1461
/// Maps a dimension id (e.g. a cost or profit center) to one of eleven
/// business-unit labels, `"BU01"` through `"BU11"`.
///
/// The bucket is the 32-bit FNV-1a hash of the id's bytes modulo 11, so the
/// same id always yields the same label.
fn business_unit_for_dimension(dim: &str) -> String {
    const FNV_OFFSET_BASIS: u32 = 0x811c_9dc5;
    const FNV_PRIME: u32 = 0x0100_0193;
    const BUSINESS_UNIT_COUNT: u32 = 11;

    let hash = dim.bytes().fold(FNV_OFFSET_BASIS, |acc, byte| {
        (acc ^ u32::from(byte)).wrapping_mul(FNV_PRIME)
    });
    let bucket = hash % BUSINESS_UNIT_COUNT + 1;
    format!("BU{bucket:02}")
}
1476
1477 fn maybe_generate_allocation_batch(&mut self) -> Option<JournalEntry> {
1485 let rate = self
1486 .config
1487 .allocation_batch_rate
1488 .unwrap_or(DEFAULT_ALLOCATION_RATE);
1489 if rate <= 0.0 || self.reversal_buffer.is_empty() {
1490 return None;
1491 }
1492 if self.allocation_rng.random::<f64>() >= rate {
1493 return None;
1494 }
1495 let pick = (self.allocation_rng.random::<u32>() as usize) % self.reversal_buffer.len();
1496 let mut entry = self.reversal_buffer.remove(pick);
1499 let idx = entry
1501 .lines
1502 .iter()
1503 .enumerate()
1504 .filter(|(_, l)| l.debit_amount > Decimal::ZERO)
1505 .max_by(|a, b| a.1.debit_amount.cmp(&b.1.debit_amount))
1506 .map(|(i, _)| i)?;
1507 let template = entry.lines[idx].clone();
1508 let n = self
1509 .allocation_rng
1510 .random_range(ALLOCATION_MIN_TARGETS..=ALLOCATION_MAX_TARGETS) as usize;
1511 let parts = Self::split_amount(template.debit_amount, n, &mut self.allocation_rng);
1512 if parts.len() < ALLOCATION_MIN_TARGETS as usize {
1513 return None;
1515 }
1516 let company_code = entry.header.company_code.clone();
1518 let cc_pool: Vec<String> = if self.cost_center_pool.is_empty() {
1519 Self::COST_CENTER_POOL
1520 .iter()
1521 .map(|s| s.to_string())
1522 .collect()
1523 } else {
1524 let needle = format!("-{company_code}-");
1525 let filtered: Vec<String> = self
1526 .cost_center_pool
1527 .iter()
1528 .filter(|id| id.contains(&needle))
1529 .cloned()
1530 .collect();
1531 if filtered.is_empty() {
1532 self.cost_center_pool.clone()
1533 } else {
1534 filtered
1535 }
1536 };
1537 let mut new_lines: Vec<JournalEntryLine> =
1538 Vec::with_capacity(entry.lines.len() + parts.len() - 1);
1539 for (j, line) in entry.lines.iter().enumerate() {
1540 if j == idx {
1541 let bu_on = self.config.business_unit_dimension.unwrap_or(true);
1542 for (k, part) in parts.iter().enumerate() {
1543 let mut nl = template.clone();
1544 nl.debit_amount = *part;
1545 nl.credit_amount = Decimal::ZERO;
1546 nl.cost_center = Some(cc_pool[k % cc_pool.len()].clone());
1547 if bu_on {
1550 nl.business_unit = nl
1551 .cost_center
1552 .as_deref()
1553 .map(Self::business_unit_for_dimension);
1554 }
1555 new_lines.push(nl);
1556 }
1557 } else {
1558 new_lines.push(line.clone());
1559 }
1560 }
1561 let base_id = entry.header.document_id;
1563 let alloc_id =
1564 uuid::Uuid::from_u128(base_id.as_u128() ^ 0xA110_CA70_A110_CA70_A110_CA70_A110_CA70);
1565 entry.header.document_id = alloc_id;
1566 entry.header.sap_source_code = Some("AB".to_string());
1567 entry.header.header_text = Some("Allocation/assessment cycle".to_string());
1568 entry.header.reference = Some(format!("ALLOC-{base_id}"));
1569 entry.header.batch_id = None;
1570 for (i, line) in new_lines.iter_mut().enumerate() {
1571 line.line_number = (i + 1) as u32;
1572 line.document_id = alloc_id;
1573 }
1574 entry.lines = new_lines.into();
1575 Some(entry)
1576 }
1577
1578 fn determine_fraud(&mut self, business_process: BusinessProcess) -> Option<FraudType> {
1579 if !self.fraud_config.enabled {
1580 return None;
1581 }
1582
1583 let process_slug = match business_process {
1592 BusinessProcess::P2P => "P2P",
1593 BusinessProcess::O2C => "O2C",
1594 BusinessProcess::R2R => "R2R",
1595 BusinessProcess::H2R => "H2R",
1596 BusinessProcess::A2R => "A2R",
1597 BusinessProcess::S2C => "S2C",
1598 BusinessProcess::Mfg => "MFG",
1599 BusinessProcess::Bank => "BANK",
1600 BusinessProcess::Audit => "AUDIT",
1601 BusinessProcess::Treasury => "TREASURY",
1602 BusinessProcess::Tax => "TAX",
1603 BusinessProcess::Intercompany => "INTERCOMPANY",
1604 BusinessProcess::ProjectAccounting => "PROJECT",
1605 BusinessProcess::Esg => "ESG",
1606 };
1607 let effective_rate = self
1608 .fraud_config
1609 .per_process_rates
1610 .get(process_slug)
1611 .copied()
1612 .unwrap_or(self.fraud_config.fraud_rate);
1613
1614 if self.rng.random::<f64>() >= effective_rate {
1616 return None;
1617 }
1618
1619 Some(self.select_fraud_type())
1621 }
1622
1623 fn select_fraud_type(&mut self) -> FraudType {
1625 let dist = &self.fraud_config.fraud_type_distribution;
1626 let roll: f64 = self.rng.random();
1627
1628 let mut cumulative = 0.0;
1629
1630 cumulative += dist.suspense_account_abuse;
1631 if roll < cumulative {
1632 return FraudType::SuspenseAccountAbuse;
1633 }
1634
1635 cumulative += dist.fictitious_transaction;
1636 if roll < cumulative {
1637 return FraudType::FictitiousTransaction;
1638 }
1639
1640 cumulative += dist.revenue_manipulation;
1641 if roll < cumulative {
1642 return FraudType::RevenueManipulation;
1643 }
1644
1645 cumulative += dist.expense_capitalization;
1646 if roll < cumulative {
1647 return FraudType::ExpenseCapitalization;
1648 }
1649
1650 cumulative += dist.split_transaction;
1651 if roll < cumulative {
1652 return FraudType::SplitTransaction;
1653 }
1654
1655 cumulative += dist.timing_anomaly;
1656 if roll < cumulative {
1657 return FraudType::TimingAnomaly;
1658 }
1659
1660 cumulative += dist.unauthorized_access;
1661 if roll < cumulative {
1662 return FraudType::UnauthorizedAccess;
1663 }
1664
1665 cumulative += dist.duplicate_payment;
1666 if roll < cumulative {
1667 return FraudType::DuplicatePayment;
1668 }
1669
1670 cumulative += dist.kickback_scheme;
1671 if roll < cumulative {
1672 return FraudType::KickbackScheme;
1673 }
1674
1675 cumulative += dist.round_tripping;
1676 if roll < cumulative {
1677 return FraudType::RoundTripping;
1678 }
1679
1680 cumulative += dist.unauthorized_discount;
1681 if roll < cumulative {
1682 return FraudType::UnauthorizedDiscount;
1683 }
1684
1685 FraudType::DuplicatePayment
1687 }
1688
1689 fn fraud_type_to_amount_pattern(&self, fraud_type: FraudType) -> FraudAmountPattern {
1691 match fraud_type {
1692 FraudType::SplitTransaction | FraudType::JustBelowThreshold => {
1693 FraudAmountPattern::ThresholdAdjacent
1694 }
1695 FraudType::FictitiousTransaction
1696 | FraudType::FictitiousEntry
1697 | FraudType::SuspenseAccountAbuse
1698 | FraudType::RoundDollarManipulation => FraudAmountPattern::ObviousRoundNumbers,
1699 FraudType::RevenueManipulation
1700 | FraudType::ExpenseCapitalization
1701 | FraudType::ImproperCapitalization
1702 | FraudType::ReserveManipulation
1703 | FraudType::UnauthorizedAccess
1704 | FraudType::PrematureRevenue
1705 | FraudType::UnderstatedLiabilities
1706 | FraudType::OverstatedAssets
1707 | FraudType::ChannelStuffing => FraudAmountPattern::StatisticallyImprobable,
1708 FraudType::DuplicatePayment
1709 | FraudType::TimingAnomaly
1710 | FraudType::SelfApproval
1711 | FraudType::ExceededApprovalLimit
1712 | FraudType::SegregationOfDutiesViolation
1713 | FraudType::UnauthorizedApproval
1714 | FraudType::CollusiveApproval
1715 | FraudType::FictitiousVendor
1716 | FraudType::ShellCompanyPayment
1717 | FraudType::Kickback
1718 | FraudType::KickbackScheme
1719 | FraudType::UnauthorizedDiscount
1720 | FraudType::RoundTripping
1721 | FraudType::InvoiceManipulation
1722 | FraudType::AssetMisappropriation
1723 | FraudType::InventoryTheft
1724 | FraudType::GhostEmployee => FraudAmountPattern::Normal,
1725 FraudType::ImproperRevenueRecognition
1727 | FraudType::ImproperPoAllocation
1728 | FraudType::VariableConsiderationManipulation
1729 | FraudType::ContractModificationMisstatement => {
1730 FraudAmountPattern::StatisticallyImprobable
1731 }
1732 FraudType::LeaseClassificationManipulation
1734 | FraudType::OffBalanceSheetLease
1735 | FraudType::LeaseLiabilityUnderstatement
1736 | FraudType::RouAssetMisstatement => FraudAmountPattern::StatisticallyImprobable,
1737 FraudType::FairValueHierarchyManipulation
1739 | FraudType::Level3InputManipulation
1740 | FraudType::ValuationTechniqueManipulation => {
1741 FraudAmountPattern::StatisticallyImprobable
1742 }
1743 FraudType::DelayedImpairment
1745 | FraudType::ImpairmentTestAvoidance
1746 | FraudType::CashFlowProjectionManipulation
1747 | FraudType::ImproperImpairmentReversal => FraudAmountPattern::StatisticallyImprobable,
1748 FraudType::BidRigging
1750 | FraudType::PhantomVendorContract
1751 | FraudType::ConflictOfInterestSourcing => FraudAmountPattern::Normal,
1752 FraudType::SplitContractThreshold => FraudAmountPattern::ThresholdAdjacent,
1753 FraudType::GhostEmployeePayroll
1755 | FraudType::PayrollInflation
1756 | FraudType::DuplicateExpenseReport
1757 | FraudType::FictitiousExpense => FraudAmountPattern::Normal,
1758 FraudType::SplitExpenseToAvoidApproval => FraudAmountPattern::ThresholdAdjacent,
1759 FraudType::RevenueTimingManipulation => FraudAmountPattern::StatisticallyImprobable,
1761 FraudType::QuotePriceOverride => FraudAmountPattern::Normal,
1762 }
1763 }
1764
/// Returns the next id handed out by `self.uuid_factory`.
///
/// Thin wrapper so callers do not touch the factory directly; the factory is
/// advanced through a shared reference (`&self`).
#[inline]
fn generate_deterministic_uuid(&self) -> uuid::Uuid {
    self.uuid_factory.next()
}
1770
/// Built-in cost center ids, used wherever a cost center is needed while
/// `self.cost_center_pool` is empty.
const COST_CENTER_POOL: &'static [&'static str] =
    &["CC1000", "CC2000", "CC3000", "CC4000", "CC5000"];
1774
1775 fn enrich_line_items(&mut self, entry: &mut JournalEntry) {
1784 let posting_date = entry.header.posting_date;
1785 let company_code = &entry.header.company_code;
1786 let header_text = entry.header.header_text.clone();
1787 let business_process = entry.header.business_process;
1788 let doc_type_key = entry.header.document_type.clone();
1791
1792 let header_sap_code: Option<String> = entry.header.sap_source_code.clone();
1796
1797 let (cc_pc_neighbor_vec, cc_pc_share_prob): (Vec<String>, f64) =
1801 if let Some(priors) = &self.loaded_priors {
1802 if let Some(motifs) = &priors.cross_entity_motifs {
1803 (
1804 motifs.neighbors(&doc_type_key).to_vec(),
1805 motifs.should_share(&doc_type_key),
1806 )
1807 } else {
1808 (Vec::new(), 0.0)
1809 }
1810 } else {
1811 (Vec::new(), 0.0)
1812 };
1813
1814 let doc_id_bytes = entry.header.document_id.as_bytes();
1816 let mut cc_seed: usize = 0;
1817 for &b in doc_id_bytes {
1818 cc_seed = cc_seed.wrapping_add(b as usize);
1819 }
1820
1821 for (i, line) in entry.lines.iter_mut().enumerate() {
1822 if line.account_description.is_none() {
1824 line.account_description = self
1825 .coa
1826 .get_account(&line.gl_account)
1827 .map(|a| a.short_description.clone());
1828 }
1829
1830 if line.cost_center.is_none() {
1849 let priors_opt = &mut self.loaded_priors;
1854 let rng_ref = &mut self.rng;
1855 if let Some(priors) = priors_opt {
1856 let sp37_cc = header_sap_code.as_deref().and_then(|code| {
1857 priors.sample_attribute_for_source(code, "cost_center", rng_ref)
1858 });
1859 if sp37_cc.is_some() {
1860 line.cost_center = sp37_cc;
1861 } else if let Some(sampler) = priors.fanout_samplers.get_mut("CostCenter") {
1862 line.cost_center = Some(sampler.pick_for_with_neighbors(
1863 &doc_type_key,
1864 &cc_pc_neighbor_vec,
1865 cc_pc_share_prob,
1866 rng_ref,
1867 ));
1868 }
1869 }
1870 }
1871 if line.cost_center.is_none() {
1872 let first_char = line.gl_account.chars().next().unwrap_or('0');
1873 if first_char == '5' || first_char == '6' {
1874 if !self.cost_center_pool.is_empty() {
1875 let needle = format!("-{company_code}-");
1876 let candidates: Vec<&String> = self
1877 .cost_center_pool
1878 .iter()
1879 .filter(|id| id.contains(&needle))
1880 .collect();
1881 let pool: Vec<&String> = if candidates.is_empty() {
1882 self.cost_center_pool.iter().collect()
1883 } else {
1884 candidates
1885 };
1886 let idx = cc_seed.wrapping_add(i) % pool.len();
1887 line.cost_center = Some(pool[idx].clone());
1888 } else {
1889 let idx = cc_seed.wrapping_add(i) % Self::COST_CENTER_POOL.len();
1890 line.cost_center = Some(Self::COST_CENTER_POOL[idx].to_string());
1891 }
1892 }
1893 }
1894
1895 if line.profit_center.is_none() {
1903 let priors_opt = &mut self.loaded_priors;
1908 let rng_ref = &mut self.rng;
1909 if let Some(priors) = priors_opt {
1910 let sp37_pc = header_sap_code.as_deref().and_then(|code| {
1911 priors.sample_attribute_for_source(code, "profit_center", rng_ref)
1912 });
1913 if sp37_pc.is_some() {
1914 line.profit_center = sp37_pc;
1915 } else if let Some(sampler) = priors.fanout_samplers.get_mut("ProfitCenter") {
1916 line.profit_center = Some(sampler.pick_for_with_neighbors(
1917 &doc_type_key,
1918 &cc_pc_neighbor_vec,
1919 cc_pc_share_prob,
1920 rng_ref,
1921 ));
1922 }
1923 }
1924 }
1925 if line.profit_center.is_none() {
1926 if !self.profit_center_pool.is_empty() {
1927 let needle = format!("-{company_code}-");
1928 let candidates: Vec<&String> = self
1929 .profit_center_pool
1930 .iter()
1931 .filter(|id| id.contains(&needle))
1932 .collect();
1933 let pool: Vec<&String> = if candidates.is_empty() {
1934 self.profit_center_pool.iter().collect()
1935 } else {
1936 candidates
1937 };
1938 let idx = cc_seed.wrapping_add(i) % pool.len();
1939 line.profit_center = Some(pool[idx].clone());
1940 } else {
1941 let suffix = match business_process {
1942 Some(BusinessProcess::P2P) => "-P2P",
1943 Some(BusinessProcess::O2C) => "-O2C",
1944 Some(BusinessProcess::R2R) => "-R2R",
1945 Some(BusinessProcess::H2R) => "-H2R",
1946 _ => "",
1947 };
1948 line.profit_center = Some(format!("PC-{company_code}{suffix}"));
1949 }
1950 }
1951
1952 if line.business_unit.is_none() && self.config.business_unit_dimension.unwrap_or(true) {
1959 if let Some(dim) = line
1960 .cost_center
1961 .as_deref()
1962 .or(line.profit_center.as_deref())
1963 {
1964 line.business_unit = Some(Self::business_unit_for_dimension(dim));
1965 }
1966 }
1967
1968 if line.trading_partner.is_none() {
1974 line.trading_partner = entry.header.trading_partner.clone();
1975 }
1976
1977 if line.line_text.is_none() {
1979 line.line_text = header_text.clone();
1980 }
1981
1982 if line.value_date.is_none()
1984 && (line.gl_account.starts_with("1100") || line.gl_account.starts_with("2000"))
1985 {
1986 line.value_date = Some(posting_date);
1987 }
1988
1989 if line.assignment.is_none() {
1991 if line.gl_account.starts_with("2000") {
1992 if let Some(ref ht) = header_text {
1994 if let Some(vendor_part) = ht.rsplit(" - ").next() {
1996 if vendor_part.starts_with("V-")
1997 || vendor_part.starts_with("VENDOR")
1998 || vendor_part.starts_with("Vendor")
1999 {
2000 line.assignment = Some(vendor_part.to_string());
2001 }
2002 }
2003 }
2004 } else if line.gl_account.starts_with("1100") {
2005 if let Some(ref ht) = header_text {
2007 if let Some(customer_part) = ht.rsplit(" - ").next() {
2008 if customer_part.starts_with("C-")
2009 || customer_part.starts_with("CUST")
2010 || customer_part.starts_with("Customer")
2011 {
2012 line.assignment = Some(customer_part.to_string());
2013 }
2014 }
2015 }
2016 }
2017 }
2018 }
2019 }
2020
2021 pub fn generate(&mut self) -> JournalEntry {
2023 debug!(
2024 count = self.count,
2025 companies = self.companies.len(),
2026 start_date = %self.start_date,
2027 end_date = %self.end_date,
2028 "Generating journal entry"
2029 );
2030
2031 if let Some(ref state) = self.batch_state {
2033 if state.remaining > 0 {
2034 return self.generate_batched_entry();
2035 }
2036 }
2037
2038 if let Some(rev) = self.maybe_generate_reversal() {
2041 return rev;
2042 }
2043
2044 if let Some(alloc) = self.maybe_generate_allocation_batch() {
2047 return alloc;
2048 }
2049
2050 if self.md_resolver.companies.is_empty()
2053 && self.md_resolver.persons.is_empty()
2054 && self.md_resolver.patients.is_empty()
2055 {
2056 self.refresh_md_resolver();
2057 }
2058
2059 self.count += 1;
2060
2061 let document_id = self.generate_deterministic_uuid();
2063
2064 let mut posting_date = if self.loaded_priors.is_none() {
2080 let mut d = self
2081 .temporal_sampler
2082 .sample_date(self.start_date, self.end_date);
2083 if let Some(ref calc) = self.business_day_calculator {
2085 if !calc.is_business_day(d) {
2086 d = calc.next_business_day(d, false);
2087 if d > self.end_date {
2088 d = calc.prev_business_day(self.end_date, true);
2089 }
2090 }
2091 }
2092 d
2093 } else {
2094 self.start_date
2097 };
2098
2099 let company_code = self.company_selector.select(&mut self.rng).to_string();
2101
2102 let copula_uv: Option<(f64, f64)> =
2106 self.correlation_copula.as_mut().map(|cop| cop.sample());
2107
2108 let mut line_spec = self.line_sampler.sample();
2117 if let Some((_u, v)) = copula_uv {
2118 let new_total = 2 + ((v * 10.0).floor() as usize).min(9);
2119 let old_debit = line_spec.debit_count.max(1);
2120 let old_credit = line_spec.credit_count.max(1);
2121 let new_debit = (new_total as f64 * old_debit as f64 / (old_debit + old_credit) as f64)
2122 .round() as usize;
2123 let new_debit = new_debit.clamp(1, new_total - 1);
2124 let new_credit = new_total - new_debit;
2125 line_spec.total_count = new_total;
2126 line_spec.debit_count = new_debit;
2127 line_spec.credit_count = new_credit;
2128 }
2129
2130 if let Some(cap) = self.config.lines_per_je_cap {
2134 let cap = cap.max(2);
2135 let total = line_spec.debit_count + line_spec.credit_count;
2136 if total > cap {
2137 let new_debit =
2138 ((line_spec.debit_count as f64 / total as f64) * cap as f64).round() as usize;
2139 let new_debit = new_debit.clamp(1, cap - 1);
2140 let new_credit = cap - new_debit;
2141 line_spec.total_count = cap;
2142 line_spec.debit_count = new_debit;
2143 line_spec.credit_count = new_credit;
2144 }
2145 }
2146
2147 let source = self.select_source();
2149 let is_automated = matches!(
2150 source,
2151 TransactionSource::Automated | TransactionSource::Recurring
2152 );
2153
2154 let sap_source_code: Option<String> = self.sample_sap_source_code();
2160 self.current_je_source = sap_source_code.clone();
2163
2164 let business_process = self.select_business_process();
2166
2167 {
2187 let priors_opt = &mut self.loaded_priors;
2189 let rng_ref = &mut self.rng;
2190 let iet_accum_ref = &mut self.iet_day_accum;
2191 let burst_ref = &mut self.iet_burst_remaining;
2192 if let Some(priors) = priors_opt {
2193 let iet_key = sap_source_code
2197 .as_deref()
2198 .unwrap_or_else(|| Self::document_type_for_process(business_process))
2199 .to_string();
2200 let period_days = (self.end_date - self.start_date).num_days().max(1) as f64;
2201
2202 const BURST_THRESHOLD_DAYS: f64 = 2.0;
2228 const BURST_PROB: f64 = 0.30;
2229 const BURST_LEN_MIN: u8 = 2;
2230 const BURST_LEN_MAX: u8 = 4;
2231
2232 let sampled_iet = priors.iet_sampler.sample_next(&iet_key, rng_ref).max(0.001);
2233
2234 let remaining = burst_ref.get(&iet_key).copied().unwrap_or(0);
2236 let iet = if remaining > 0 {
2237 burst_ref.insert(iet_key.clone(), remaining - 1);
2239 rng_ref.random_range(0.25..=1.5)
2240 } else if sampled_iet < BURST_THRESHOLD_DAYS
2241 && rng_ref.random_range(0.0..1.0) < BURST_PROB
2242 {
2243 let len = rng_ref.random_range(BURST_LEN_MIN..=BURST_LEN_MAX);
2247 burst_ref.insert(iet_key.clone(), len);
2248 sampled_iet
2249 } else {
2250 sampled_iet
2251 };
2252
2253 let accum = iet_accum_ref.entry(iet_key).or_insert(0.0);
2254 *accum += iet;
2255 if *accum >= period_days {
2257 *accum %= period_days;
2258 }
2259 let day_offset =
2260 (*accum as i64).clamp(0, (self.end_date - self.start_date).num_days());
2261 posting_date = self.start_date + chrono::Duration::days(day_offset);
2262 if let Some(ref calc) = self.business_day_calculator {
2265 if !calc.is_business_day(posting_date) {
2266 posting_date = calc.next_business_day(posting_date, false);
2267 if posting_date > self.end_date {
2268 posting_date = calc.prev_business_day(self.end_date, true);
2269 }
2270 }
2271 }
2272 } } if let Some(ref priors) = self.loaded_priors {
2289 let doc_type = Self::document_type_for_process(business_process);
2290 let day_in_period = (posting_date - self.start_date).num_days();
2291 let active = match &priors.multi_segment_window {
2292 Some(msw) => msw.is_active(doc_type, day_in_period),
2293 None => priors.active_window.is_active(doc_type, day_in_period),
2294 };
2295 if !active {
2296 posting_date = self
2301 .temporal_sampler
2302 .sample_date(self.start_date, self.end_date);
2303 if let Some(ref calc) = self.business_day_calculator {
2304 if !calc.is_business_day(posting_date) {
2305 posting_date = calc.next_business_day(posting_date, false);
2306 if posting_date > self.end_date {
2307 posting_date = calc.prev_business_day(self.end_date, true);
2308 }
2309 }
2310 }
2311 }
2312 }
2313
2314 if let Some(ref priors) = self.loaded_priors {
2323 let doc_type = Self::document_type_for_process(business_process);
2324 let hist = priors
2325 .lines_per_je
2326 .by_source
2327 .get(doc_type)
2328 .unwrap_or(&priors.lines_per_je.overall);
2329 let n_total = (hist.sample_bucket(&mut self.rng) as usize).max(2);
2330 let old_debit = line_spec.debit_count.max(1);
2331 let old_credit = line_spec.credit_count.max(1);
2332 let new_debit = (n_total as f64 * old_debit as f64 / (old_debit + old_credit) as f64)
2333 .round() as usize;
2334 let new_debit = new_debit.clamp(1, n_total - 1);
2335 line_spec.total_count = n_total;
2336 line_spec.debit_count = new_debit;
2337 line_spec.credit_count = n_total - new_debit;
2338 }
2339
2340 let fraud_type = self.determine_fraud(business_process);
2344 let is_fraud = fraud_type.is_some();
2345
2346 let time = self.temporal_sampler.sample_time(!is_automated);
2348 let created_at = posting_date.and_time(time).and_utc();
2349
2350 let (created_by, user_persona) = self.select_user(is_automated);
2352
2353 let mut header =
2355 JournalEntryHeader::with_deterministic_id(company_code, posting_date, document_id);
2356 header.created_at = created_at;
2357 header.source = source;
2358 header.sap_source_code = sap_source_code;
2359
2360 {
2368 let code_opt = header.sap_source_code.clone();
2369 if let Some(ref code) = code_opt {
2370 let rng_ref = &mut self.rng;
2371 let tp_neighbors: Vec<String> = if let Some(ref priors) = self.loaded_priors {
2375 if let Some(ref motifs) = priors.tp_motif_sampler {
2376 if let Some(last_tp) = self.last_tp_by_source.get(code.as_str()) {
2377 motifs.neighbors(last_tp).to_vec()
2378 } else {
2379 Vec::new()
2380 }
2381 } else {
2382 Vec::new()
2383 }
2384 } else {
2385 Vec::new()
2386 };
2387 let tp_share_prob: f64 = if let Some(ref priors) = self.loaded_priors {
2388 if let Some(ref motifs) = priors.tp_motif_sampler {
2389 if let Some(last_tp) = self.last_tp_by_source.get(code.as_str()) {
2390 motifs.should_share(last_tp)
2391 } else {
2392 0.0
2393 }
2394 } else {
2395 0.0
2396 }
2397 } else {
2398 0.0
2399 };
2400
2401 if let Some(ref mut priors) = self.loaded_priors {
2402 let tp = if !tp_neighbors.is_empty()
2406 && tp_share_prob > 0.0
2407 && rng_ref.random_range(0.0..1.0) < tp_share_prob
2408 {
2409 use datasynth_core::distributions::behavioral_priors::CategoricalDistribution;
2413 let filtered: std::collections::BTreeMap<String, f64> = priors
2414 .per_source_attribute
2415 .as_ref()
2416 .and_then(|psa| psa.conditional(code, "trading_partner"))
2417 .map(|dist| {
2418 dist.probabilities
2419 .iter()
2420 .filter(|(v, _)| tp_neighbors.contains(v))
2421 .map(|(v, p)| (v.clone(), *p))
2422 .collect()
2423 })
2424 .unwrap_or_default();
2425 if filtered.is_empty() {
2426 priors.sample_attribute_for_source(code, "trading_partner", rng_ref)
2427 } else {
2428 let neighbour_dist = CategoricalDistribution {
2429 probabilities: filtered,
2430 n: 0, };
2432 neighbour_dist.sample(rng_ref).or_else(|| {
2433 priors.sample_attribute_for_source(code, "trading_partner", rng_ref)
2434 })
2435 }
2436 } else {
2437 priors.sample_attribute_for_source(code, "trading_partner", rng_ref)
2438 };
2439 header.trading_partner = tp;
2440 }
2441 if let Some(ref tp) = header.trading_partner {
2444 self.last_tp_by_source.insert(code.clone(), tp.clone());
2445 }
2446 }
2447 }
2448
2449 let (created_by, created_at) = {
2454 let sap_code_for_user = header.sap_source_code.clone();
2455 if let (Some(ref code), Some(ref priors)) = (sap_code_for_user, &self.loaded_priors) {
2456 if let Some(uid) = priors.sample_user_for_source(code, &mut self.rng) {
2457 let new_created_at = if let Some((hour, _)) =
2458 priors.sample_timestamp_for_user(&uid, &mut self.rng)
2459 {
2460 let base = header.created_at;
2461 base.date_naive()
2462 .and_hms_opt(hour, 0, 0)
2463 .map(|naive| naive.and_utc())
2464 .unwrap_or(base)
2465 } else {
2466 header.created_at
2467 };
2468 (uid, new_created_at)
2469 } else {
2470 (created_by, header.created_at)
2471 }
2472 } else {
2473 (created_by, header.created_at)
2474 }
2475 };
2476
2477 header.created_by = created_by;
2478 header.created_at = created_at;
2479 header.user_persona = user_persona;
2480 header.business_process = Some(business_process);
2481 header.document_type = Self::document_type_for_process(business_process).to_string();
2482 header.is_fraud = is_fraud;
2483 header.fraud_type = fraud_type;
2484
2485 let is_manual = matches!(source, TransactionSource::Manual);
2487 header.is_manual = is_manual;
2488
2489 header.source_system = Self::pick_source_system(&mut self.rng, is_manual, business_process);
2503
2504 let is_post_close = posting_date.month() == self.end_date.month()
2507 && posting_date.year() == self.end_date.year()
2508 && posting_date.day() > 25;
2509 header.is_post_close = is_post_close;
2510
2511 let created_date = if is_manual {
2514 posting_date.and_hms_opt(time.hour().min(23), time.minute(), time.second())
2515 } else {
2516 let lag_days = self.rng.random_range(0i64..=3);
2517 let created_naive_date = posting_date
2518 .checked_sub_signed(chrono::Duration::days(lag_days))
2519 .unwrap_or(posting_date);
2520 created_naive_date.and_hms_opt(
2521 self.rng.random_range(8u32..=17),
2522 self.rng.random_range(0u32..=59),
2523 self.rng.random_range(0u32..=59),
2524 )
2525 };
2526 header.created_date = created_date;
2527
2528 let mut context =
2530 DescriptionContext::with_period(posting_date.month(), posting_date.year());
2531
2532 match business_process {
2534 BusinessProcess::P2P => {
2535 if let Some(vendor) = self.vendor_pool.random_vendor(&mut self.rng) {
2536 context.vendor_name = Some(vendor.name.clone());
2537 }
2538 }
2539 BusinessProcess::O2C => {
2540 if let Some(customer) = self.customer_pool.random_customer(&mut self.rng) {
2541 context.customer_name = Some(customer.name.clone());
2542 }
2543 }
2544 _ => {}
2545 }
2546
2547 if self.template_config.descriptions.generate_header_text {
2551 let priors_header = if let Some(src) = header.sap_source_code.as_deref() {
2552 if let Some(p) = self.loaded_priors.as_ref() {
2553 p.sample_header_template(src, &mut self.md_resolver, &mut self.rng)
2555 } else {
2556 None
2557 }
2558 } else {
2559 None
2560 };
2561 header.header_text = Some(priors_header.unwrap_or_else(|| {
2562 self.description_generator.generate_header_text(
2563 business_process,
2564 &context,
2565 &mut self.rng,
2566 )
2567 }));
2568 }
2569
2570 if self.template_config.references.generate_references {
2577 let priors_ref = header.sap_source_code.as_deref().and_then(|src| {
2578 self.loaded_priors
2579 .as_ref()
2580 .and_then(|p| p.sample_reference(src, &mut self.rng))
2581 });
2582 header.reference = Some(priors_ref.unwrap_or_else(|| {
2583 self.reference_generator
2584 .generate_for_process_year(business_process, posting_date.year())
2585 }));
2586 }
2587
2588 header.source_document = header
2590 .reference
2591 .as_deref()
2592 .and_then(DocumentRef::parse)
2593 .or_else(|| {
2594 if header.source == TransactionSource::Manual {
2595 Some(DocumentRef::Manual)
2596 } else {
2597 None
2598 }
2599 });
2600
2601 let mut entry = JournalEntry::new(header);
2603
2604 let base_amount = if let Some(ft) = fraud_type {
2610 let pattern = self.fraud_type_to_amount_pattern(ft);
2611 self.amount_sampler.sample_fraud(pattern)
2612 } else if let Some(ref mut adv) = self.advanced_amount_sampler {
2613 adv.sample_decimal()
2614 } else {
2615 self.amount_sampler.sample()
2616 };
2617 let base_amount = if fraud_type.is_none() {
2623 let input = self.conditional_input_value(posting_date);
2627 if let Some(ref mut cond) = self.conditional_amount_override {
2628 cond.sample_decimal(input)
2629 } else {
2630 base_amount
2631 }
2632 } else {
2633 base_amount
2634 };
2635
2636 const PRIORS_AMOUNT_BYPASS_SHARE: f64 = 0.25;
2658 let base_amount = if fraud_type.is_none() {
2659 if let Some(src) = entry.header.sap_source_code.as_deref() {
2660 let src_owned = src.to_string();
2661 let use_conditional = self.loaded_priors.is_some()
2664 && self.rng.random_range(0.0..1.0) >= PRIORS_AMOUNT_BYPASS_SHARE;
2665 if use_conditional {
2666 let priors_ref = &mut self.loaded_priors;
2667 let rng_ref = &mut self.rng;
2668 if let Some(priors) = priors_ref {
2669 priors
2670 .sample_amount_for_source(&src_owned, "", rng_ref)
2671 .and_then(|v| {
2672 if v.is_finite() && v > 0.0 {
2673 Decimal::from_f64_retain(v)
2674 } else {
2675 None
2676 }
2677 })
2678 .unwrap_or(base_amount)
2679 } else {
2680 base_amount
2681 }
2682 } else {
2683 base_amount
2684 }
2685 } else {
2686 base_amount
2687 }
2688 } else {
2689 base_amount
2690 };
2691
2692 let base_amount = if fraud_type.is_none() {
2703 if let Some((u, _v)) = copula_uv {
2704 if let Some(ref adv) = self.advanced_amount_sampler {
2705 adv.ppf_decimal(u)
2706 } else {
2707 let log_mult = 4.0 * (u - 0.5);
2708 let adjusted = base_amount.to_f64().unwrap_or(1.0) * log_mult.exp();
2709 Decimal::from_f64_retain(adjusted).unwrap_or(base_amount)
2710 }
2711 } else {
2712 base_amount
2713 }
2714 } else {
2715 base_amount
2716 };
2717
2718 let drift_adjusted_amount = {
2720 let drift = self.get_drift_adjustments(posting_date);
2721 if drift.amount_mean_multiplier != 1.0 {
2722 let multiplier = drift.amount_mean_multiplier * drift.seasonal_factor;
2724 let adjusted = base_amount.to_f64().unwrap_or(1.0) * multiplier;
2725 Decimal::from_f64_retain(adjusted).unwrap_or(base_amount)
2726 } else {
2727 base_amount
2728 }
2729 };
2730
2731 let total_amount = if is_automated {
2733 drift_adjusted_amount } else {
2735 self.apply_human_variation(drift_adjusted_amount)
2736 };
2737
2738 let doc_type_for_fanout = Self::document_type_for_process(business_process).to_string();
2742
2743 let (gl_neighbor_vec, gl_share_prob): (Vec<String>, f64) =
2748 if let Some(priors) = &self.loaded_priors {
2749 if let Some(motifs) = &priors.cross_entity_motifs {
2750 (
2751 motifs.neighbors(&doc_type_for_fanout).to_vec(),
2752 motifs.should_share(&doc_type_for_fanout),
2753 )
2754 } else {
2755 (Vec::new(), 0.0)
2756 }
2757 } else {
2758 (Vec::new(), 0.0)
2759 };
2760
2761 let reuse_archetype = self.pick_recurring_archetype(
2768 &entry.header.company_code,
2769 &doc_type_for_fanout,
2770 line_spec.debit_count,
2771 line_spec.credit_count,
2772 );
2773 let mut fresh_debit_accts: Vec<String> = Vec::new();
2774 let mut fresh_credit_accts: Vec<String> = Vec::new();
2775 let sota8_active = self.config.source_conditional_account_pair.enabled;
2778
2779 let debit_amounts = self
2781 .amount_sampler
2782 .sample_summing_to(line_spec.debit_count, total_amount);
2783 for (i, amount) in debit_amounts.into_iter().enumerate() {
2784 let debit_fallback = self.select_debit_account().account_number.clone();
2793 let account_number = if sota8_active {
2799 debit_fallback
2800 } else {
2801 let priors_opt = &mut self.loaded_priors;
2802 let rng_ref = &mut self.rng;
2803 if let Some(priors) = priors_opt {
2804 let sp46_gl = entry
2808 .header
2809 .sap_source_code
2810 .as_deref()
2811 .and_then(|code| priors.sample_gl_for_source_role(code, "DR", rng_ref));
2812 if let Some(gl) = sp46_gl {
2813 gl
2814 } else {
2815 let sp37_gl = entry.header.sap_source_code.as_deref().and_then(|code| {
2817 priors.sample_attribute_for_source(code, "gl_account", rng_ref)
2818 });
2819 if let Some(gl) = sp37_gl {
2820 gl
2821 } else if let Some(sampler) = priors.fanout_samplers.get_mut("GLAccount") {
2822 sampler.pick_for_with_neighbors(
2824 &doc_type_for_fanout,
2825 &gl_neighbor_vec,
2826 gl_share_prob,
2827 rng_ref,
2828 )
2829 } else {
2830 debit_fallback
2831 }
2832 }
2833 } else {
2834 debit_fallback
2835 }
2836 };
2837 let mut line = JournalEntryLine::debit(
2838 entry.header.document_id,
2839 (i + 1) as u32,
2840 account_number.clone(),
2841 amount,
2842 );
2843
2844 if self.template_config.descriptions.generate_line_text {
2847 let src = entry.header.sap_source_code.as_deref();
2848 let priors_line = if let Some(s) = src {
2849 if let Some(p) = self.loaded_priors.as_ref() {
2850 let account_class = p
2851 .coa_semantic
2852 .as_ref()
2853 .and_then(|c| c.accounts.get(&account_number))
2854 .and_then(|a| a.account_class.as_deref())
2855 .unwrap_or(
2856 datasynth_core::distributions::text_taxonomy::TextTaxonomyPrior::UNKNOWN_CLASS,
2857 );
2858 p.sample_line_template(
2860 s,
2861 account_class,
2862 &mut self.md_resolver,
2863 &mut self.rng,
2864 )
2865 } else {
2866 None
2867 }
2868 } else {
2869 None
2870 };
2871 line.line_text = Some(priors_line.unwrap_or_else(|| {
2872 self.description_generator.generate_line_text(
2873 &account_number,
2874 &context,
2875 &mut self.rng,
2876 )
2877 }));
2878 }
2879
2880 if let Some((ref d, _)) = reuse_archetype {
2888 if let Some(a) = d.get(i) {
2889 line.gl_account = a.clone();
2890 }
2891 } else if self.loaded_priors.is_none() {
2892 fresh_debit_accts.push(line.gl_account.clone());
2893 }
2894 entry.add_line(line);
2895 }
2896
2897 let credit_amounts = self
2899 .amount_sampler
2900 .sample_summing_to(line_spec.credit_count, total_amount);
2901 for (i, amount) in credit_amounts.into_iter().enumerate() {
2902 let credit_fallback = self.select_credit_account().account_number.clone();
2904 let account_number = if sota8_active {
2906 credit_fallback
2907 } else {
2908 let priors_opt = &mut self.loaded_priors;
2909 let rng_ref = &mut self.rng;
2910 if let Some(priors) = priors_opt {
2911 let sp46_gl = entry
2912 .header
2913 .sap_source_code
2914 .as_deref()
2915 .and_then(|code| priors.sample_gl_for_source_role(code, "CR", rng_ref));
2916 if let Some(gl) = sp46_gl {
2917 gl
2918 } else {
2919 let sp37_gl = entry.header.sap_source_code.as_deref().and_then(|code| {
2920 priors.sample_attribute_for_source(code, "gl_account", rng_ref)
2921 });
2922 if let Some(gl) = sp37_gl {
2923 gl
2924 } else if let Some(sampler) = priors.fanout_samplers.get_mut("GLAccount") {
2925 sampler.pick_for_with_neighbors(
2926 &doc_type_for_fanout,
2927 &gl_neighbor_vec,
2928 gl_share_prob,
2929 rng_ref,
2930 )
2931 } else {
2932 credit_fallback
2933 }
2934 }
2935 } else {
2936 credit_fallback
2937 }
2938 };
2939 let mut line = JournalEntryLine::credit(
2940 entry.header.document_id,
2941 (line_spec.debit_count + i + 1) as u32,
2942 account_number.clone(),
2943 amount,
2944 );
2945
2946 if self.template_config.descriptions.generate_line_text {
2949 let src = entry.header.sap_source_code.as_deref();
2950 let priors_line = if let Some(s) = src {
2951 if let Some(p) = self.loaded_priors.as_ref() {
2952 let account_class = p
2953 .coa_semantic
2954 .as_ref()
2955 .and_then(|c| c.accounts.get(&account_number))
2956 .and_then(|a| a.account_class.as_deref())
2957 .unwrap_or(
2958 datasynth_core::distributions::text_taxonomy::TextTaxonomyPrior::UNKNOWN_CLASS,
2959 );
2960 p.sample_line_template(
2962 s,
2963 account_class,
2964 &mut self.md_resolver,
2965 &mut self.rng,
2966 )
2967 } else {
2968 None
2969 }
2970 } else {
2971 None
2972 };
2973 line.line_text = Some(priors_line.unwrap_or_else(|| {
2974 self.description_generator.generate_line_text(
2975 &account_number,
2976 &context,
2977 &mut self.rng,
2978 )
2979 }));
2980 }
2981
2982 if let Some((_, ref c)) = reuse_archetype {
2986 if let Some(a) = c.get(i) {
2987 line.gl_account = a.clone();
2988 }
2989 } else if self.loaded_priors.is_none() {
2990 fresh_credit_accts.push(line.gl_account.clone());
2991 }
2992 entry.add_line(line);
2993 }
2994
2995 if reuse_archetype.is_none() {
2998 self.cache_recurring_archetype(
2999 &entry.header.company_code,
3000 &doc_type_for_fanout,
3001 std::mem::take(&mut fresh_debit_accts),
3002 std::mem::take(&mut fresh_credit_accts),
3003 );
3004 }
3005
3006 self.enrich_line_items(&mut entry);
3008
3009 if self.persona_errors_enabled && !is_automated {
3011 self.maybe_inject_persona_error(&mut entry);
3012 }
3013
3014 if self.approval_enabled {
3016 self.maybe_apply_approval_workflow(&mut entry, posting_date);
3017 }
3018
3019 self.populate_approval_fields(&mut entry, posting_date);
3021
3022 self.maybe_start_batch(&entry);
3024
3025 if self.velocity_calibrator.is_some() {
3028 let mut pending: Vec<crate::velocity_calibrator::CalibrationStep> = Vec::new();
3029 for line in &entry.lines {
3030 if let Some(step) = self
3031 .velocity_calibrator
3032 .as_mut()
3033 .and_then(|cal| cal.observe_line(line))
3034 {
3035 pending.push(step);
3036 }
3037 }
3038 for step in pending {
3039 self.apply_calibration_step(&step);
3040 }
3041 }
3042
3043 self.maybe_apply_foreign_currency(&mut entry);
3046
3047 self.record_for_reversal(&entry);
3049
3050 entry
3051 }
3052
3053 fn apply_calibration_step(&mut self, step: &crate::velocity_calibrator::CalibrationStep) {
3061 match step.parameter.as_str() {
3062 "amounts.lognormal_sigma" => {
3063 self.amount_sampler.set_lognormal_sigma(step.new_value);
3064 }
3065 "amounts.round_dollar_share" => {
3066 self.amount_sampler
3067 .set_round_number_probability(step.new_value);
3068 }
3069 _ => {
3070 }
3073 }
3074 }
3075
3076 pub fn with_persona_errors(mut self, enabled: bool) -> Self {
3081 self.persona_errors_enabled = enabled;
3082 self
3083 }
3084
3085 pub fn with_fraud_config(mut self, config: FraudConfig) -> Self {
3090 self.fraud_config = config;
3091 self
3092 }
3093
3094 pub fn persona_errors_enabled(&self) -> bool {
3096 self.persona_errors_enabled
3097 }
3098
3099 pub fn with_batching(mut self, enabled: bool) -> Self {
3104 if !enabled {
3105 self.batch_state = None;
3106 }
3107 self
3108 }
3109
/// Reports whether batching is enabled.
///
/// The result is the hard-coded constant `true`; no generator state is read.
// NOTE(review): `with_batching(false)` only clears `batch_state`, so this
// accessor cannot reflect that call — confirm this is intentional.
pub fn batching_enabled(&self) -> bool {
    true
}
3115
3116 fn maybe_start_batch(&mut self, entry: &JournalEntry) {
3121 if entry.header.source == TransactionSource::Automated || entry.header.is_fraud {
3123 return;
3124 }
3125
3126 if self.rng.random::<f64>() > 0.15 {
3128 return;
3129 }
3130
3131 let base_account = entry
3133 .lines
3134 .first()
3135 .map(|l| l.gl_account.clone())
3136 .unwrap_or_default();
3137
3138 let base_amount = entry.total_debit();
3139
3140 self.batch_state = Some(BatchState {
3141 base_account_number: base_account,
3142 base_amount,
3143 base_business_process: entry.header.business_process,
3144 base_posting_date: entry.header.posting_date,
3145 remaining: self.rng.random_range(2..7), });
3147 }
3148
3149 fn generate_batched_entry(&mut self) -> JournalEntry {
3157 use rust_decimal::Decimal;
3158
3159 if let Some(ref mut state) = self.batch_state {
3161 state.remaining = state.remaining.saturating_sub(1);
3162 }
3163
3164 let Some(batch) = self.batch_state.clone() else {
3165 tracing::warn!(
3168 "generate_batched_entry called without batch_state; generating standard entry"
3169 );
3170 self.batch_state = None;
3171 return self.generate();
3172 };
3173
3174 let posting_date = batch.base_posting_date;
3176
3177 self.count += 1;
3178 let document_id = self.generate_deterministic_uuid();
3179
3180 let company_code = self.company_selector.select(&mut self.rng).to_string();
3182
3183 let _line_spec = LineItemSpec {
3185 total_count: 2,
3186 debit_count: 1,
3187 credit_count: 1,
3188 split_type: DebitCreditSplit::Equal,
3189 };
3190
3191 let source = TransactionSource::Manual;
3193
3194 let sap_source_code: Option<String> = self.sample_sap_source_code();
3196 self.current_je_source = sap_source_code.clone();
3198
3199 let business_process = batch.base_business_process.unwrap_or(BusinessProcess::R2R);
3201
3202 let time = self.temporal_sampler.sample_time(true);
3204 let created_at = posting_date.and_time(time).and_utc();
3205
3206 let (created_by, user_persona) = self.select_user(false);
3208
3209 let mut header =
3211 JournalEntryHeader::with_deterministic_id(company_code, posting_date, document_id);
3212 header.created_at = created_at;
3213 header.source = source;
3214 header.sap_source_code = sap_source_code;
3215
3216 {
3220 let code_opt = header.sap_source_code.clone();
3221 if let Some(ref code) = code_opt {
3222 let rng_ref = &mut self.rng;
3223 let tp_neighbors: Vec<String> = if let Some(ref priors) = self.loaded_priors {
3224 if let Some(ref motifs) = priors.tp_motif_sampler {
3225 if let Some(last_tp) = self.last_tp_by_source.get(code.as_str()) {
3226 motifs.neighbors(last_tp).to_vec()
3227 } else {
3228 Vec::new()
3229 }
3230 } else {
3231 Vec::new()
3232 }
3233 } else {
3234 Vec::new()
3235 };
3236 let tp_share_prob: f64 = if let Some(ref priors) = self.loaded_priors {
3237 if let Some(ref motifs) = priors.tp_motif_sampler {
3238 if let Some(last_tp) = self.last_tp_by_source.get(code.as_str()) {
3239 motifs.should_share(last_tp)
3240 } else {
3241 0.0
3242 }
3243 } else {
3244 0.0
3245 }
3246 } else {
3247 0.0
3248 };
3249 if let Some(ref mut priors) = self.loaded_priors {
3250 use datasynth_core::distributions::behavioral_priors::CategoricalDistribution;
3251 let tp = if !tp_neighbors.is_empty()
3252 && tp_share_prob > 0.0
3253 && rng_ref.random_range(0.0..1.0) < tp_share_prob
3254 {
3255 let filtered: std::collections::BTreeMap<String, f64> = priors
3256 .per_source_attribute
3257 .as_ref()
3258 .and_then(|psa| psa.conditional(code, "trading_partner"))
3259 .map(|dist| {
3260 dist.probabilities
3261 .iter()
3262 .filter(|(v, _)| tp_neighbors.contains(v))
3263 .map(|(v, p)| (v.clone(), *p))
3264 .collect()
3265 })
3266 .unwrap_or_default();
3267 if filtered.is_empty() {
3268 priors.sample_attribute_for_source(code, "trading_partner", rng_ref)
3269 } else {
3270 let neighbour_dist = CategoricalDistribution {
3271 probabilities: filtered,
3272 n: 0,
3273 };
3274 neighbour_dist.sample(rng_ref).or_else(|| {
3275 priors.sample_attribute_for_source(code, "trading_partner", rng_ref)
3276 })
3277 }
3278 } else {
3279 priors.sample_attribute_for_source(code, "trading_partner", rng_ref)
3280 };
3281 header.trading_partner = tp;
3282 }
3283 if let Some(ref tp) = header.trading_partner {
3284 self.last_tp_by_source.insert(code.clone(), tp.clone());
3285 }
3286 }
3287 }
3288
3289 let (created_by, created_at) = {
3291 let sap_code_for_user = header.sap_source_code.clone();
3292 if let (Some(ref code), Some(ref priors)) = (sap_code_for_user, &self.loaded_priors) {
3293 if let Some(uid) = priors.sample_user_for_source(code, &mut self.rng) {
3294 let new_created_at = if let Some((hour, _)) =
3295 priors.sample_timestamp_for_user(&uid, &mut self.rng)
3296 {
3297 let base = header.created_at;
3298 base.date_naive()
3299 .and_hms_opt(hour, 0, 0)
3300 .map(|naive| naive.and_utc())
3301 .unwrap_or(base)
3302 } else {
3303 header.created_at
3304 };
3305 (uid, new_created_at)
3306 } else {
3307 (created_by, header.created_at)
3308 }
3309 } else {
3310 (created_by, header.created_at)
3311 }
3312 };
3313
3314 header.created_by = created_by;
3315 header.created_at = created_at;
3316 header.user_persona = user_persona;
3317 header.business_process = Some(business_process);
3318 header.document_type = Self::document_type_for_process(business_process).to_string();
3319
3320 header.source_document = Some(DocumentRef::Manual);
3322
3323 header.is_manual = true;
3325 header.source_system = if self.rng.random::<f64>() < 0.70 {
3326 "manual".to_string()
3327 } else {
3328 "spreadsheet".to_string()
3329 };
3330 header.is_post_close = posting_date.month() == self.end_date.month()
3331 && posting_date.year() == self.end_date.year()
3332 && posting_date.day() > 25;
3333 header.created_date =
3334 posting_date.and_hms_opt(time.hour().min(23), time.minute(), time.second());
3335
3336 let variation = self.rng.random_range(-0.15..0.15);
3338 let varied_amount =
3339 batch.base_amount * (Decimal::ONE + Decimal::try_from(variation).unwrap_or_default());
3340 let total_amount = varied_amount.round_dp(2).max(Decimal::from(1));
3341
3342 let mut entry = JournalEntry::new(header);
3344
3345 let debit_line = JournalEntryLine::debit(
3347 entry.header.document_id,
3348 1,
3349 batch.base_account_number.clone(),
3350 total_amount,
3351 );
3352 entry.add_line(debit_line);
3353
3354 let credit_fallback = self.select_credit_account().account_number.clone();
3361 let credit_account = {
3362 let priors_opt = &mut self.loaded_priors;
3363 let rng_ref = &mut self.rng;
3364 if let Some(priors) = priors_opt {
3365 let sp46_gl = entry
3368 .header
3369 .sap_source_code
3370 .as_deref()
3371 .and_then(|code| priors.sample_gl_for_source_role(code, "CR", rng_ref));
3372 if let Some(gl) = sp46_gl {
3373 gl
3374 } else {
3375 let sp37_gl = entry.header.sap_source_code.as_deref().and_then(|code| {
3376 priors.sample_attribute_for_source(code, "gl_account", rng_ref)
3377 });
3378 sp37_gl.unwrap_or(credit_fallback)
3379 }
3380 } else {
3381 credit_fallback
3382 }
3383 };
3384 let credit_line =
3385 JournalEntryLine::credit(entry.header.document_id, 2, credit_account, total_amount);
3386 entry.add_line(credit_line);
3387
3388 self.enrich_line_items(&mut entry);
3390
3391 if self.persona_errors_enabled {
3393 self.maybe_inject_persona_error(&mut entry);
3394 }
3395
3396 if self.approval_enabled {
3398 self.maybe_apply_approval_workflow(&mut entry, posting_date);
3399 }
3400
3401 self.populate_approval_fields(&mut entry, posting_date);
3403
3404 if batch.remaining <= 1 {
3406 self.batch_state = None;
3407 }
3408
3409 entry
3410 }
3411
3412 fn maybe_inject_persona_error(&mut self, entry: &mut JournalEntry) {
3414 let persona_str = &entry.header.user_persona;
3416 let persona = match persona_str.to_lowercase().as_str() {
3417 s if s.contains("junior") => UserPersona::JuniorAccountant,
3418 s if s.contains("senior") => UserPersona::SeniorAccountant,
3419 s if s.contains("controller") => UserPersona::Controller,
3420 s if s.contains("manager") => UserPersona::Manager,
3421 s if s.contains("executive") => UserPersona::Executive,
3422 _ => return, };
3424
3425 let base_error_rate = persona.error_rate();
3427
3428 let adjusted_rate = self.apply_stress_factors(base_error_rate, entry.header.posting_date);
3430
3431 if self.rng.random::<f64>() >= adjusted_rate {
3433 return; }
3435
3436 self.inject_human_error(entry, persona);
3438 }
3439
3440 fn apply_stress_factors(&self, base_rate: f64, posting_date: chrono::NaiveDate) -> f64 {
3449 use chrono::Datelike;
3450
3451 let mut rate = base_rate;
3452 let day = posting_date.day();
3453 let month = posting_date.month();
3454
3455 if month == 12 && day >= 28 {
3457 rate *= 2.0;
3458 return rate.min(0.5); }
3460
3461 if matches!(month, 3 | 6 | 9 | 12) && day >= 28 {
3463 rate *= 1.75; return rate.min(0.4);
3465 }
3466
3467 if day >= 28 {
3469 rate *= 1.5; }
3471
3472 let weekday = posting_date.weekday();
3474 match weekday {
3475 chrono::Weekday::Mon => {
3476 rate *= 1.2;
3478 }
3479 chrono::Weekday::Fri => {
3480 rate *= 1.3;
3482 }
3483 _ => {}
3484 }
3485
3486 rate.min(0.4)
3488 }
3489
3490 fn apply_human_variation(&mut self, amount: rust_decimal::Decimal) -> rust_decimal::Decimal {
3499 use rust_decimal::Decimal;
3500
3501 if amount < Decimal::from(10) {
3503 return amount;
3504 }
3505
3506 if self.rng.random::<f64>() > 0.70 {
3508 return amount;
3509 }
3510
3511 let variation_type: u8 = self.rng.random_range(0..4);
3513
3514 match variation_type {
3515 0 => {
3516 let variation_pct = self.rng.random_range(-0.02..0.02);
3518 let variation = amount * Decimal::try_from(variation_pct).unwrap_or_default();
3519 (amount + variation).round_dp(2)
3520 }
3521 1 => {
3522 let ten = Decimal::from(10);
3524 (amount / ten).round() * ten
3525 }
3526 2 => {
3527 if amount >= Decimal::from(500) {
3529 let hundred = Decimal::from(100);
3530 (amount / hundred).round() * hundred
3531 } else {
3532 amount
3533 }
3534 }
3535 3 => {
3536 let cents = Decimal::new(self.rng.random_range(-100..100), 2);
3538 (amount + cents).max(Decimal::ZERO).round_dp(2)
3539 }
3540 _ => amount,
3541 }
3542 }
3543
3544 fn rebalance_entry(entry: &mut JournalEntry, modified_was_debit: bool, impact: Decimal) {
3550 let balancing_idx = entry.lines.iter().position(|l| {
3552 if modified_was_debit {
3553 l.credit_amount > Decimal::ZERO
3554 } else {
3555 l.debit_amount > Decimal::ZERO
3556 }
3557 });
3558
3559 if let Some(idx) = balancing_idx {
3560 if modified_was_debit {
3561 entry.lines[idx].credit_amount += impact;
3562 } else {
3563 entry.lines[idx].debit_amount += impact;
3564 }
3565 }
3566 }
3567
3568 fn inject_human_error(&mut self, entry: &mut JournalEntry, persona: UserPersona) {
3573 use rust_decimal::Decimal;
3574
3575 let error_type: u8 = match persona {
3577 UserPersona::JuniorAccountant => {
3578 self.rng.random_range(0..5)
3580 }
3581 UserPersona::SeniorAccountant => {
3582 self.rng.random_range(0..3)
3584 }
3585 UserPersona::Controller | UserPersona::Manager => {
3586 self.rng.random_range(3..5)
3588 }
3589 _ => return,
3590 };
3591
3592 match error_type {
3593 0 => {
3594 if let Some(line) = entry.lines.get_mut(0) {
3596 let is_debit = line.debit_amount > Decimal::ZERO;
3597 let original_amount = if is_debit {
3598 line.debit_amount
3599 } else {
3600 line.credit_amount
3601 };
3602
3603 let s = original_amount.to_string();
3605 if s.len() >= 2 {
3606 let chars: Vec<char> = s.chars().collect();
3607 let pos = self.rng.random_range(0..chars.len().saturating_sub(1));
3608 if chars[pos].is_ascii_digit()
3609 && chars.get(pos + 1).is_some_and(char::is_ascii_digit)
3610 {
3611 let mut new_chars = chars;
3612 new_chars.swap(pos, pos + 1);
3613 if let Ok(new_amount) =
3614 new_chars.into_iter().collect::<String>().parse::<Decimal>()
3615 {
3616 let impact = new_amount - original_amount;
3617
3618 if is_debit {
3620 entry.lines[0].debit_amount = new_amount;
3621 } else {
3622 entry.lines[0].credit_amount = new_amount;
3623 }
3624
3625 Self::rebalance_entry(entry, is_debit, impact);
3627
3628 entry.header.header_text = Some(
3629 entry.header.header_text.clone().unwrap_or_default()
3630 + " [HUMAN_ERROR:TRANSPOSITION]",
3631 );
3632 }
3633 }
3634 }
3635 }
3636 }
3637 1 => {
3638 if let Some(line) = entry.lines.get_mut(0) {
3640 let is_debit = line.debit_amount > Decimal::ZERO;
3641 let original_amount = if is_debit {
3642 line.debit_amount
3643 } else {
3644 line.credit_amount
3645 };
3646
3647 let new_amount = original_amount * Decimal::new(10, 0);
3648 let impact = new_amount - original_amount;
3649
3650 if is_debit {
3652 entry.lines[0].debit_amount = new_amount;
3653 } else {
3654 entry.lines[0].credit_amount = new_amount;
3655 }
3656
3657 Self::rebalance_entry(entry, is_debit, impact);
3659
3660 entry.header.header_text = Some(
3661 entry.header.header_text.clone().unwrap_or_default()
3662 + " [HUMAN_ERROR:DECIMAL_SHIFT]",
3663 );
3664 }
3665 }
3666 2 => {
3667 if let Some(ref mut text) = entry.header.header_text {
3669 let typos = ["teh", "adn", "wiht", "taht", "recieve"];
3670 let correct = ["the", "and", "with", "that", "receive"];
3671 let idx = self.rng.random_range(0..typos.len());
3672 if text.to_lowercase().contains(correct[idx]) {
3673 *text = text.replace(correct[idx], typos[idx]);
3674 *text = format!("{text} [HUMAN_ERROR:TYPO]");
3675 }
3676 }
3677 }
3678 3 => {
3679 if let Some(line) = entry.lines.get_mut(0) {
3681 let is_debit = line.debit_amount > Decimal::ZERO;
3682 let original_amount = if is_debit {
3683 line.debit_amount
3684 } else {
3685 line.credit_amount
3686 };
3687
3688 let new_amount =
3689 (original_amount / Decimal::new(100, 0)).round() * Decimal::new(100, 0);
3690 let impact = new_amount - original_amount;
3691
3692 if is_debit {
3694 entry.lines[0].debit_amount = new_amount;
3695 } else {
3696 entry.lines[0].credit_amount = new_amount;
3697 }
3698
3699 Self::rebalance_entry(entry, is_debit, impact);
3701
3702 entry.header.header_text = Some(
3703 entry.header.header_text.clone().unwrap_or_default()
3704 + " [HUMAN_ERROR:ROUNDED]",
3705 );
3706 }
3707 }
3708 4 if entry.header.document_date == entry.header.posting_date => {
3711 let days_late = self.rng.random_range(5..15);
3712 entry.header.document_date =
3713 entry.header.posting_date - chrono::Duration::days(days_late);
3714 entry.header.header_text = Some(
3715 entry.header.header_text.clone().unwrap_or_default()
3716 + " [HUMAN_ERROR:LATE_POSTING]",
3717 );
3718 }
3719 _ => {}
3720 }
3721 }
3722
3723 fn maybe_apply_approval_workflow(
3728 &mut self,
3729 entry: &mut JournalEntry,
3730 _posting_date: NaiveDate,
3731 ) {
3732 use rust_decimal::Decimal;
3733
3734 let amount = entry.total_debit();
3735
3736 if amount <= self.approval_threshold {
3738 let workflow = ApprovalWorkflow::auto_approved(
3740 entry.header.created_by.clone(),
3741 entry.header.user_persona.clone(),
3742 amount,
3743 entry.header.created_at,
3744 );
3745 entry.header.approval_workflow = Some(workflow);
3746 return;
3747 }
3748
3749 entry.header.sox_relevant = true;
3751
3752 let required_levels = if amount > Decimal::new(100000, 0) {
3754 3 } else if amount > Decimal::new(50000, 0) {
3756 2 } else {
3758 1 };
3760
3761 let mut workflow = ApprovalWorkflow::new(
3763 entry.header.created_by.clone(),
3764 entry.header.user_persona.clone(),
3765 amount,
3766 );
3767 workflow.required_levels = required_levels;
3768
3769 let submit_time = entry.header.created_at;
3771 let submit_action = ApprovalAction::new(
3772 entry.header.created_by.clone(),
3773 entry.header.user_persona.clone(),
3774 self.parse_persona(&entry.header.user_persona),
3775 ApprovalActionType::Submit,
3776 0,
3777 )
3778 .with_timestamp(submit_time);
3779
3780 workflow.actions.push(submit_action);
3781 workflow.status = ApprovalStatus::Pending;
3782 workflow.submitted_at = Some(submit_time);
3783
3784 let mut current_time = submit_time;
3786 for level in 1..=required_levels {
3787 let delay_hours = self.rng.random_range(1..4);
3789 current_time += chrono::Duration::hours(delay_hours);
3790
3791 while current_time.weekday() == chrono::Weekday::Sat
3793 || current_time.weekday() == chrono::Weekday::Sun
3794 {
3795 current_time += chrono::Duration::days(1);
3796 }
3797
3798 let (approver_id, approver_role) = self.select_approver(level);
3800
3801 let approve_action = ApprovalAction::new(
3802 approver_id.clone(),
3803 approver_role.to_string(),
3804 approver_role,
3805 ApprovalActionType::Approve,
3806 level,
3807 )
3808 .with_timestamp(current_time);
3809
3810 workflow.actions.push(approve_action);
3811 workflow.current_level = level;
3812 }
3813
3814 workflow.status = ApprovalStatus::Approved;
3816 workflow.approved_at = Some(current_time);
3817
3818 entry.header.approval_workflow = Some(workflow);
3819 }
3820
3821 fn select_approver(&mut self, level: u8) -> (String, UserPersona) {
3823 let persona = match level {
3824 1 => UserPersona::Manager,
3825 2 => UserPersona::Controller,
3826 _ => UserPersona::Executive,
3827 };
3828
3829 if let Some(ref pool) = self.user_pool {
3831 if let Some(user) = pool.get_random_user(persona, &mut self.rng) {
3832 return (user.user_id.clone(), persona);
3833 }
3834 }
3835
3836 let approver_id = match persona {
3838 UserPersona::Manager => format!("MGR{:04}", self.rng.random_range(1..100)),
3839 UserPersona::Controller => format!("CTRL{:04}", self.rng.random_range(1..20)),
3840 UserPersona::Executive => format!("EXEC{:04}", self.rng.random_range(1..10)),
3841 _ => format!("USR{:04}", self.rng.random_range(1..1000)),
3842 };
3843
3844 (approver_id, persona)
3845 }
3846
3847 fn parse_persona(&self, persona_str: &str) -> UserPersona {
3849 match persona_str.to_lowercase().as_str() {
3850 s if s.contains("junior") => UserPersona::JuniorAccountant,
3851 s if s.contains("senior") => UserPersona::SeniorAccountant,
3852 s if s.contains("controller") => UserPersona::Controller,
3853 s if s.contains("manager") => UserPersona::Manager,
3854 s if s.contains("executive") => UserPersona::Executive,
3855 s if s.contains("automated") || s.contains("system") => UserPersona::AutomatedSystem,
3856 _ => UserPersona::JuniorAccountant, }
3858 }
3859
3860 pub fn with_approval(mut self, enabled: bool) -> Self {
3862 self.approval_enabled = enabled;
3863 self
3864 }
3865
3866 pub fn with_approval_threshold(mut self, threshold: rust_decimal::Decimal) -> Self {
3868 self.approval_threshold = threshold;
3869 self
3870 }
3871
3872 pub fn with_sod_violation_rate(mut self, rate: f64) -> Self {
3878 self.sod_violation_rate = rate;
3879 self
3880 }
3881
3882 fn populate_approval_fields(&mut self, entry: &mut JournalEntry, posting_date: NaiveDate) {
3885 if let Some(ref workflow) = entry.header.approval_workflow {
3886 let last_approver = workflow
3888 .actions
3889 .iter()
3890 .rev()
3891 .find(|a| matches!(a.action, ApprovalActionType::Approve));
3892
3893 if let Some(approver_action) = last_approver {
3894 entry.header.approved_by = Some(approver_action.actor_id.clone());
3895 entry.header.approval_date = Some(approver_action.action_timestamp.date_naive());
3896 } else {
3897 entry.header.approved_by = Some(workflow.preparer_id.clone());
3899 entry.header.approval_date = Some(posting_date);
3900 }
3901
3902 if self.rng.random::<f64>() < self.sod_violation_rate {
3904 let creator = entry.header.created_by.clone();
3905 entry.header.approved_by = Some(creator);
3906 entry.header.sod_violation = true;
3907 entry.header.sod_conflict_type = Some(SodConflictType::PreparerApprover);
3908 }
3909 }
3910 }
3911
3912 pub fn with_drift_controller(mut self, controller: DriftController) -> Self {
3918 self.drift_controller = Some(controller);
3919 self
3920 }
3921
3922 pub fn with_drift_config(mut self, config: DriftConfig, seed: u64) -> Self {
3927 if config.enabled {
3928 let total_periods = self.calculate_total_periods();
3929 self.drift_controller = Some(DriftController::new(config, seed, total_periods));
3930 }
3931 self
3932 }
3933
3934 fn calculate_total_periods(&self) -> u32 {
3936 let start_year = self.start_date.year();
3937 let start_month = self.start_date.month();
3938 let end_year = self.end_date.year();
3939 let end_month = self.end_date.month();
3940
3941 ((end_year - start_year) * 12 + (end_month as i32 - start_month as i32) + 1).max(1) as u32
3942 }
3943
3944 fn date_to_period(&self, date: NaiveDate) -> u32 {
3946 let start_year = self.start_date.year();
3947 let start_month = self.start_date.month() as i32;
3948 let date_year = date.year();
3949 let date_month = date.month() as i32;
3950
3951 ((date_year - start_year) * 12 + (date_month - start_month)).max(0) as u32
3952 }
3953
3954 fn get_drift_adjustments(&self, date: NaiveDate) -> DriftAdjustments {
3956 if let Some(ref controller) = self.drift_controller {
3957 let period = self.date_to_period(date);
3958 controller.compute_adjustments(period)
3959 } else {
3960 DriftAdjustments::none()
3961 }
3962 }
3963
3964 #[inline]
3966 fn select_user(&mut self, is_automated: bool) -> (String, String) {
3967 if let Some(ref pool) = self.user_pool {
3968 let persona = if is_automated {
3969 UserPersona::AutomatedSystem
3970 } else {
3971 let roll: f64 = self.rng.random();
3973 if roll < 0.4 {
3974 UserPersona::JuniorAccountant
3975 } else if roll < 0.7 {
3976 UserPersona::SeniorAccountant
3977 } else if roll < 0.85 {
3978 UserPersona::Controller
3979 } else {
3980 UserPersona::Manager
3981 }
3982 };
3983
3984 if let Some(user) = pool.get_random_user(persona, &mut self.rng) {
3985 return (user.user_id.clone(), user.persona.to_string());
3986 }
3987 }
3988
3989 if is_automated {
3991 (
3992 format!("BATCH{:04}", self.rng.random_range(1..=20)),
3993 "automated_system".to_string(),
3994 )
3995 } else {
3996 (
3997 format!("USER{:04}", self.rng.random_range(1..=40)),
3998 "senior_accountant".to_string(),
3999 )
4000 }
4001 }
4002
4003 #[inline]
4005 fn select_source(&mut self) -> TransactionSource {
4006 let roll: f64 = self.rng.random();
4007 let dist = &self.config.source_distribution;
4008
4009 if roll < dist.manual {
4010 TransactionSource::Manual
4011 } else if roll < dist.manual + dist.automated {
4012 TransactionSource::Automated
4013 } else if roll < dist.manual + dist.automated + dist.recurring {
4014 TransactionSource::Recurring
4015 } else {
4016 TransactionSource::Adjustment
4017 }
4018 }
4019
4020 #[inline]
4022 fn document_type_for_process(process: BusinessProcess) -> &'static str {
4031 match process {
4032 BusinessProcess::P2P => "KR",
4033 BusinessProcess::O2C => "DR",
4034 BusinessProcess::R2R => "SA",
4035 BusinessProcess::H2R => "HR",
4036 BusinessProcess::A2R => "AA",
4037 _ => "SA",
4038 }
4039 }
4040
4041 fn select_business_process(&mut self) -> BusinessProcess {
4042 *datasynth_core::utils::weighted_select(&mut self.rng, &self.business_process_weights)
4043 }
4044
4045 #[inline]
4050 fn power_law_index(n: usize, rng: &mut ChaCha8Rng) -> Option<usize> {
4051 if n == 0 || n > ZIPF_CAP {
4052 return None;
4053 }
4054 let total = ZIPF_CUM[n];
4055 let r = rng.random::<f64>() * total;
4056 let k = ZIPF_CUM[..=n]
4058 .binary_search_by(|v| v.partial_cmp(&r).unwrap_or(std::cmp::Ordering::Less))
4059 .unwrap_or_else(|e| e);
4060 Some(k.saturating_sub(1).min(n - 1))
4061 }
4062
4063 #[inline]
4070 fn concentrate<'a>(
4071 enabled: bool,
4072 rng: &mut ChaCha8Rng,
4073 all: &[&'a GLAccount],
4074 uniform: Option<&'a GLAccount>,
4075 ) -> Option<&'a GLAccount> {
4076 if enabled {
4077 Self::power_law_index(all.len(), rng)
4078 .map(|i| all[i])
4079 .or(uniform)
4080 } else {
4081 uniform
4082 }
4083 }
4084
4085 fn ensure_cond_pair_pool(&mut self, source: &str) {
4088 let cfg = &self.config.source_conditional_account_pair;
4089 if !cfg.enabled {
4090 return;
4091 }
4092 if self.cond_pair_sampler.is_none() {
4093 self.cond_pair_sampler = Some(Default::default());
4094 }
4095 let sampler = self
4096 .cond_pair_sampler
4097 .as_mut()
4098 .expect("just-initialised above");
4099 if sampler.pool(source).is_some() {
4100 return;
4101 }
4102 let all_accounts: Vec<String> = self
4103 .coa
4104 .accounts
4105 .iter()
4106 .map(|a| a.account_number.clone())
4107 .collect();
4108 if all_accounts.is_empty() {
4109 return;
4110 }
4111 let weights: Vec<f64> = vec![1.0; all_accounts.len()];
4114 sampler.ensure_pool(
4115 source,
4116 &all_accounts,
4117 &weights,
4118 cfg.accts_per_source_target,
4119 cfg.concentration,
4120 &mut self.cond_pair_rng,
4121 );
4122 }
4123
4124 #[inline]
4129 fn try_cond_pick_account_number(&mut self) -> Option<String> {
4130 let cfg = &self.config.source_conditional_account_pair;
4131 if !cfg.enabled {
4132 return None;
4133 }
4134 let src = self.current_je_source.clone()?;
4135 self.ensure_cond_pair_pool(&src);
4136 let sampler = self.cond_pair_sampler.as_ref()?;
4137 let pool = sampler.pool(&src)?;
4138 Some(pool.sample_one(&mut self.cond_pair_rng).to_string())
4139 }
4140
4141 #[inline]
4142 fn select_debit_account(&mut self) -> &GLAccount {
4143 if let Some(acct_num) = self.try_cond_pick_account_number() {
4145 if let Some(a) = self
4146 .coa
4147 .accounts
4148 .iter()
4149 .find(|a| a.account_number == acct_num)
4150 {
4151 return a;
4152 }
4153 }
4155 let accounts = self.coa.get_accounts_by_type(AccountType::Asset);
4156 let expense_accounts = self.coa.get_accounts_by_type(AccountType::Expense);
4157
4158 let all: Vec<_> = if self.rng.random::<f64>() < 0.6 {
4160 accounts
4161 } else {
4162 expense_accounts
4163 };
4164
4165 let uniform = all.choose(&mut self.rng).copied();
4166 let enabled = self.config.account_concentration.unwrap_or(true);
4167 Self::concentrate(enabled, &mut self.account_rng, &all, uniform).unwrap_or_else(|| {
4168 tracing::warn!(
4169 "Account selection returned empty list, falling back to first COA account"
4170 );
4171 &self.coa.accounts[0]
4172 })
4173 }
4174
4175 #[inline]
4176 fn select_credit_account(&mut self) -> &GLAccount {
4177 if let Some(acct_num) = self.try_cond_pick_account_number() {
4179 if let Some(a) = self
4180 .coa
4181 .accounts
4182 .iter()
4183 .find(|a| a.account_number == acct_num)
4184 {
4185 return a;
4186 }
4187 }
4188 let liability_accounts = self.coa.get_accounts_by_type(AccountType::Liability);
4189 let revenue_accounts = self.coa.get_accounts_by_type(AccountType::Revenue);
4190
4191 let all: Vec<_> = if self.rng.random::<f64>() < 0.6 {
4193 liability_accounts
4194 } else {
4195 revenue_accounts
4196 };
4197
4198 let uniform = all.choose(&mut self.rng).copied();
4199 let enabled = self.config.account_concentration.unwrap_or(true);
4200 Self::concentrate(enabled, &mut self.account_rng, &all, uniform).unwrap_or_else(|| {
4201 tracing::warn!(
4202 "Account selection returned empty list, falling back to first COA account"
4203 );
4204 &self.coa.accounts[0]
4205 })
4206 }
4207}
4208
4209impl Generator for JournalEntryGenerator {
4210 type Item = JournalEntry;
4211 type Config = (
4212 TransactionConfig,
4213 Arc<ChartOfAccounts>,
4214 Vec<String>,
4215 NaiveDate,
4216 NaiveDate,
4217 );
4218
4219 fn new(config: Self::Config, seed: u64) -> Self {
4220 Self::new_with_params(config.0, config.1, config.2, config.3, config.4, seed)
4221 }
4222
4223 fn generate_one(&mut self) -> Self::Item {
4224 self.generate()
4225 }
4226
4227 fn reset(&mut self) {
4228 self.rng = seeded_rng(self.seed, 0);
4229 self.source_mix_rng = seeded_rng(self.seed, 50_063);
4230 self.template_rng = seeded_rng(self.seed, 70_081);
4231 self.recurring_archetypes.clear();
4232 self.reversal_rng = seeded_rng(self.seed, 90_017);
4233 self.reversal_buffer.clear();
4234 self.account_rng = seeded_rng(self.seed, 60_071);
4235 self.allocation_rng = seeded_rng(self.seed, 80_023);
4236 self.fx_rng = seeded_rng(self.seed, 70_093);
4237 self.line_sampler.reset(self.seed + 1);
4238 self.amount_sampler.reset(self.seed + 2);
4239 self.temporal_sampler.reset(self.seed + 3);
4240 if let Some(ref mut adv) = self.advanced_amount_sampler {
4241 adv.reset(self.seed + 2);
4242 }
4243 self.count = 0;
4244 self.uuid_factory.reset();
4245
4246 let mut ref_gen = ReferenceGenerator::new(
4248 self.start_date.year(),
4249 self.companies
4250 .first()
4251 .map(std::string::String::as_str)
4252 .unwrap_or("1000"),
4253 );
4254 ref_gen.set_prefix(
4255 ReferenceType::Invoice,
4256 &self.template_config.references.invoice_prefix,
4257 );
4258 ref_gen.set_prefix(
4259 ReferenceType::PurchaseOrder,
4260 &self.template_config.references.po_prefix,
4261 );
4262 ref_gen.set_prefix(
4263 ReferenceType::SalesOrder,
4264 &self.template_config.references.so_prefix,
4265 );
4266 self.reference_generator = ref_gen;
4267 }
4268
4269 fn count(&self) -> u64 {
4270 self.count
4271 }
4272
4273 fn seed(&self) -> u64 {
4274 self.seed
4275 }
4276}
4277
4278use datasynth_core::traits::ParallelGenerator;
4279
4280impl ParallelGenerator for JournalEntryGenerator {
4281 fn split(self, parts: usize) -> Vec<Self> {
4287 let parts = parts.max(1);
4288 (0..parts)
4289 .map(|i| {
4290 let sub_seed = self
4292 .seed
4293 .wrapping_add((i as u64).wrapping_mul(0x9E3779B97F4A7C15));
4294
4295 let mut gen = JournalEntryGenerator::new_with_full_config(
4296 self.config.clone(),
4297 Arc::clone(&self.coa),
4298 self.companies.clone(),
4299 self.start_date,
4300 self.end_date,
4301 sub_seed,
4302 self.template_config.clone(),
4303 self.user_pool.clone(),
4304 );
4305
4306 gen.company_selector = self.company_selector.clone();
4308 gen.vendor_pool = self.vendor_pool.clone();
4309 gen.customer_pool = self.customer_pool.clone();
4310 gen.material_pool = self.material_pool.clone();
4311 gen.cost_center_pool = self.cost_center_pool.clone();
4317 gen.profit_center_pool = self.profit_center_pool.clone();
4318 gen.using_real_master_data = self.using_real_master_data;
4319 gen.fraud_config = self.fraud_config.clone();
4320 gen.persona_errors_enabled = self.persona_errors_enabled;
4321 gen.approval_enabled = self.approval_enabled;
4322 gen.approval_threshold = self.approval_threshold;
4323 gen.sod_violation_rate = self.sod_violation_rate;
4324 if let Some(mut adv) = self.advanced_amount_sampler.clone() {
4329 adv.reset(sub_seed.wrapping_add(2));
4330 gen.advanced_amount_sampler = Some(adv);
4331 }
4332 if let Some(mut cond) = self.conditional_amount_override.clone() {
4335 cond.reset(sub_seed.wrapping_add(17));
4336 gen.conditional_amount_override = Some(cond);
4337 }
4338 if let Some(mut cop) = self.correlation_copula.clone() {
4340 cop.reset(sub_seed.wrapping_add(31));
4341 gen.correlation_copula = Some(cop);
4342 }
4343
4344 gen.uuid_factory = DeterministicUuidFactory::for_partition(
4346 sub_seed,
4347 GeneratorType::JournalEntry,
4348 i as u8,
4349 );
4350
4351 if let Some(ref config) = self.temporal_patterns_config {
4353 gen.temporal_patterns_config = Some(config.clone());
4354 if config.business_days.enabled {
4356 if let Some(ref bdc) = self.business_day_calculator {
4357 gen.business_day_calculator = Some(bdc.clone());
4358 }
4359 }
4360 if config.processing_lags.enabled {
4362 let lag_config =
4363 Self::convert_processing_lag_config(&config.processing_lags);
4364 gen.processing_lag_calculator =
4365 Some(ProcessingLagCalculator::with_config(sub_seed, lag_config));
4366 }
4367 }
4368
4369 if let Some(ref dc) = self.drift_controller {
4371 gen.drift_controller = Some(dc.clone());
4372 }
4373
4374 gen.loaded_priors = self.loaded_priors.clone();
4377
4378 if let Some(ref cal) = self.velocity_calibrator {
4383 let mut fresh = crate::velocity_calibrator::VelocityCalibrator::new(
4384 cal.target_trigger_rates.clone(),
4385 cal.n_lines_between_calibrations,
4386 );
4387 fresh.current_values = cal.current_values.clone();
4388 gen.velocity_calibrator = Some(fresh);
4389 }
4390
4391 gen
4392 })
4393 .collect()
4394 }
4395}
4396
4397#[cfg(test)]
4398mod tests {
4399 use super::*;
4400 use crate::ChartOfAccountsGenerator;
4401
/// Entries without a `[HUMAN_ERROR:` marker in their header text must be
/// balanced, every entry has at least two lines, and at least 80 of 100
/// generated entries are unmarked.
#[test]
fn test_generate_balanced_entries() {
    let mut coa_builder =
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
    let coa = Arc::new(coa_builder.generate());

    let mut generator = JournalEntryGenerator::new_with_params(
        TransactionConfig::default(),
        coa,
        vec![String::from("1000")],
        NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
        NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
        42,
    );

    let mut clean_entries = 0;
    for _ in 0..100 {
        let entry = generator.generate();

        // Entries carrying the human-error marker are exempt from the
        // balance check.
        let flagged = entry
            .header
            .header_text
            .as_deref()
            .is_some_and(|text| text.contains("[HUMAN_ERROR:"));

        if !flagged {
            assert!(
                entry.is_balanced(),
                "Entry {:?} is not balanced",
                entry.header.document_id
            );
            clean_entries += 1;
        }
        assert!(entry.line_count() >= 2, "Entry has fewer than 2 lines");
    }

    assert!(
        clean_entries >= 80,
        "Expected at least 80 balanced entries, got {}",
        clean_entries
    );
}
4447
/// Two generators built from the same inputs and seed must yield the same
/// document ids and debit totals for their first 50 entries.
#[test]
fn test_deterministic_generation() {
    let mut coa_builder =
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
    let coa = Arc::new(coa_builder.generate());

    // Both generators go through the same construction path.
    let build = |coa: Arc<ChartOfAccounts>| {
        JournalEntryGenerator::new_with_params(
            TransactionConfig::default(),
            coa,
            vec![String::from("1000")],
            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
            42,
        )
    };
    let mut first = build(Arc::clone(&coa));
    let mut second = build(coa);

    for _ in 0..50 {
        let lhs = first.generate();
        let rhs = second.generate();
        assert_eq!(lhs.header.document_id, rhs.header.document_id);
        assert_eq!(lhs.total_debit(), rhs.total_debit());
    }
}
4479
/// With header text, line text and references all enabled in the template
/// configuration, each generated entry must carry a header text, a reference,
/// a business process and a text on every line, and must be balanced.
#[test]
fn test_templates_generate_descriptions() {
    use datasynth_config::schema::{
        CultureDistribution, DescriptionTemplateConfig, NameTemplateConfig,
        ReferenceTemplateConfig,
    };

    let mut coa_builder =
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
    let coa = Arc::new(coa_builder.generate());

    let names = NameTemplateConfig {
        generate_realistic_names: true,
        email_domain: "test.com".to_string(),
        culture_distribution: CultureDistribution::default(),
    };
    let descriptions = DescriptionTemplateConfig {
        generate_header_text: true,
        generate_line_text: true,
    };
    let references = ReferenceTemplateConfig {
        generate_references: true,
        invoice_prefix: "TEST-INV".to_string(),
        po_prefix: "TEST-PO".to_string(),
        so_prefix: "TEST-SO".to_string(),
    };
    let templates = TemplateConfig {
        names,
        descriptions,
        references,
        path: None,
        merge_strategy: datasynth_config::TemplateMergeStrategy::default(),
    };

    // Persona errors are switched off so entries can be checked for balance.
    let mut generator = JournalEntryGenerator::new_with_full_config(
        TransactionConfig::default(),
        coa,
        vec![String::from("1000")],
        NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
        NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
        42,
        templates,
        None,
    )
    .with_persona_errors(false);

    for _ in 0..10 {
        let entry = generator.generate();
        let header = &entry.header;

        assert!(
            header.header_text.is_some(),
            "Header text should be populated"
        );
        assert!(header.reference.is_some(), "Reference should be populated");
        assert!(
            header.business_process.is_some(),
            "Business process should be set"
        );

        for line in entry.lines.iter() {
            assert!(line.line_text.is_some(), "Line text should be populated");
        }

        assert!(entry.is_balanced());
    }
}
4549
/// A generator supplied with a standard user pool must fill `created_by` on
/// every generated entry.
#[test]
fn test_user_pool_integration() {
    let mut coa_builder =
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
    let coa = Arc::new(coa_builder.generate());

    let company_codes = vec![String::from("1000")];

    // The pool is generated for the same company codes the generator uses.
    let mut user_builder = crate::UserGenerator::new(42);
    let pool = user_builder.generate_standard(&company_codes);

    let mut generator = JournalEntryGenerator::new_with_full_config(
        TransactionConfig::default(),
        coa,
        company_codes,
        NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
        NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
        42,
        TemplateConfig::default(),
        Some(pool),
    );

    for _ in 0..20 {
        let entry = generator.generate();
        assert!(!entry.header.created_by.is_empty());
    }
}
4582
/// A freshly built generator reports no real master data; after vendors,
/// customers and materials are attached it reports that it is using them.
#[test]
fn test_master_data_connection() {
    let mut coa_builder =
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
    let coa = Arc::new(coa_builder.generate());

    let vendor_list = vec![
        Vendor::new("V-TEST-001", "Test Vendor Alpha", VendorType::Supplier),
        Vendor::new("V-TEST-002", "Test Vendor Beta", VendorType::Technology),
    ];
    let customer_list = vec![
        Customer::new("C-TEST-001", "Test Customer One", CustomerType::Corporate),
        Customer::new(
            "C-TEST-002",
            "Test Customer Two",
            CustomerType::SmallBusiness,
        ),
    ];
    let material_list = vec![Material::new(
        "MAT-TEST-001",
        "Test Material A",
        MaterialType::RawMaterial,
    )];

    let plain = JournalEntryGenerator::new_with_params(
        TransactionConfig::default(),
        coa,
        vec![String::from("1000")],
        NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
        NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
        42,
    );
    assert!(!plain.is_using_real_master_data());

    let enriched = plain
        .with_vendors(&vendor_list)
        .with_customers(&customer_list)
        .with_materials(&material_list);
    assert!(enriched.is_using_real_master_data());
}
4634
/// `with_master_data` attaches vendors, customers and materials in one call
/// and leaves the generator reporting that it uses real master data.
#[test]
fn test_with_master_data_convenience_method() {
    let mut coa_builder =
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
    let coa = Arc::new(coa_builder.generate());

    let vendor_list = vec![Vendor::new("V-001", "Vendor One", VendorType::Supplier)];
    let customer_list = vec![Customer::new(
        "C-001",
        "Customer One",
        CustomerType::Corporate,
    )];
    let material_list = vec![Material::new(
        "MAT-001",
        "Material One",
        MaterialType::RawMaterial,
    )];

    let base = JournalEntryGenerator::new_with_params(
        TransactionConfig::default(),
        coa,
        vec![String::from("1000")],
        NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
        NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
        42,
    );
    let enriched = base.with_master_data(&vendor_list, &customer_list, &material_list);

    assert!(enriched.is_using_real_master_data());
}
4665
/// `apply_stress_factors` must leave a base rate of 0.1 nearly unchanged on
/// the chosen "regular" date and raise it at month end, further at year end,
/// and on the chosen Friday and Monday.
#[test]
fn test_stress_factors_increase_error_rate() {
    let mut coa_builder =
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
    let coa = Arc::new(coa_builder.generate());

    let generator = JournalEntryGenerator::new_with_params(
        TransactionConfig::default(),
        coa,
        vec![String::from("1000")],
        NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
        NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
        42,
    );

    let base_rate = 0.1;
    // Stressed rate for a given calendar date.
    let rate_on = |year, month, day| {
        let date = NaiveDate::from_ymd_opt(year, month, day).unwrap();
        generator.apply_stress_factors(base_rate, date)
    };

    // NOTE(review): 2024-06-15 and 2024-06-29 are both Saturdays; confirm
    // they are the intended "regular" and "month end" samples.
    let regular_rate = rate_on(2024, 6, 15);
    assert!(
        (regular_rate - base_rate).abs() < 0.01,
        "Regular day should have minimal stress factor adjustment"
    );

    let month_end_rate = rate_on(2024, 6, 29);
    assert!(
        month_end_rate > regular_rate,
        "Month end should have higher error rate than regular day"
    );

    let year_end_rate = rate_on(2024, 12, 30);
    assert!(
        year_end_rate > month_end_rate,
        "Year end should have highest error rate"
    );

    // 2024-06-14 is a Friday.
    let friday_rate = rate_on(2024, 6, 14);
    assert!(
        friday_rate > regular_rate,
        "Friday should have higher error rate than mid-week"
    );

    // 2024-06-17 is a Monday.
    let monday_rate = rate_on(2024, 6, 17);
    assert!(
        monday_rate > regular_rate,
        "Monday should have higher error rate than mid-week"
    );
}
4723
/// Over 200 entries with persona errors off, every entry must be balanced
/// and at least one posting date must be shared by several entries.
#[test]
fn test_batching_produces_similar_entries() {
    use std::collections::HashMap;

    let mut coa_builder =
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
    let coa = Arc::new(coa_builder.generate());

    let mut generator = JournalEntryGenerator::new_with_params(
        TransactionConfig::default(),
        coa,
        vec![String::from("1000")],
        NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
        NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
        123,
    )
    .with_persona_errors(false);

    let mut entries: Vec<JournalEntry> = Vec::with_capacity(200);
    for _ in 0..200 {
        entries.push(generator.generate());
    }

    assert!(
        entries.iter().all(|entry| entry.is_balanced()),
        "All entries including batched should be balanced"
    );

    // Number of entries per posting date.
    let per_date = entries
        .iter()
        .fold(HashMap::<NaiveDate, usize>::new(), |mut acc, entry| {
            *acc.entry(entry.header.posting_date).or_insert(0) += 1;
            acc
        });

    let shared_dates = per_date.values().filter(|&&hits| hits > 1).count();
    assert!(
        shared_dates > 0,
        "With batching, should see some dates with multiple entries"
    );
}
4766
/// With temporal patterns and business-day handling enabled for the "US"
/// calendar region, none of 100 generated entries may post on a Saturday or
/// Sunday.
#[test]
fn test_temporal_patterns_business_days() {
    use datasynth_config::schema::{
        BusinessDaySchemaConfig, CalendarSchemaConfig, TemporalPatternsConfig,
    };

    let mut coa_builder =
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
    let coa = Arc::new(coa_builder.generate());

    let business_days = BusinessDaySchemaConfig {
        enabled: true,
        ..Default::default()
    };
    let calendars = CalendarSchemaConfig {
        regions: vec!["US".to_string()],
        custom_holidays: vec![],
    };
    let temporal = TemporalPatternsConfig {
        enabled: true,
        business_days,
        calendars,
        ..Default::default()
    };

    // The generation window covers the first quarter of 2024.
    let mut generator = JournalEntryGenerator::new_with_params(
        TransactionConfig::default(),
        coa,
        vec![String::from("1000")],
        NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
        NaiveDate::from_ymd_opt(2024, 3, 31).unwrap(),
        42,
    )
    .with_temporal_patterns(temporal, 42)
    .with_persona_errors(false);

    let mut entries: Vec<JournalEntry> = Vec::with_capacity(100);
    for _ in 0..100 {
        entries.push(generator.generate());
    }

    for entry in entries.iter() {
        let posted_on = entry.header.posting_date;
        let is_weekend = matches!(
            posted_on.weekday(),
            chrono::Weekday::Sat | chrono::Weekday::Sun
        );
        assert!(
            !is_weekend,
            "Posting date {:?} should not be a weekend",
            entry.header.posting_date
        );
    }
}
4814
/// Without any temporal-pattern configuration, fewer than 5% of 500 generated
/// entries may post on a Saturday or Sunday.
#[test]
fn test_default_generation_filters_weekends() {
    let mut coa_builder =
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
    let coa = Arc::new(coa_builder.generate());

    let mut generator = JournalEntryGenerator::new_with_params(
        TransactionConfig::default(),
        coa,
        vec![String::from("1000")],
        NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
        NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
        42,
    )
    .with_persona_errors(false);

    let total = 500;
    let entries: Vec<JournalEntry> = (0..total).map(|_| generator.generate()).collect();

    let mut weekend_count = 0usize;
    for entry in entries.iter() {
        let day = entry.header.posting_date.weekday();
        if matches!(day, chrono::Weekday::Sat | chrono::Weekday::Sun) {
            weekend_count += 1;
        }
    }

    let weekend_pct = weekend_count as f64 / total as f64;
    assert!(
        weekend_pct < 0.05,
        "Expected weekend entries <5% of total without temporal_patterns enabled, \
         but got {:.1}% ({}/{})",
        weekend_pct * 100.0,
        weekend_count,
        total
    );
}
4855
/// Over 200 entries (persona errors and batching off) more than three
/// distinct document types must appear and "SA" must stay below 50%.
#[test]
fn test_document_type_derived_from_business_process() {
    use std::collections::HashSet;

    let mut coa_builder =
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
    let coa = Arc::new(coa_builder.generate());

    let mut generator = JournalEntryGenerator::new_with_params(
        TransactionConfig::default(),
        coa,
        vec![String::from("1000")],
        NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
        NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
        99,
    )
    .with_persona_errors(false)
    .with_batching(false);

    let total = 200;
    let mut doc_types = HashSet::new();
    let mut sa_count = 0_usize;

    for _ in 0..total {
        let entry = generator.generate();
        let doc_type = &entry.header.document_type;
        if doc_type == "SA" {
            sa_count += 1;
        }
        doc_types.insert(doc_type.clone());
    }

    let distinct = doc_types.len();
    assert!(
        distinct > 3,
        "Expected >3 distinct document types, got {} ({:?})",
        distinct,
        doc_types,
    );

    let sa_pct = sa_count as f64 / total as f64;
    assert!(
        sa_pct < 0.50,
        "Expected SA <50%, got {:.1}% ({}/{})",
        sa_pct * 100.0,
        sa_count,
        total,
    );
}
4904
/// More than 95% of all lines across 200 generated entries must carry an
/// `account_description`.
#[test]
fn test_enrich_line_items_account_description() {
    let mut coa_builder =
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
    let coa = Arc::new(coa_builder.generate());

    let mut generator = JournalEntryGenerator::new_with_params(
        TransactionConfig::default(),
        coa,
        vec![String::from("1000")],
        NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
        NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
        42,
    )
    .with_persona_errors(false);

    let total = 200;
    let entries: Vec<JournalEntry> = (0..total).map(|_| generator.generate()).collect();

    // Tally all lines and the described ones in a single pass.
    let mut total_lines = 0usize;
    let mut lines_with_desc = 0usize;
    for line in entries.iter().flat_map(|entry| entry.lines.iter()) {
        total_lines += 1;
        if line.account_description.is_some() {
            lines_with_desc += 1;
        }
    }

    let desc_pct = lines_with_desc as f64 / total_lines as f64;
    assert!(
        desc_pct > 0.95,
        "Expected >95% of lines to have account_description, got {:.1}% ({}/{})",
        desc_pct * 100.0,
        lines_with_desc,
        total_lines,
    );
}
4941
/// Among lines whose GL account number starts with `5` or `6`, more than 80%
/// must carry a cost center. The check is skipped when no such line was
/// generated.
#[test]
fn test_enrich_line_items_cost_center_for_expense_accounts() {
    let mut coa_builder =
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
    let coa = Arc::new(coa_builder.generate());

    let mut generator = JournalEntryGenerator::new_with_params(
        TransactionConfig::default(),
        coa,
        vec![String::from("1000")],
        NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
        NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
        42,
    )
    .with_persona_errors(false);

    let total = 300;
    let entries: Vec<JournalEntry> = (0..total).map(|_| generator.generate()).collect();

    // Lines are selected by the leading digit of the account number.
    let expense_lines: Vec<&JournalEntryLine> = entries
        .iter()
        .flat_map(|entry| entry.lines.iter())
        .filter(|line| matches!(line.gl_account.chars().next(), Some('5' | '6')))
        .collect();

    if expense_lines.is_empty() {
        return;
    }

    let with_cc = expense_lines
        .iter()
        .filter(|line| line.cost_center.is_some())
        .count();
    let cc_pct = with_cc as f64 / expense_lines.len() as f64;
    assert!(
        cc_pct > 0.80,
        "Expected >80% of expense lines to have cost_center, got {:.1}% ({}/{})",
        cc_pct * 100.0,
        with_cc,
        expense_lines.len(),
    );
}
4986
/// More than 95% of all lines across 100 generated entries must carry a
/// profit center, and more than 95% must carry a line text.
#[test]
fn test_enrich_line_items_profit_center_and_line_text() {
    let mut coa_builder =
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
    let coa = Arc::new(coa_builder.generate());

    let mut generator = JournalEntryGenerator::new_with_params(
        TransactionConfig::default(),
        coa,
        vec![String::from("1000")],
        NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
        NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
        42,
    )
    .with_persona_errors(false);

    let total = 100;
    let entries: Vec<JournalEntry> = (0..total).map(|_| generator.generate()).collect();

    // One pass over all lines, tallying both enrichments.
    let mut total_lines = 0usize;
    let mut with_pc = 0usize;
    let mut with_text = 0usize;
    for line in entries.iter().flat_map(|entry| entry.lines.iter()) {
        total_lines += 1;
        if line.profit_center.is_some() {
            with_pc += 1;
        }
        if line.line_text.is_some() {
            with_text += 1;
        }
    }

    let pc_pct = with_pc as f64 / total_lines as f64;
    assert!(
        pc_pct > 0.95,
        "Expected >95% of lines to have profit_center, got {:.1}% ({}/{})",
        pc_pct * 100.0,
        with_pc,
        total_lines,
    );

    let text_pct = with_text as f64 / total_lines as f64;
    assert!(
        text_pct > 0.95,
        "Expected >95% of lines to have line_text, got {:.1}% ({}/{})",
        text_pct * 100.0,
        with_text,
        total_lines,
    );
}
5038
/// Every generated entry must have a non-empty `source_system`, a non-empty
/// `created_by` and a `created_date`.
#[test]
fn test_je_has_audit_flags() {
    let mut coa_builder =
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
    let coa = Arc::new(coa_builder.generate());

    let mut generator = JournalEntryGenerator::new_with_params(
        TransactionConfig::default(),
        coa,
        vec![String::from("1000")],
        NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
        NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
        42,
    )
    .with_persona_errors(false);

    for _ in 0..100 {
        let entry = generator.generate();
        let header = &entry.header;

        assert!(
            !header.source_system.is_empty(),
            "source_system should be populated, got empty string"
        );
        assert!(
            !header.created_by.is_empty(),
            "created_by should be populated"
        );
        assert!(
            header.created_date.is_some(),
            "created_date should be populated"
        );
    }
}
5079
/// Over 1000 entries the share flagged `is_manual` must lie strictly between
/// 1% and 50%, and `is_manual` must equal `source == TransactionSource::Manual`
/// on every entry.
#[test]
fn test_manual_entry_rate() {
    let mut coa_builder =
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
    let coa = Arc::new(coa_builder.generate());

    let mut generator = JournalEntryGenerator::new_with_params(
        TransactionConfig::default(),
        coa,
        vec![String::from("1000")],
        NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
        NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
        42,
    )
    .with_persona_errors(false)
    .with_batching(false);

    let total = 1000;
    let entries: Vec<JournalEntry> = (0..total).map(|_| generator.generate()).collect();

    let manual_count = entries
        .iter()
        .filter(|entry| entry.header.is_manual)
        .count();
    let manual_rate = manual_count as f64 / total as f64;

    let rate_is_plausible = manual_rate > 0.01 && manual_rate < 0.50;
    assert!(
        rate_is_plausible,
        "Manual entry rate should be reasonable (1%-50%), got {:.1}% ({}/{})",
        manual_rate * 100.0,
        manual_count,
        total,
    );

    // The flag and the source enum must agree on every entry.
    for entry in entries.iter() {
        let source_is_manual = entry.header.source == TransactionSource::Manual;
        assert_eq!(
            entry.header.is_manual, source_is_manual,
            "is_manual should match source == Manual"
        );
    }
}
5122
/// Manual entries must report a `source_system` in the `manual` /
/// `spreadsheet` family (the bare name or a `name/...` sub-path); non-manual
/// entries must not.
#[test]
fn test_manual_source_consistency() {
    let mut coa_builder =
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
    let coa = Arc::new(coa_builder.generate());

    let mut generator = JournalEntryGenerator::new_with_params(
        TransactionConfig::default(),
        coa,
        vec![String::from("1000")],
        NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
        NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
        42,
    )
    .with_persona_errors(false)
    .with_batching(false);

    // Membership test for the `manual` / `spreadsheet` source-system family.
    let in_manual_family = |system: &str| {
        system == "manual"
            || system == "spreadsheet"
            || system.starts_with("manual/")
            || system.starts_with("spreadsheet/")
    };

    for _ in 0..500 {
        let entry = generator.generate();
        // Bound as `s` because the assertion messages capture `{s}`.
        let s = entry.header.source_system.as_str();

        if entry.header.is_manual {
            assert!(
                in_manual_family(s),
                "Manual entry should have source_system in `manual` / `spreadsheet` family, got '{s}'",
            );
        } else {
            assert!(
                !in_manual_family(s),
                "Non-manual entry should not be in `manual` / `spreadsheet` family, got '{s}'",
            );
        }
    }
}
5169
/// With the default configuration every entry must carry a `sap_source_code`,
/// and at least 10 distinct codes must appear over 500 entries.
#[test]
fn test_default_source_codes_breadth() {
    use std::collections::HashSet;

    let mut coa_builder =
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 7);
    let coa = Arc::new(coa_builder.generate());

    let mut generator = JournalEntryGenerator::new_with_params(
        TransactionConfig::default(),
        coa,
        vec![String::from("1000")],
        NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
        NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
        7,
    )
    .with_persona_errors(false)
    .with_batching(false);

    let codes: HashSet<_> = (0..500)
        .map(|_| {
            let entry = generator.generate();
            entry
                .header
                .sap_source_code
                .expect("default config should populate sap_source_code")
        })
        .collect();

    assert!(
        codes.len() >= 10,
        "default source-mix should be broad (>=10 distinct codes), got {}",
        codes.len()
    );
}
5205
/// Setting `synthetic_source_codes` to `Some(false)` must leave
/// `sap_source_code` unset on every generated entry.
#[test]
fn test_source_codes_opt_out() {
    let mut coa_builder =
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 9);
    let coa = Arc::new(coa_builder.generate());

    let opted_out = TransactionConfig {
        synthetic_source_codes: Some(false),
        ..TransactionConfig::default()
    };
    let mut generator = JournalEntryGenerator::new_with_params(
        opted_out,
        coa,
        vec![String::from("1000")],
        NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
        NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
        9,
    )
    .with_persona_errors(false)
    .with_batching(false);

    for _ in 0..50 {
        let entry = generator.generate();
        assert!(
            entry.header.sap_source_code.is_none(),
            "opt-out should leave sap_source_code None (legacy enum source)"
        );
    }
}
5235
/// Recurring templates must reduce the number of distinct entry archetypes
/// (sorted `(account, is_debit)` line signatures) compared with the feature
/// off, to fewer than half the number of entries, while all entries stay
/// balanced in both modes.
#[test]
fn test_recurring_templates_reuse_archetypes() {
    /// Generates 800 entries with `recurring_templates` set to `recurring`
    /// and returns `(entry count, distinct archetypes, all balanced)`.
    fn run(recurring: Option<bool>) -> (usize, usize, bool) {
        let mut coa_builder = ChartOfAccountsGenerator::new(
            CoAComplexity::Medium,
            IndustrySector::Manufacturing,
            11,
        );
        let coa = Arc::new(coa_builder.generate());

        let cfg = TransactionConfig {
            recurring_templates: recurring,
            ..TransactionConfig::default()
        };
        let mut generator = JournalEntryGenerator::new_with_params(
            cfg,
            coa,
            vec![String::from("1000")],
            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
            11,
        )
        .with_persona_errors(false)
        .with_batching(false);

        let n = 800;
        let mut archetypes = std::collections::HashSet::new();
        let mut balanced = true;
        for _ in 0..n {
            let entry = generator.generate();
            balanced &= entry.is_balanced();

            // Sorted so the signature does not depend on line order.
            let mut signature: Vec<(String, bool)> = entry
                .lines
                .iter()
                .map(|line| (line.gl_account.clone(), line.debit_amount > Decimal::ZERO))
                .collect();
            signature.sort();
            archetypes.insert(signature);
        }
        (n, archetypes.len(), balanced)
    }

    let (n, distinct_on, bal_on) = run(Some(true));
    let (_, distinct_off, bal_off) = run(Some(false));

    assert!(bal_on && bal_off, "balance preserved in both modes");
    assert!(
        distinct_on < distinct_off,
        "templating should reduce distinct archetypes ({distinct_on} on vs {distinct_off} off)"
    );
    assert!(
        distinct_on * 2 < n,
        "templating should reuse heavily: {distinct_on} distinct archetypes over {n} JEs"
    );
}
5292
/// A reversal rate of 0.05 must emit at least one entry whose header text
/// starts with "Reversal of", a rate of 0.0 must emit none, and every entry
/// must stay balanced in both runs.
#[test]
fn test_reversal_process_emits_balanced_reversals() {
    /// Generates 1000 entries with `reversal_rate` set to `rate` and returns
    /// `(number of reversal entries, all balanced)`.
    fn run(rate: Option<f64>) -> (usize, bool) {
        let mut coa_builder = ChartOfAccountsGenerator::new(
            CoAComplexity::Small,
            IndustrySector::Manufacturing,
            13,
        );
        let coa = Arc::new(coa_builder.generate());

        let cfg = TransactionConfig {
            reversal_rate: rate,
            ..TransactionConfig::default()
        };
        let mut generator = JournalEntryGenerator::new_with_params(
            cfg,
            coa,
            vec![String::from("1000")],
            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
            13,
        )
        .with_persona_errors(false)
        .with_batching(false);

        let mut reversals = 0;
        let mut balanced = true;
        for _ in 0..1000 {
            let entry = generator.generate();
            balanced &= entry.is_balanced();

            // Reversal entries are recognised by their header-text prefix.
            let is_reversal = entry
                .header
                .header_text
                .as_deref()
                .is_some_and(|text| text.starts_with("Reversal of"));
            if is_reversal {
                reversals += 1;
            }
        }
        (reversals, balanced)
    }

    let (rev_on, bal_on) = run(Some(0.05));
    let (rev_off, bal_off) = run(Some(0.0));

    assert!(bal_on && bal_off, "all entries balanced incl. reversals");
    assert_eq!(rev_off, 0, "rate 0.0 emits no reversals, got {rev_off}");
    assert!(rev_on > 0, "rate 0.05 should emit reversals, got {rev_on}");
}
5341
5342 #[test]
5343 fn test_account_concentration_creates_pareto() {
5344 fn run(concentration: Option<bool>) -> (f64, bool) {
5349 let mut coa_gen = ChartOfAccountsGenerator::new(
5350 CoAComplexity::Medium,
5351 IndustrySector::Manufacturing,
5352 17,
5353 );
5354 let coa = Arc::new(coa_gen.generate());
5355 let cfg = TransactionConfig {
5356 account_concentration: concentration,
5357 recurring_templates: Some(false),
5358 reversal_rate: Some(0.0),
5359 ..TransactionConfig::default()
5360 };
5361 let mut g = JournalEntryGenerator::new_with_params(
5362 cfg,
5363 coa,
5364 vec!["1000".to_string()],
5365 NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
5366 NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
5367 17,
5368 )
5369 .with_persona_errors(false)
5370 .with_batching(false);
5371 let mut counts: std::collections::HashMap<String, usize> =
5372 std::collections::HashMap::new();
5373 let mut total_lines = 0usize;
5374 let mut balanced = true;
5375 for _ in 0..1000 {
5376 let e = g.generate();
5377 if !e.is_balanced() {
5378 balanced = false;
5379 }
5380 for l in &e.lines {
5381 *counts.entry(l.gl_account.clone()).or_default() += 1;
5382 total_lines += 1;
5383 }
5384 }
5385 let mut v: Vec<usize> = counts.values().copied().collect();
5388 v.sort_unstable_by(|a, b| b.cmp(a));
5389 let top_k = ((v.len() as f64 * 0.10).ceil() as usize).max(1);
5390 let top_share = v.iter().take(top_k).sum::<usize>() as f64 / total_lines as f64;
5391 (top_share, balanced)
5392 }
5393 let (share_on, bal_on) = run(Some(true));
5394 let (share_off, bal_off) = run(Some(false));
5395 assert!(bal_on && bal_off, "balance preserved in both modes");
5396 assert!(
5397 share_on > share_off + 0.20,
5398 "concentration should raise the top-10% line share ({share_on:.3} on vs {share_off:.3} off)"
5399 );
5400 assert!(
5401 share_on > 0.50,
5402 "hot accounts should dominate: top-10% line share {share_on:.3}"
5403 );
5404 }
5405
5406 #[test]
5407 fn test_allocation_batch_emits_large_balanced_postings() {
5408 fn run(rate: Option<f64>) -> (usize, bool, usize) {
5413 let mut coa_gen = ChartOfAccountsGenerator::new(
5414 CoAComplexity::Small,
5415 IndustrySector::Manufacturing,
5416 23,
5417 );
5418 let coa = Arc::new(coa_gen.generate());
5419 let cfg = TransactionConfig {
5420 allocation_batch_rate: rate,
5421 reversal_rate: Some(0.0),
5422 ..TransactionConfig::default()
5423 };
5424 let mut g = JournalEntryGenerator::new_with_params(
5425 cfg,
5426 coa,
5427 vec!["1000".to_string()],
5428 NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
5429 NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
5430 23,
5431 )
5432 .with_persona_errors(false)
5433 .with_batching(false);
5434 let mut batches = 0usize;
5435 let mut balanced = true;
5436 let mut max_distinct_cc = 0usize;
5437 for _ in 0..2000 {
5438 let e = g.generate();
5439 if !e.is_balanced() {
5440 balanced = false;
5441 }
5442 if e.header.sap_source_code.as_deref() == Some("AB") {
5443 batches += 1;
5444 assert!(
5445 e.lines.len() >= ALLOCATION_MIN_TARGETS as usize,
5446 "allocation batch should be large, got {} lines",
5447 e.lines.len()
5448 );
5449 let ccs: std::collections::HashSet<String> = e
5450 .lines
5451 .iter()
5452 .filter_map(|l| l.cost_center.clone())
5453 .collect();
5454 max_distinct_cc = max_distinct_cc.max(ccs.len());
5455 }
5456 }
5457 (batches, balanced, max_distinct_cc)
5458 }
5459 let (on, bal_on, cc) = run(Some(0.10));
5460 let (off, bal_off, _) = run(Some(0.0));
5461 assert!(
5462 bal_on && bal_off,
5463 "all entries balanced incl. allocation batches"
5464 );
5465 assert_eq!(off, 0, "rate 0.0 emits no allocation batches, got {off}");
5466 assert!(on > 0, "rate 0.10 should emit allocation batches, got {on}");
5467 assert!(
5468 cc > 1,
5469 "allocation should spread across multiple cost centers, got {cc}"
5470 );
5471 }
5472
5473 #[test]
5474 fn test_derived_id_processes_keep_document_ids_unique() {
5475 let mut coa_gen =
5480 ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 31);
5481 let coa = Arc::new(coa_gen.generate());
5482 let cfg = TransactionConfig {
5483 reversal_rate: Some(0.15),
5484 allocation_batch_rate: Some(0.10),
5485 ..TransactionConfig::default()
5486 };
5487 let mut g = JournalEntryGenerator::new_with_params(
5488 cfg,
5489 coa,
5490 vec!["1000".to_string()],
5491 NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
5492 NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
5493 31,
5494 )
5495 .with_persona_errors(false)
5496 .with_batching(false);
5497 let mut ids = std::collections::HashSet::new();
5498 let n = 3000;
5499 for _ in 0..n {
5500 let e = g.generate();
5501 assert!(
5502 ids.insert(e.header.document_id),
5503 "duplicate document id {} (derived-id collision)",
5504 e.header.document_id
5505 );
5506 }
5507 assert_eq!(ids.len(), n, "all {n} document ids unique");
5508 }
5509
5510 #[test]
5511 fn test_business_unit_rolls_up_from_cost_center() {
5512 fn run(enabled: Option<bool>) -> (usize, usize, bool, bool) {
5517 let mut coa_gen = ChartOfAccountsGenerator::new(
5518 CoAComplexity::Medium,
5519 IndustrySector::Manufacturing,
5520 19,
5521 );
5522 let coa = Arc::new(coa_gen.generate());
5523 let cfg = TransactionConfig {
5524 business_unit_dimension: enabled,
5525 ..TransactionConfig::default()
5526 };
5527 let mut g = JournalEntryGenerator::new_with_params(
5528 cfg,
5529 coa,
5530 vec!["1000".to_string()],
5531 NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
5532 NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
5533 19,
5534 )
5535 .with_persona_errors(false)
5536 .with_batching(false);
5537 let mut dim_lines = 0usize;
5538 let mut bu_lines = 0usize;
5539 let mut consistent = true; let mut well_formed = true; let mut dim_to_bu: std::collections::HashMap<String, String> =
5542 std::collections::HashMap::new();
5543 for _ in 0..600 {
5544 let e = g.generate();
5545 for l in &e.lines {
5546 let dim = l.cost_center.as_deref().or(l.profit_center.as_deref());
5548 if dim.is_some() {
5549 dim_lines += 1;
5550 }
5551 if let Some(bu) = &l.business_unit {
5552 bu_lines += 1;
5553 let d = dim.unwrap_or_default().to_string();
5554 if bu != &JournalEntryGenerator::business_unit_for_dimension(&d) {
5555 consistent = false;
5556 }
5557 if dim_to_bu
5559 .insert(d, bu.clone())
5560 .is_some_and(|prev| &prev != bu)
5561 {
5562 consistent = false;
5563 }
5564 let n_ok = bu.strip_prefix("BU").and_then(|d| d.parse::<u32>().ok());
5565 if !matches!(n_ok, Some(1..=11)) {
5566 well_formed = false;
5567 }
5568 }
5569 }
5570 }
5571 (dim_lines, bu_lines, consistent, well_formed)
5572 }
5573 let (dim_on, bu_on, consistent, well_formed) = run(Some(true));
5574 let (_, bu_off, _, _) = run(Some(false));
5575 assert!(
5576 dim_on > 0 && bu_on > 0,
5577 "BU should be populated where CC/PC is"
5578 );
5579 assert_eq!(
5580 dim_on, bu_on,
5581 "every CC/PC-bearing line gets a BU ({dim_on} dim vs {bu_on} BU)"
5582 );
5583 assert!(
5584 consistent,
5585 "BU must be the deterministic roll-up of its CC/PC"
5586 );
5587 assert!(well_formed, "BU codes must be BU01..BU11");
5588 assert_eq!(bu_off, 0, "dimension off ⇒ no business_unit, got {bu_off}");
5589 }
5590
5591 #[test]
5592 fn test_foreign_currency_sap_style() {
5593 fn run(rate: Option<f64>) -> (usize, bool, bool) {
5599 let mut coa_gen = ChartOfAccountsGenerator::new(
5600 CoAComplexity::Small,
5601 IndustrySector::Manufacturing,
5602 29,
5603 );
5604 let coa = Arc::new(coa_gen.generate());
5605 let cfg = TransactionConfig {
5606 foreign_currency_rate: rate,
5607 reversal_rate: Some(0.0),
5608 allocation_batch_rate: Some(0.0),
5609 ..TransactionConfig::default()
5610 };
5611 let mut g = JournalEntryGenerator::new_with_params(
5612 cfg,
5613 coa,
5614 vec!["1000".to_string()],
5615 NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
5616 NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
5617 29,
5618 )
5619 .with_persona_errors(false)
5620 .with_batching(false);
5621 let mut foreign = 0usize;
5622 let mut ledger_ok = true; let mut txn_ok = true; for _ in 0..1500 {
5625 let e = g.generate();
5626 if !e.is_balanced() {
5627 ledger_ok = false;
5628 }
5629 if e.header.currency != "USD" {
5630 foreign += 1;
5631 if !e.lines.iter().all(|l| l.transaction_amount.is_some()) {
5632 txn_ok = false;
5633 }
5634 let td: Decimal = e
5635 .lines
5636 .iter()
5637 .filter(|l| l.debit_amount > Decimal::ZERO)
5638 .filter_map(|l| l.transaction_amount)
5639 .sum();
5640 let tc: Decimal = e
5641 .lines
5642 .iter()
5643 .filter(|l| l.credit_amount > Decimal::ZERO)
5644 .filter_map(|l| l.transaction_amount)
5645 .sum();
5646 let tol = Decimal::new(e.lines.len() as i64, 2);
5648 if (td - tc).abs() > tol {
5649 txn_ok = false;
5650 }
5651 }
5652 }
5653 (foreign, ledger_ok, txn_ok)
5654 }
5655 let (fon, lbal_on, tbal_on) = run(Some(0.20));
5656 let (foff, lbal_off, _) = run(Some(0.0));
5657 assert!(
5658 lbal_on && lbal_off,
5659 "ledger balance (debit==credit) preserved in both modes"
5660 );
5661 assert!(
5662 fon > 0,
5663 "rate 0.20 should produce foreign-currency JEs, got {fon}"
5664 );
5665 assert_eq!(foff, 0, "rate 0.0 ⇒ no foreign JEs, got {foff}");
5666 assert!(
5667 tbal_on,
5668 "foreign JEs carry transaction_amount + balance in the transaction currency"
5669 );
5670 }
5671
5672 #[test]
5673 fn test_created_date_before_posting() {
5674 let mut coa_gen =
5675 ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
5676 let coa = Arc::new(coa_gen.generate());
5677
5678 let mut je_gen = JournalEntryGenerator::new_with_params(
5679 TransactionConfig::default(),
5680 coa,
5681 vec!["1000".to_string()],
5682 NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
5683 NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
5684 42,
5685 )
5686 .with_persona_errors(false);
5687
5688 for _ in 0..500 {
5689 let entry = je_gen.generate();
5690
5691 if let Some(created_date) = entry.header.created_date {
5692 let created_naive_date = created_date.date();
5693 assert!(
5694 created_naive_date <= entry.header.posting_date,
5695 "created_date ({}) should be <= posting_date ({})",
5696 created_naive_date,
5697 entry.header.posting_date,
5698 );
5699 }
5700 }
5701 }
5702
5703 #[test]
5707 fn apply_calibration_step_updates_lognormal_sigma() {
5708 let mut coa_gen =
5709 ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
5710 let coa = Arc::new(coa_gen.generate());
5711
5712 let mut gen = JournalEntryGenerator::new_with_params(
5713 TransactionConfig::default(),
5714 coa,
5715 vec!["1000".to_string()],
5716 NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
5717 NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
5718 42,
5719 );
5720
5721 let baseline_sigma = gen.amount_sampler.lognormal_sigma();
5722
5723 let step_sigma = crate::velocity_calibrator::CalibrationStep {
5724 rule_id: "R6".to_string(),
5725 parameter: "amounts.lognormal_sigma".to_string(),
5726 delta: 0.01,
5727 new_value: baseline_sigma + 0.01,
5728 };
5729 gen.apply_calibration_step(&step_sigma);
5730 assert!(
5731 (gen.amount_sampler.lognormal_sigma() - (baseline_sigma + 0.01)).abs() < 1e-9,
5732 "lognormal_sigma should be updated to {}",
5733 baseline_sigma + 0.01
5734 );
5735
5736 let baseline_round = gen.amount_sampler.round_number_probability();
5737 let step_round = crate::velocity_calibrator::CalibrationStep {
5738 rule_id: "R9".to_string(),
5739 parameter: "amounts.round_dollar_share".to_string(),
5740 delta: -0.005,
5741 new_value: (baseline_round - 0.005).max(0.0),
5742 };
5743 gen.apply_calibration_step(&step_round);
5744 let expected = (baseline_round - 0.005).max(0.0).clamp(0.0, 1.0);
5745 assert!(
5746 (gen.amount_sampler.round_number_probability() - expected).abs() < 1e-9,
5747 "round_number_probability should be updated to {}",
5748 expected
5749 );
5750 }
5751
5752 #[test]
5753 fn master_data_resolver_fills_every_pii_kind() {
5754 use datasynth_core::distributions::text_taxonomy::{
5755 PiiPlaceholderKind, PlaceholderResolver,
5756 };
5757 let mut r = MasterDataResolver {
5758 companies: vec!["Acme AG".to_string()],
5759 persons: vec!["Hans Muster".to_string()],
5760 streets: vec!["Hauptstrasse 1".to_string()],
5761 patients: vec!["Patient X".to_string()],
5762 };
5763 let mut rng = rand::rng();
5764 assert_eq!(r.resolve(PiiPlaceholderKind::Company, &mut rng), "Acme AG");
5765 assert_eq!(
5766 r.resolve(PiiPlaceholderKind::Person, &mut rng),
5767 "Hans Muster"
5768 );
5769 assert_eq!(
5770 r.resolve(PiiPlaceholderKind::Street, &mut rng),
5771 "Hauptstrasse 1"
5772 );
5773 assert_eq!(
5774 r.resolve(PiiPlaceholderKind::Patient, &mut rng),
5775 "Patient X"
5776 );
5777 }
5778
5779 #[test]
5780 fn master_data_resolver_empty_pool_falls_back() {
5781 use datasynth_core::distributions::text_taxonomy::{
5782 PiiPlaceholderKind, PlaceholderResolver,
5783 };
5784 let mut r = MasterDataResolver::default();
5785 let mut rng = rand::rng();
5786 let v = r.resolve(PiiPlaceholderKind::Company, &mut rng);
5787 assert!(!v.is_empty());
5788 }
5789
5790 #[test]
5802 fn synthetic_patient_pool_entries_pass_residual_scan() {
5803 use datasynth_core::distributions::text_taxonomy::PlaceholderGrammar;
5804 for name in synthetic_patient_pool("de_CH") {
5805 let filled = format!("*{name} G:2024-01-15 E:2024-01-20 A:2024-02-01");
5806 let structural: Vec<_> = PlaceholderGrammar::residual_pii_scan(&filled)
5807 .into_iter()
5808 .filter(|h| h.pattern != "given_name")
5809 .collect();
5810 assert!(
5811 structural.is_empty(),
5812 "synthetic patient name {name:?} fills to PII-shaped {filled:?}: {structural:?}"
5813 );
5814 }
5815 }
5816
5817 #[test]
5818 fn master_data_resolver_fallbacks_are_non_empty_and_placeholder_free() {
5819 use datasynth_core::distributions::text_taxonomy::{
5820 PiiPlaceholderKind, PlaceholderResolver,
5821 };
5822 let mut r = MasterDataResolver::default();
5826 let mut rng = rand::rng();
5827 for kind in [
5828 PiiPlaceholderKind::Company,
5829 PiiPlaceholderKind::Person,
5830 PiiPlaceholderKind::Street,
5831 PiiPlaceholderKind::Patient,
5832 ] {
5833 let v = r.resolve(kind, &mut rng);
5834 assert!(!v.is_empty(), "fallback for {kind:?} must be non-empty");
5835 assert!(
5836 !v.contains('{'),
5837 "fallback for {kind:?} must not contain a placeholder token"
5838 );
5839 }
5840 }
5841}