1use chrono::{Datelike, NaiveDate, Timelike};
4use datasynth_core::utils::seeded_rng;
5use rand::prelude::*;
6use rand_chacha::ChaCha8Rng;
7use rust_decimal::prelude::*;
8use rust_decimal::Decimal;
9use std::sync::{Arc, LazyLock};
10
11use tracing::debug;
12
13use datasynth_config::schema::{
14 AdvancedDistributionConfig, FraudConfig, GeneratorConfig, MixtureDistributionType,
15 TemplateConfig, TemporalPatternsConfig, TransactionConfig,
16};
17use datasynth_core::distributions::{
18 AdvancedAmountSampler, BusinessDayCalculator, CrossDayConfig, DriftAdjustments, DriftConfig,
19 DriftController, EventType, IndustryAmountProfile, IndustryType, LagDistribution,
20 PeriodEndConfig, PeriodEndDynamics, PeriodEndModel, ProcessingLagCalculator,
21 ProcessingLagConfig, *,
22};
23use datasynth_core::models::*;
24use datasynth_core::templates::{
25 descriptions::DescriptionContext, DescriptionGenerator, ReferenceGenerator, ReferenceType,
26};
27use datasynth_core::traits::Generator;
28use datasynth_core::uuid_factory::{DeterministicUuidFactory, GeneratorType};
29use datasynth_core::CountryPack;
30
31use crate::company_selector::WeightedCompanySelector;
32use crate::user_generator::{UserGenerator, UserGeneratorConfig};
33
34use datasynth_core::distributions::text_taxonomy::{PiiPlaceholderKind, PlaceholderResolver};
35
/// Lazily built fallback source-mix prior, constructed once from
/// `SourceMixPrior::sap_default`.
///
/// `sample_sap_source_code` samples from it when no loaded priors are present
/// and `synthetic_source_codes` is unset or `true`.
static DEFAULT_SOURCE_MIX: LazyLock<
    datasynth_core::distributions::behavioral_priors::SourceMixPrior,
> = LazyLock::new(datasynth_core::distributions::behavioral_priors::SourceMixPrior::sap_default);

/// Fallback probability used when `TransactionConfig::reversal_rate` is `None`.
const DEFAULT_REVERSAL_RATE: f64 = 0.10;

/// Fallback probability used when `TransactionConfig::allocation_batch_rate`
/// is `None`.
const DEFAULT_ALLOCATION_RATE: f64 = 0.008;
/// `(currency code, rate)` pairs that `maybe_apply_foreign_currency` picks
/// from uniformly. The ledger amount is divided by the rate to obtain the
/// transaction amount.
// NOTE(review): the rates look like static "ledger units per one foreign
// unit" quotes — confirm the intended ledger currency.
const FOREIGN_CCYS: &[(&str, f64)] = &[
    ("EUR", 1.09),
    ("GBP", 1.27),
    ("CHF", 1.12),
    ("CAD", 0.74),
    ("JPY", 0.0068),
    ("AUD", 0.66),
    ("CNY", 0.14),
];
// NOTE(review): the two bounds below are not referenced in the visible part
// of the file; presumably they bound the number of target lines of an
// allocation batch — confirm against `maybe_generate_allocation_batch`.
const ALLOCATION_MIN_TARGETS: u32 = 30;
const ALLOCATION_MAX_TARGETS: u32 = 80;

/// Exponent applied to each rank when building `ZIPF_CUM`.
const ZIPF_ALPHA: f64 = 2.0;
/// Highest rank tabulated in `ZIPF_CUM`.
const ZIPF_CAP: usize = 16_384;
78static ZIPF_CUM: LazyLock<Vec<f64>> = LazyLock::new(|| {
82 let mut cum = Vec::with_capacity(ZIPF_CAP + 1);
83 cum.push(0.0);
84 let mut acc = 0.0_f64;
85 for i in 1..=ZIPF_CAP {
86 acc += 1.0 / (i as f64).powf(ZIPF_ALPHA);
87 cum.push(acc);
88 }
89 cum
90});
91
/// Pools of candidate strings used to fill PII placeholders.
///
/// Each pool backs one `PiiPlaceholderKind`; an empty pool makes `resolve`
/// return a fixed synthetic fallback string instead.
#[derive(Debug, Default)]
pub struct MasterDataResolver {
    /// Candidates for `PiiPlaceholderKind::Company`.
    pub companies: Vec<String>,
    /// Candidates for `PiiPlaceholderKind::Person`.
    pub persons: Vec<String>,
    /// Candidates for `PiiPlaceholderKind::Street`.
    pub streets: Vec<String>,
    /// Candidates for `PiiPlaceholderKind::Patient`.
    pub patients: Vec<String>,
}
105
106impl PlaceholderResolver for MasterDataResolver {
107 fn resolve(&mut self, kind: PiiPlaceholderKind, rng: &mut dyn rand::Rng) -> String {
108 use rand::RngExt;
109 let (pool, fallback): (&Vec<String>, &str) = match kind {
110 PiiPlaceholderKind::Company => (&self.companies, "Synthetic Company AG"),
111 PiiPlaceholderKind::Person => (&self.persons, "Synthetic Person"),
112 PiiPlaceholderKind::Street => (&self.streets, "Synthetic Street 1"),
113 PiiPlaceholderKind::Patient => (&self.patients, "Synthetic Patient"),
114 };
115 if pool.is_empty() {
116 return fallback.to_string();
117 }
118 let idx = rng.random_range(0..pool.len());
119 pool[idx].clone()
120 }
121}
122
/// Returns the fixed pool of obviously synthetic patient names.
///
/// `_locale` is accepted but currently ignored: every locale gets the same
/// eight names, in the same order.
fn synthetic_patient_pool(_locale: &str) -> Vec<String> {
    const NAMES: [&str; 8] = [
        "Alex Beispiel",
        "Bea Muster",
        "Cleo Synthetic",
        "Demo Example",
        "Erik Probe",
        "Fred Testperson",
        "Gerda Platzhalter",
        "Hans Demo",
    ];
    let mut pool = Vec::with_capacity(NAMES.len());
    for name in NAMES.iter() {
        pool.push(String::from(*name));
    }
    pool
}
149
/// State for generating synthetic journal entries.
///
/// `new_with_full_config` derives every RNG stream from a single `seed` and
/// fills the remaining fields with defaults; the `with_*` / `set_*` methods
/// override individual pieces afterwards.
pub struct JournalEntryGenerator {
    /// Primary RNG stream (`seeded_rng(seed, 0)`); also used by
    /// `sample_sap_source_code` when `loaded_priors` is set.
    rng: ChaCha8Rng,
    /// RNG stream used when sampling `DEFAULT_SOURCE_MIX`.
    source_mix_rng: ChaCha8Rng,
    /// Library of cached `(debit, credit)` string-list pairs keyed by
    /// `(company, document type)`. Written by `cache_recurring_archetype`,
    /// read by `pick_recurring_archetype`.
    recurring_archetypes:
        std::collections::HashMap<(String, String), Vec<(Vec<String>, Vec<String>)>>,
    /// RNG stream used by `pick_recurring_archetype`.
    template_rng: ChaCha8Rng,
    /// Bounded buffer of recent entries filled by `record_for_reversal`;
    /// reversal and allocation-batch generation remove entries from it.
    reversal_buffer: Vec<JournalEntry>,
    /// RNG stream used by `maybe_generate_reversal`.
    reversal_rng: ChaCha8Rng,
    // NOTE(review): consumers of `account_rng` are outside the visible part
    // of the file.
    account_rng: ChaCha8Rng,
    /// RNG stream used by `maybe_generate_allocation_batch`.
    allocation_rng: ChaCha8Rng,
    /// RNG stream used by `maybe_apply_foreign_currency`.
    fx_rng: ChaCha8Rng,
    // NOTE(review): `cond_pair_rng`, `cond_pair_sampler` and
    // `current_je_source` are only initialised (sampler/source as `None`) in
    // the visible code; their consumers live elsewhere.
    cond_pair_rng: ChaCha8Rng,
    cond_pair_sampler: Option<
        datasynth_core::distributions::source_conditional_pair::SourceConditionalPairSampler,
    >,
    current_je_source: Option<String>,
    /// Base seed passed to the constructor; reused by
    /// `with_country_pack_names` to seed the user generator.
    seed: u64,
    /// Transaction settings (rates, distributions, feature switches).
    config: TransactionConfig,
    /// Shared chart of accounts.
    coa: Arc<ChartOfAccounts>,
    /// Company codes entries may be generated for.
    companies: Vec<String>,
    // NOTE(review): starts empty; presumably maps company code to currency
    // code — confirm against the code that fills it.
    company_currencies: std::collections::HashMap<String, String>,
    /// Company selector; uniform over `companies` unless replaced.
    company_selector: WeightedCompanySelector,
    line_sampler: LineItemSampler,
    amount_sampler: AmountSampler,
    temporal_sampler: TemporalSampler,
    /// First date of the generation window.
    start_date: NaiveDate,
    /// Last date of the generation window; reversals dated after it fall back
    /// to the original posting date.
    end_date: NaiveDate,
    /// Counter initialised to zero by the constructor.
    count: u64,
    uuid_factory: DeterministicUuidFactory,
    /// Optional user pool; generated from the template config when realistic
    /// names are requested and none is supplied.
    user_pool: Option<UserPool>,
    description_generator: DescriptionGenerator,
    reference_generator: ReferenceGenerator,
    template_config: TemplateConfig,
    /// Vendor pool; `VendorPool::standard()` until `with_vendors` replaces it.
    vendor_pool: VendorPool,
    /// Customer pool; `CustomerPool::standard()` until `with_customers`
    /// replaces it.
    customer_pool: CustomerPool,
    /// Material pool; `None` until `with_materials` supplies one.
    material_pool: Option<MaterialPool>,
    /// Cost-center identifiers supplied via `with_cost_center_pool`.
    cost_center_pool: Vec<String>,
    /// Profit-center identifiers supplied via `with_profit_center_pool`.
    profit_center_pool: Vec<String>,
    /// Set once any non-empty vendor, customer or material list is supplied.
    using_real_master_data: bool,
    fraud_config: FraudConfig,
    // The following switches and thresholds are set to fixed defaults by the
    // constructor (`true`, `true`, 10000, 0.10).
    persona_errors_enabled: bool,
    approval_enabled: bool,
    approval_threshold: rust_decimal::Decimal,
    sod_violation_rate: f64,
    /// In-progress batch, if any; starts as `None`.
    batch_state: Option<BatchState>,
    batching_enabled: bool,
    drift_controller: Option<DriftController>,
    /// Business-day calculator; defaults to a US holiday calendar for the
    /// start year and is replaced by the temporal-pattern builders.
    business_day_calculator: Option<BusinessDayCalculator>,
    /// Set by the temporal-pattern builders when processing lags are enabled.
    processing_lag_calculator: Option<ProcessingLagCalculator>,
    /// The temporal-pattern configuration last applied, if any.
    temporal_patterns_config: Option<TemporalPatternsConfig>,
    /// Weight per business process; defaults to
    /// `DEFAULT_BUSINESS_PROCESS_WEIGHTS`.
    business_process_weights: [(BusinessProcess, f64); 5],
    /// Optional sampler configured by `set_advanced_distributions`.
    advanced_amount_sampler: Option<AdvancedAmountSampler>,
    /// Optional conditional sampler for the `"amount"` output, configured by
    /// `set_advanced_distributions`.
    conditional_amount_override: Option<datasynth_core::distributions::ConditionalSampler>,
    /// Optional copula for the `("amount", "line_count")` pair, configured by
    /// `set_advanced_distributions`.
    correlation_copula: Option<datasynth_core::distributions::BivariateCopulaSampler>,
    /// Externally loaded priors; when present they change source-code
    /// sampling and disable archetype caching.
    pub loaded_priors: Option<crate::priors_loader::LoadedPriors>,
    // NOTE(review): the three maps below start empty and are keyed by
    // `String`; their writers and readers are outside the visible code.
    iet_day_accum: std::collections::HashMap<String, f64>,
    iet_burst_remaining: std::collections::HashMap<String, u8>,
    last_tp_by_source: std::collections::HashMap<String, String>,
    /// Optional velocity calibrator; `None` by default.
    pub velocity_calibrator: Option<crate::velocity_calibrator::VelocityCalibrator>,
    /// Placeholder resolver rebuilt by `refresh_md_resolver`.
    md_resolver: MasterDataResolver,
}
308
/// Default weight per business process, used to initialise
/// `JournalEntryGenerator::business_process_weights`. The five weights sum
/// to 1.0.
const DEFAULT_BUSINESS_PROCESS_WEIGHTS: [(BusinessProcess, f64); 5] = [
    (BusinessProcess::O2C, 0.35),
    (BusinessProcess::P2P, 0.30),
    (BusinessProcess::R2R, 0.20),
    (BusinessProcess::H2R, 0.10),
    (BusinessProcess::A2R, 0.05),
];
316
317impl JournalEntryGenerator {
343 fn supported_conditional_input(field: &str) -> bool {
344 matches!(
345 field,
346 "month"
347 | "quarter"
348 | "year"
349 | "day_of_week"
350 | "day_of_month"
351 | "day_of_year"
352 | "week_of_year"
353 | "is_period_end"
354 | "is_quarter_end"
355 | "is_year_end"
356 | "constant"
357 | ""
358 )
359 }
360
361 fn conditional_input_value(&self, posting_date: chrono::NaiveDate) -> f64 {
362 let input_field = match self
363 .conditional_amount_override
364 .as_ref()
365 .map(|s| s.config().input_field.as_str())
366 {
367 Some(f) => f,
368 None => return 0.0,
369 };
370
371 let is_last_business_day = |d: chrono::NaiveDate| -> bool {
372 let next = d.succ_opt();
376 match next {
377 Some(n) => n.month() != d.month(),
378 None => true,
379 }
380 };
381
382 match input_field {
383 "month" => posting_date.month() as f64,
384 "quarter" => ((posting_date.month() - 1) / 3 + 1) as f64,
385 "year" => posting_date.year() as f64,
386 "day_of_week" => posting_date.weekday().number_from_monday() as f64,
387 "day_of_month" => posting_date.day() as f64,
388 "day_of_year" => posting_date.ordinal() as f64,
389 "week_of_year" => posting_date.iso_week().week() as f64,
390 "is_period_end" => f64::from(u8::from(is_last_business_day(posting_date))),
391 "is_quarter_end" => {
392 let m = posting_date.month();
393 let is_q_month = matches!(m, 3 | 6 | 9 | 12);
394 f64::from(u8::from(is_q_month && is_last_business_day(posting_date)))
395 }
396 "is_year_end" => f64::from(u8::from(
397 posting_date.month() == 12 && is_last_business_day(posting_date),
398 )),
399 _ => 0.0,
400 }
401 }
402}
403
404fn industry_profile_to_log_normal(
405 p: datasynth_config::schema::IndustryProfileType,
406) -> datasynth_core::distributions::LogNormalMixtureConfig {
407 use datasynth_config::schema::IndustryProfileType as P;
408 let industry = match p {
409 P::Retail => IndustryType::Retail,
410 P::Manufacturing => IndustryType::Manufacturing,
411 P::FinancialServices => IndustryType::FinancialServices,
412 P::Healthcare => IndustryType::Healthcare,
413 P::Technology => IndustryType::Technology,
414 };
415 IndustryAmountProfile::for_industry(industry).sales_amounts
416}
417
/// Snapshot of the values shared by the entries of one batch.
// NOTE(review): the code that creates and consumes this state is outside the
// visible part of the file; field roles below are read off their names and
// types only — confirm before relying on them.
#[derive(Clone)]
struct BatchState {
    base_account_number: String,
    base_amount: rust_decimal::Decimal,
    base_business_process: Option<BusinessProcess>,
    base_posting_date: NaiveDate,
    // Presumably the number of entries still to be emitted for this batch.
    remaining: u8,
}
432
433impl JournalEntryGenerator {
434 pub fn new_with_params(
436 config: TransactionConfig,
437 coa: Arc<ChartOfAccounts>,
438 companies: Vec<String>,
439 start_date: NaiveDate,
440 end_date: NaiveDate,
441 seed: u64,
442 ) -> Self {
443 Self::new_with_full_config(
444 config,
445 coa,
446 companies,
447 start_date,
448 end_date,
449 seed,
450 TemplateConfig::default(),
451 None,
452 )
453 }
454
455 #[allow(clippy::too_many_arguments)]
457 pub fn new_with_full_config(
458 config: TransactionConfig,
459 coa: Arc<ChartOfAccounts>,
460 companies: Vec<String>,
461 start_date: NaiveDate,
462 end_date: NaiveDate,
463 seed: u64,
464 template_config: TemplateConfig,
465 user_pool: Option<UserPool>,
466 ) -> Self {
467 let user_pool = user_pool.or_else(|| {
469 if template_config.names.generate_realistic_names {
470 let user_gen_config = UserGeneratorConfig {
471 culture_distribution: vec![
472 (
473 datasynth_core::templates::NameCulture::WesternUs,
474 template_config.names.culture_distribution.western_us,
475 ),
476 (
477 datasynth_core::templates::NameCulture::Hispanic,
478 template_config.names.culture_distribution.hispanic,
479 ),
480 (
481 datasynth_core::templates::NameCulture::German,
482 template_config.names.culture_distribution.german,
483 ),
484 (
485 datasynth_core::templates::NameCulture::French,
486 template_config.names.culture_distribution.french,
487 ),
488 (
489 datasynth_core::templates::NameCulture::Chinese,
490 template_config.names.culture_distribution.chinese,
491 ),
492 (
493 datasynth_core::templates::NameCulture::Japanese,
494 template_config.names.culture_distribution.japanese,
495 ),
496 (
497 datasynth_core::templates::NameCulture::Indian,
498 template_config.names.culture_distribution.indian,
499 ),
500 ],
501 email_domain: template_config.names.email_domain.clone(),
502 generate_realistic_names: true,
503 };
504 let mut user_gen = UserGenerator::with_config(seed + 100, user_gen_config);
505 Some(user_gen.generate_standard(&companies))
506 } else {
507 None
508 }
509 });
510
511 let mut ref_gen = ReferenceGenerator::new(
513 start_date.year(),
514 companies
515 .first()
516 .map(std::string::String::as_str)
517 .unwrap_or("1000"),
518 );
519 ref_gen.set_prefix(
520 ReferenceType::Invoice,
521 &template_config.references.invoice_prefix,
522 );
523 ref_gen.set_prefix(
524 ReferenceType::PurchaseOrder,
525 &template_config.references.po_prefix,
526 );
527 ref_gen.set_prefix(
528 ReferenceType::SalesOrder,
529 &template_config.references.so_prefix,
530 );
531
532 let company_selector = WeightedCompanySelector::uniform(companies.clone());
534
535 Self {
536 rng: seeded_rng(seed, 0),
537 source_mix_rng: seeded_rng(seed, 50_063),
538 recurring_archetypes: std::collections::HashMap::new(),
539 template_rng: seeded_rng(seed, 70_081),
540 reversal_buffer: Vec::new(),
541 reversal_rng: seeded_rng(seed, 90_017),
542 account_rng: seeded_rng(seed, 60_071),
543 allocation_rng: seeded_rng(seed, 80_023),
544 fx_rng: seeded_rng(seed, 70_093),
545 cond_pair_rng: seeded_rng(seed, 110_071),
546 cond_pair_sampler: None,
547 current_je_source: None,
548 seed,
549 config: config.clone(),
550 coa,
551 companies,
552 company_currencies: std::collections::HashMap::new(),
553 company_selector,
554 line_sampler: LineItemSampler::with_config(
555 seed + 1,
556 config.line_item_distribution.clone(),
557 config.even_odd_distribution.clone(),
558 config.debit_credit_distribution.clone(),
559 ),
560 amount_sampler: AmountSampler::with_config(seed + 2, config.amounts.clone()),
561 temporal_sampler: TemporalSampler::with_config(
562 seed + 3,
563 config.seasonality.clone(),
564 WorkingHoursConfig::default(),
565 Vec::new(),
566 ),
567 start_date,
568 end_date,
569 count: 0,
570 uuid_factory: DeterministicUuidFactory::new(seed, GeneratorType::JournalEntry),
571 user_pool,
572 description_generator: DescriptionGenerator::new(),
573 reference_generator: ref_gen,
574 template_config,
575 vendor_pool: VendorPool::standard(),
576 customer_pool: CustomerPool::standard(),
577 material_pool: None,
578 cost_center_pool: Vec::new(),
579 profit_center_pool: Vec::new(),
580 using_real_master_data: false,
581 fraud_config: FraudConfig::default(),
582 persona_errors_enabled: true, approval_enabled: true, approval_threshold: rust_decimal::Decimal::new(10000, 0), sod_violation_rate: 0.10, batch_state: None,
587 batching_enabled: true,
588 drift_controller: None,
589 business_day_calculator: Some(BusinessDayCalculator::new(HolidayCalendar::new(
592 Region::US,
593 start_date.year(),
594 ))),
595 processing_lag_calculator: None,
596 temporal_patterns_config: None,
597 business_process_weights: DEFAULT_BUSINESS_PROCESS_WEIGHTS,
598 advanced_amount_sampler: None,
599 conditional_amount_override: None,
600 correlation_copula: None,
601 loaded_priors: None,
602 iet_day_accum: std::collections::HashMap::new(),
603 iet_burst_remaining: std::collections::HashMap::new(),
604 last_tp_by_source: std::collections::HashMap::new(),
605 velocity_calibrator: None,
606 md_resolver: MasterDataResolver::default(),
607 }
608 }
609
610 pub fn set_advanced_distributions(
629 &mut self,
630 config: &AdvancedDistributionConfig,
631 seed: u64,
632 ) -> Result<(), String> {
633 if !config.enabled {
634 return Ok(());
635 }
636
637 self.conditional_amount_override = config
643 .conditional
644 .iter()
645 .find(|c| {
646 c.output_field == "amount" && Self::supported_conditional_input(&c.input_field)
647 })
648 .and_then(|c| {
649 datasynth_core::distributions::ConditionalSampler::new(
650 seed.wrapping_add(17),
651 c.to_core_config(),
652 )
653 .ok()
654 });
655
656 self.correlation_copula = config
662 .correlations
663 .to_core_config_for_pair("amount", "line_count")
664 .and_then(|copula_cfg| {
665 datasynth_core::distributions::BivariateCopulaSampler::new(
666 seed.wrapping_add(31),
667 copula_cfg,
668 )
669 .ok()
670 });
671
672 if let Some(pareto) = &config.pareto {
677 if pareto.enabled {
678 let core_cfg = pareto.to_core_config();
679 self.advanced_amount_sampler =
680 Some(AdvancedAmountSampler::new_pareto(seed, core_cfg)?);
681 return Ok(());
682 }
683 }
684
685 if !config.amounts.enabled {
686 return Ok(());
687 }
688
689 match config.amounts.distribution_type {
690 MixtureDistributionType::LogNormal => {
691 let lognormal_cfg = config.amounts.to_log_normal_config().or_else(|| {
692 config
693 .industry_profile
694 .as_ref()
695 .map(|p| industry_profile_to_log_normal(p.profile_type()))
696 });
697 if let Some(cfg) = lognormal_cfg {
698 self.advanced_amount_sampler =
699 Some(AdvancedAmountSampler::new_log_normal(seed, cfg)?);
700 }
701 }
702 MixtureDistributionType::Gaussian => {
703 if let Some(cfg) = config.amounts.to_gaussian_config() {
704 self.advanced_amount_sampler =
705 Some(AdvancedAmountSampler::new_gaussian(seed, cfg)?);
706 }
707 }
708 }
709
710 Ok(())
711 }
712
713 pub fn set_business_process_weights(
717 &mut self,
718 o2c: f64,
719 p2p: f64,
720 r2r: f64,
721 h2r: f64,
722 a2r: f64,
723 ) {
724 self.business_process_weights = [
725 (BusinessProcess::O2C, o2c),
726 (BusinessProcess::P2P, p2p),
727 (BusinessProcess::R2R, r2r),
728 (BusinessProcess::H2R, h2r),
729 (BusinessProcess::A2R, a2r),
730 ];
731 }
732
733 pub fn from_generator_config(
738 full_config: &GeneratorConfig,
739 coa: Arc<ChartOfAccounts>,
740 start_date: NaiveDate,
741 end_date: NaiveDate,
742 seed: u64,
743 ) -> Self {
744 let companies: Vec<String> = full_config
745 .companies
746 .iter()
747 .map(|c| c.code.clone())
748 .collect();
749
750 let company_selector = WeightedCompanySelector::from_configs(&full_config.companies);
752
753 let mut generator = Self::new_with_full_config(
754 full_config.transactions.clone(),
755 coa,
756 companies,
757 start_date,
758 end_date,
759 seed,
760 full_config.templates.clone(),
761 None,
762 );
763
764 generator.company_selector = company_selector;
766
767 generator.fraud_config = full_config.fraud.clone();
769
770 let temporal_config = &full_config.temporal_patterns;
772 if temporal_config.enabled {
773 generator = generator.with_temporal_patterns(temporal_config.clone(), seed);
774 }
775
776 generator
777 }
778
779 pub fn with_temporal_patterns(mut self, config: TemporalPatternsConfig, seed: u64) -> Self {
786 if config.business_days.enabled {
788 let region = config
789 .calendars
790 .regions
791 .first()
792 .map(|r| Self::parse_region(r))
793 .unwrap_or(Region::US);
794
795 let calendar = HolidayCalendar::new(region, self.start_date.year());
796 self.business_day_calculator = Some(BusinessDayCalculator::new(calendar));
797 }
798
799 if config.processing_lags.enabled {
801 let lag_config = Self::convert_processing_lag_config(&config.processing_lags);
802 self.processing_lag_calculator =
803 Some(ProcessingLagCalculator::with_config(seed, lag_config));
804 }
805
806 let model = config.period_end.model.as_deref().unwrap_or("flat");
808 if model != "flat"
809 || config
810 .period_end
811 .month_end
812 .as_ref()
813 .is_some_and(|m| m.peak_multiplier.unwrap_or(1.0) != 1.0)
814 {
815 let dynamics = Self::convert_period_end_config(&config.period_end);
816 self.temporal_sampler.set_period_end_dynamics(dynamics);
817 }
818
819 self.temporal_patterns_config = Some(config);
820 self
821 }
822
823 pub fn with_country_pack_temporal(
831 mut self,
832 config: TemporalPatternsConfig,
833 seed: u64,
834 pack: &CountryPack,
835 ) -> Self {
836 if config.business_days.enabled {
838 let calendar = HolidayCalendar::from_country_pack(pack, self.start_date.year());
839 self.business_day_calculator = Some(BusinessDayCalculator::new(calendar));
840 }
841
842 if config.processing_lags.enabled {
844 let lag_config = Self::convert_processing_lag_config(&config.processing_lags);
845 self.processing_lag_calculator =
846 Some(ProcessingLagCalculator::with_config(seed, lag_config));
847 }
848
849 let model = config.period_end.model.as_deref().unwrap_or("flat");
851 if model != "flat"
852 || config
853 .period_end
854 .month_end
855 .as_ref()
856 .is_some_and(|m| m.peak_multiplier.unwrap_or(1.0) != 1.0)
857 {
858 let dynamics = Self::convert_period_end_config(&config.period_end);
859 self.temporal_sampler.set_period_end_dynamics(dynamics);
860 }
861
862 self.temporal_patterns_config = Some(config);
863 self
864 }
865
866 fn convert_processing_lag_config(
868 schema: &datasynth_config::schema::ProcessingLagSchemaConfig,
869 ) -> ProcessingLagConfig {
870 let mut config = ProcessingLagConfig {
871 enabled: schema.enabled,
872 ..Default::default()
873 };
874
875 let convert_lag = |lag: &datasynth_config::schema::LagDistributionSchemaConfig| {
877 let mut dist = LagDistribution::log_normal(lag.mu, lag.sigma);
878 if let Some(min) = lag.min_hours {
879 dist.min_lag_hours = min;
880 }
881 if let Some(max) = lag.max_hours {
882 dist.max_lag_hours = max;
883 }
884 dist
885 };
886
887 if let Some(ref lag) = schema.sales_order_lag {
889 config
890 .event_lags
891 .insert(EventType::SalesOrder, convert_lag(lag));
892 }
893 if let Some(ref lag) = schema.purchase_order_lag {
894 config
895 .event_lags
896 .insert(EventType::PurchaseOrder, convert_lag(lag));
897 }
898 if let Some(ref lag) = schema.goods_receipt_lag {
899 config
900 .event_lags
901 .insert(EventType::GoodsReceipt, convert_lag(lag));
902 }
903 if let Some(ref lag) = schema.invoice_receipt_lag {
904 config
905 .event_lags
906 .insert(EventType::InvoiceReceipt, convert_lag(lag));
907 }
908 if let Some(ref lag) = schema.invoice_issue_lag {
909 config
910 .event_lags
911 .insert(EventType::InvoiceIssue, convert_lag(lag));
912 }
913 if let Some(ref lag) = schema.payment_lag {
914 config
915 .event_lags
916 .insert(EventType::Payment, convert_lag(lag));
917 }
918 if let Some(ref lag) = schema.journal_entry_lag {
919 config
920 .event_lags
921 .insert(EventType::JournalEntry, convert_lag(lag));
922 }
923
924 if let Some(ref cross_day) = schema.cross_day_posting {
926 config.cross_day = CrossDayConfig {
927 enabled: cross_day.enabled,
928 probability_by_hour: cross_day.probability_by_hour.clone(),
929 ..Default::default()
930 };
931 }
932
933 config
934 }
935
936 fn convert_period_end_config(
938 schema: &datasynth_config::schema::PeriodEndSchemaConfig,
939 ) -> PeriodEndDynamics {
940 let model_type = schema.model.as_deref().unwrap_or("exponential");
941
942 let convert_period =
944 |period: Option<&datasynth_config::schema::PeriodEndModelSchemaConfig>,
945 default_peak: f64|
946 -> PeriodEndConfig {
947 if let Some(p) = period {
948 let model = match model_type {
949 "flat" => PeriodEndModel::FlatMultiplier {
950 multiplier: p.peak_multiplier.unwrap_or(default_peak),
951 },
952 "extended_crunch" => PeriodEndModel::ExtendedCrunch {
953 start_day: p.start_day.unwrap_or(-10),
954 sustained_high_days: p.sustained_high_days.unwrap_or(3),
955 peak_multiplier: p.peak_multiplier.unwrap_or(default_peak),
956 ramp_up_days: 3, },
958 _ => PeriodEndModel::ExponentialAcceleration {
959 start_day: p.start_day.unwrap_or(-10),
960 base_multiplier: p.base_multiplier.unwrap_or(1.0),
961 peak_multiplier: p.peak_multiplier.unwrap_or(default_peak),
962 decay_rate: p.decay_rate.unwrap_or(0.3),
963 },
964 };
965 PeriodEndConfig {
966 enabled: true,
967 model,
968 additional_multiplier: p.additional_multiplier.unwrap_or(1.0),
969 }
970 } else {
971 PeriodEndConfig {
972 enabled: true,
973 model: PeriodEndModel::ExponentialAcceleration {
974 start_day: -10,
975 base_multiplier: 1.0,
976 peak_multiplier: default_peak,
977 decay_rate: 0.3,
978 },
979 additional_multiplier: 1.0,
980 }
981 }
982 };
983
984 PeriodEndDynamics::new(
985 convert_period(schema.month_end.as_ref(), 2.0),
986 convert_period(schema.quarter_end.as_ref(), 3.5),
987 convert_period(schema.year_end.as_ref(), 5.0),
988 )
989 }
990
991 fn parse_region(region_str: &str) -> Region {
993 match region_str.to_uppercase().as_str() {
994 "US" => Region::US,
995 "DE" => Region::DE,
996 "GB" => Region::GB,
997 "CN" => Region::CN,
998 "JP" => Region::JP,
999 "IN" => Region::IN,
1000 "BR" => Region::BR,
1001 "MX" => Region::MX,
1002 "AU" => Region::AU,
1003 "SG" => Region::SG,
1004 "KR" => Region::KR,
1005 "FR" => Region::FR,
1006 "IT" => Region::IT,
1007 "ES" => Region::ES,
1008 "CA" => Region::CA,
1009 _ => Region::US,
1010 }
1011 }
1012
1013 pub fn set_company_selector(&mut self, selector: WeightedCompanySelector) {
1015 self.company_selector = selector;
1016 }
1017
1018 pub fn company_selector(&self) -> &WeightedCompanySelector {
1020 &self.company_selector
1021 }
1022
1023 pub fn set_fraud_config(&mut self, config: FraudConfig) {
1025 self.fraud_config = config;
1026 }
1027
1028 pub fn with_vendors(mut self, vendors: &[Vendor]) -> Self {
1033 if !vendors.is_empty() {
1034 self.vendor_pool = VendorPool::from_vendors(vendors.to_vec());
1035 self.using_real_master_data = true;
1036 }
1037 self
1038 }
1039
1040 pub fn with_customers(mut self, customers: &[Customer]) -> Self {
1045 if !customers.is_empty() {
1046 self.customer_pool = CustomerPool::from_customers(customers.to_vec());
1047 self.using_real_master_data = true;
1048 }
1049 self
1050 }
1051
1052 pub fn with_materials(mut self, materials: &[Material]) -> Self {
1056 if !materials.is_empty() {
1057 self.material_pool = Some(MaterialPool::from_materials(materials.to_vec()));
1058 self.using_real_master_data = true;
1059 }
1060 self
1061 }
1062
1063 pub fn with_master_data(
1068 self,
1069 vendors: &[Vendor],
1070 customers: &[Customer],
1071 materials: &[Material],
1072 ) -> Self {
1073 self.with_vendors(vendors)
1074 .with_customers(customers)
1075 .with_materials(materials)
1076 }
1077
1078 fn refresh_md_resolver(&mut self) {
1084 let companies: Vec<String> = self
1085 .vendor_pool
1086 .vendors
1087 .iter()
1088 .map(|v| v.name.clone())
1089 .chain(self.customer_pool.customers.iter().map(|c| c.name.clone()))
1090 .collect();
1091
1092 let persons: Vec<String> = self
1093 .user_pool
1094 .as_ref()
1095 .map(|p| p.users.iter().map(|u| u.display_name.clone()).collect())
1096 .unwrap_or_default();
1097
1098 let streets: Vec<String> = Vec::new(); let patients = synthetic_patient_pool("de_CH");
1100
1101 self.md_resolver = MasterDataResolver {
1102 companies,
1103 persons,
1104 streets,
1105 patients,
1106 };
1107 }
1108
1109 pub fn with_cost_center_pool(mut self, ids: Vec<String>) -> Self {
1118 self.cost_center_pool = ids;
1119 self
1120 }
1121
1122 pub fn with_profit_center_pool(mut self, ids: Vec<String>) -> Self {
1130 self.profit_center_pool = ids;
1131 self
1132 }
1133
1134 pub fn with_user_pool(mut self, pool: UserPool) -> Self {
1143 self.user_pool = Some(pool);
1144 self
1145 }
1146
1147 pub fn with_country_pack_names(mut self, pack: &CountryPack) -> Self {
1154 let name_gen =
1155 datasynth_core::templates::MultiCultureNameGenerator::from_country_pack(pack);
1156 let config = UserGeneratorConfig {
1157 culture_distribution: Vec::new(),
1160 email_domain: name_gen.email_domain().to_string(),
1161 generate_realistic_names: true,
1162 };
1163 let mut user_gen = UserGenerator::with_name_generator(self.seed + 100, config, name_gen);
1164 self.user_pool = Some(user_gen.generate_standard(&self.companies));
1165 self
1166 }
1167
1168 pub fn is_using_real_master_data(&self) -> bool {
1170 self.using_real_master_data
1171 }
1172
1173 fn pick_source_system(rng: &mut ChaCha8Rng, is_manual: bool, bp: BusinessProcess) -> String {
1187 if is_manual {
1188 const MANUAL: &[&str] = &[
1191 "manual/standard",
1192 "manual/adjustment",
1193 "manual/reclassification",
1194 "manual/accrual",
1195 "manual/reversal",
1196 "manual/correction",
1197 "spreadsheet/upload",
1198 "spreadsheet/journal",
1199 ];
1200 let idx = (rng.random::<u32>() as usize) % MANUAL.len();
1201 return MANUAL[idx].to_string();
1202 }
1203
1204 let primary: &[&str] = match bp {
1208 BusinessProcess::P2P => &[
1209 "SAP-MM/PO",
1210 "SAP-MM/IV",
1211 "SAP-MM/IM",
1212 "SAP-FI/AP",
1213 "Interface/EDI",
1214 ],
1215 BusinessProcess::O2C => &[
1216 "SAP-SD/ORD",
1217 "SAP-SD/DEL",
1218 "SAP-SD/IV",
1219 "SAP-FI/AR",
1220 "Interface/Lockbox",
1221 ],
1222 BusinessProcess::H2R => &["SAP-HR/PR", "SAP-HR/TIME", "Interface/PayRun"],
1223 BusinessProcess::A2R => &["SAP-FI/AA", "SAP-FI/GL"],
1224 BusinessProcess::Treasury => &["Treasury/CM", "Treasury/HD", "Interface/Bank"],
1225 BusinessProcess::Tax => &["Tax/RPT", "SAP-FI/GL"],
1226 BusinessProcess::Mfg => &["SAP-MM/IM", "SAP-FI/GL"],
1227 _ => &[
1230 "SAP-FI/GL",
1231 "SAP-FI/AP",
1232 "SAP-FI/AR",
1233 "SAP-FI/AA",
1234 "External/SubL",
1235 ],
1236 };
1237
1238 const CROSS: &[&str] = &[
1241 "SAP-FI/GL",
1242 "SAP-FI/AP",
1243 "SAP-FI/AR",
1244 "Interface/EDI",
1245 "Interface/Bank",
1246 "External/SubL",
1247 ];
1248 let pool = if rng.random::<f64>() < 0.80 {
1249 primary
1250 } else {
1251 CROSS
1252 };
1253 let idx = (rng.random::<u32>() as usize) % pool.len();
1254 pool[idx].to_string()
1255 }
1256
1257 fn sample_sap_source_code(&mut self) -> Option<String> {
1264 if let Some(p) = self.loaded_priors.as_ref() {
1265 return Some(p.source_mix.sample(&mut self.rng));
1266 }
1267 if self.config.synthetic_source_codes.unwrap_or(true) {
1268 return Some(DEFAULT_SOURCE_MIX.sample(&mut self.source_mix_rng));
1271 }
1272 None
1273 }
1274
1275 fn pick_recurring_archetype(
1283 &mut self,
1284 company: &str,
1285 doc_type: &str,
1286 debit_count: usize,
1287 credit_count: usize,
1288 ) -> Option<(Vec<String>, Vec<String>)> {
1289 if !self.config.recurring_templates.unwrap_or(true) {
1290 return None;
1291 }
1292 let p_reuse_opt = self.config.archetype_reuse_probability;
1297 if p_reuse_opt.is_none() && self.loaded_priors.is_some() {
1298 return None;
1299 }
1300 let p_reuse = p_reuse_opt.unwrap_or(0.90);
1301 if self.template_rng.random::<f64>() >= p_reuse {
1302 return None;
1303 }
1304 let lib = self
1305 .recurring_archetypes
1306 .get(&(company.to_string(), doc_type.to_string()))?;
1307 let matching: Vec<&(Vec<String>, Vec<String>)> = lib
1308 .iter()
1309 .filter(|(d, c)| d.len() == debit_count && c.len() == credit_count)
1310 .collect();
1311 if matching.is_empty() {
1312 return None;
1313 }
1314 let idx = Self::power_law_index(matching.len(), &mut self.template_rng).unwrap_or(0);
1322 Some(matching[idx].clone())
1323 }
1324
1325 fn cache_recurring_archetype(
1328 &mut self,
1329 company: &str,
1330 doc_type: &str,
1331 debit: Vec<String>,
1332 credit: Vec<String>,
1333 ) {
1334 if self.loaded_priors.is_some() || !self.config.recurring_templates.unwrap_or(true) {
1335 return;
1336 }
1337 if debit.is_empty() && credit.is_empty() {
1338 return;
1339 }
1340 const CAP: usize = 24; let lib = self
1342 .recurring_archetypes
1343 .entry((company.to_string(), doc_type.to_string()))
1344 .or_default();
1345 if lib.len() < CAP {
1346 lib.push((debit, credit));
1347 }
1348 }
1349
1350 fn maybe_generate_reversal(&mut self) -> Option<JournalEntry> {
1357 let rate = self.config.reversal_rate.unwrap_or(DEFAULT_REVERSAL_RATE);
1358 if rate <= 0.0 || self.reversal_buffer.is_empty() {
1359 return None;
1360 }
1361 if self.reversal_rng.random::<f64>() >= rate {
1362 return None;
1363 }
1364 let pick = (self.reversal_rng.random::<u32>() as usize) % self.reversal_buffer.len();
1365 let mut entry = self.reversal_buffer.remove(pick);
1369 let orig_id = entry.header.document_id;
1370 let offset = 1 + (self.reversal_rng.random::<u32>() % 7) as i64;
1372 let mut rev_date = entry.header.posting_date + chrono::Duration::days(offset);
1373 if let Some(ref calc) = self.business_day_calculator {
1374 if !calc.is_business_day(rev_date) {
1375 rev_date = calc.next_business_day(rev_date, false);
1376 }
1377 }
1378 if rev_date > self.end_date {
1379 rev_date = entry.header.posting_date;
1380 }
1381 let rev_id =
1383 uuid::Uuid::from_u128(orig_id.as_u128() ^ 0x5245_5645_5253_414c_5245_5645_5253_414c);
1384 entry.header.document_id = rev_id;
1387 entry.header.posting_date = rev_date;
1388 entry.header.document_date = rev_date;
1389 entry.header.fiscal_year = rev_date.year() as u16;
1390 entry.header.fiscal_period = rev_date.month() as u8;
1391 entry.header.header_text = Some(format!("Reversal of {orig_id}"));
1392 entry.header.reference = Some(format!("REV-{orig_id}"));
1393 entry.header.batch_id = None;
1394 for line in entry.lines.iter_mut() {
1395 std::mem::swap(&mut line.debit_amount, &mut line.credit_amount);
1396 line.document_id = rev_id;
1397 }
1398 Some(entry)
1399 }
1400
1401 fn record_for_reversal(&mut self, entry: &JournalEntry) {
1405 let reversal_on = self.config.reversal_rate.unwrap_or(DEFAULT_REVERSAL_RATE) > 0.0;
1406 let allocation_on = self
1407 .config
1408 .allocation_batch_rate
1409 .unwrap_or(DEFAULT_ALLOCATION_RATE)
1410 > 0.0;
1411 if (!reversal_on && !allocation_on) || entry.lines.is_empty() {
1412 return;
1413 }
1414 const CAP: usize = 64;
1415 if self.reversal_buffer.len() >= CAP {
1416 self.reversal_buffer.remove(0);
1417 }
1418 self.reversal_buffer.push(entry.clone());
1419 }
1420
1421 fn maybe_apply_foreign_currency(&mut self, entry: &mut JournalEntry) {
1429 let prob = self.config.foreign_currency_rate.unwrap_or(0.0);
1430 if prob <= 0.0 || self.fx_rng.random::<f64>() >= prob {
1431 return;
1432 }
1433 let (code, rate) = FOREIGN_CCYS[self.fx_rng.random_range(0..FOREIGN_CCYS.len())];
1434 let rate_dec = match Decimal::from_f64_retain(rate) {
1435 Some(r) if r > Decimal::ZERO => r,
1436 _ => return,
1437 };
1438 entry.header.currency = code.to_string();
1439 entry.header.exchange_rate = rate_dec;
1440 for line in entry.lines.iter_mut() {
1441 let ledger = line.debit_amount + line.credit_amount; line.transaction_amount = Some((ledger / rate_dec).round_dp(2));
1443 }
1444 }
1445
1446 fn split_amount(total: Decimal, n: usize, rng: &mut ChaCha8Rng) -> Vec<Decimal> {
1451 let n = n.max(1);
1452 let total_cents = (total.round_dp(2) * Decimal::from(100))
1453 .to_i64()
1454 .unwrap_or(0);
1455 if n == 1 || total_cents < n as i64 {
1456 return vec![total];
1457 }
1458 let weights: Vec<f64> = (0..n).map(|_| 0.5 + rng.random::<f64>()).collect();
1459 let sumw: f64 = weights.iter().sum::<f64>().max(f64::EPSILON);
1460 let spare = total_cents - n as i64; let mut cents: Vec<i64> = weights
1462 .iter()
1463 .map(|w| 1 + (spare as f64 * w / sumw).floor() as i64)
1464 .collect();
1465 let assigned: i64 = cents.iter().sum();
1467 let leftover = total_cents - assigned;
1468 if let Some(maxp) = cents.iter_mut().max_by_key(|c| **c) {
1469 *maxp += leftover;
1470 }
1471 cents.into_iter().map(|c| Decimal::new(c, 2)).collect()
1472 }
1473
1474 fn business_unit_for_dimension(dim: &str) -> String {
1480 const N_BU: u32 = 11;
1481 let mut h: u32 = 0x811c_9dc5;
1482 for b in dim.bytes() {
1483 h ^= b as u32;
1484 h = h.wrapping_mul(0x0100_0193);
1485 }
1486 format!("BU{:02}", (h % N_BU) + 1)
1487 }
1488
1489 fn maybe_generate_allocation_batch(&mut self) -> Option<JournalEntry> {
1497 let rate = self
1498 .config
1499 .allocation_batch_rate
1500 .unwrap_or(DEFAULT_ALLOCATION_RATE);
1501 if rate <= 0.0 || self.reversal_buffer.is_empty() {
1502 return None;
1503 }
1504 if self.allocation_rng.random::<f64>() >= rate {
1505 return None;
1506 }
1507 let pick = (self.allocation_rng.random::<u32>() as usize) % self.reversal_buffer.len();
1508 let mut entry = self.reversal_buffer.remove(pick);
1511 let idx = entry
1513 .lines
1514 .iter()
1515 .enumerate()
1516 .filter(|(_, l)| l.debit_amount > Decimal::ZERO)
1517 .max_by(|a, b| a.1.debit_amount.cmp(&b.1.debit_amount))
1518 .map(|(i, _)| i)?;
1519 let template = entry.lines[idx].clone();
1520 let n = self
1521 .allocation_rng
1522 .random_range(ALLOCATION_MIN_TARGETS..=ALLOCATION_MAX_TARGETS) as usize;
1523 let parts = Self::split_amount(template.debit_amount, n, &mut self.allocation_rng);
1524 if parts.len() < ALLOCATION_MIN_TARGETS as usize {
1525 return None;
1527 }
1528 let company_code = entry.header.company_code.clone();
1530 let cc_pool: Vec<String> = if self.cost_center_pool.is_empty() {
1531 Self::COST_CENTER_POOL
1532 .iter()
1533 .map(|s| s.to_string())
1534 .collect()
1535 } else {
1536 let needle = format!("-{company_code}-");
1537 let filtered: Vec<String> = self
1538 .cost_center_pool
1539 .iter()
1540 .filter(|id| id.contains(&needle))
1541 .cloned()
1542 .collect();
1543 if filtered.is_empty() {
1544 self.cost_center_pool.clone()
1545 } else {
1546 filtered
1547 }
1548 };
1549 let mut new_lines: Vec<JournalEntryLine> =
1550 Vec::with_capacity(entry.lines.len() + parts.len() - 1);
1551 for (j, line) in entry.lines.iter().enumerate() {
1552 if j == idx {
1553 let bu_on = self.config.business_unit_dimension.unwrap_or(true);
1554 for (k, part) in parts.iter().enumerate() {
1555 let mut nl = template.clone();
1556 nl.debit_amount = *part;
1557 nl.credit_amount = Decimal::ZERO;
1558 nl.cost_center = Some(cc_pool[k % cc_pool.len()].clone());
1559 if bu_on {
1562 nl.business_unit = nl
1563 .cost_center
1564 .as_deref()
1565 .map(Self::business_unit_for_dimension);
1566 }
1567 new_lines.push(nl);
1568 }
1569 } else {
1570 new_lines.push(line.clone());
1571 }
1572 }
1573 let base_id = entry.header.document_id;
1575 let alloc_id =
1576 uuid::Uuid::from_u128(base_id.as_u128() ^ 0xA110_CA70_A110_CA70_A110_CA70_A110_CA70);
1577 entry.header.document_id = alloc_id;
1578 entry.header.sap_source_code = Some("AB".to_string());
1579 entry.header.header_text = Some("Allocation/assessment cycle".to_string());
1580 entry.header.reference = Some(format!("ALLOC-{base_id}"));
1581 entry.header.batch_id = None;
1582 for (i, line) in new_lines.iter_mut().enumerate() {
1583 line.line_number = (i + 1) as u32;
1584 line.document_id = alloc_id;
1585 }
1586 entry.lines = new_lines.into();
1587 Some(entry)
1588 }
1589
1590 fn determine_fraud(&mut self, business_process: BusinessProcess) -> Option<FraudType> {
1591 if !self.fraud_config.enabled {
1592 return None;
1593 }
1594
1595 let process_slug = match business_process {
1604 BusinessProcess::P2P => "P2P",
1605 BusinessProcess::O2C => "O2C",
1606 BusinessProcess::R2R => "R2R",
1607 BusinessProcess::H2R => "H2R",
1608 BusinessProcess::A2R => "A2R",
1609 BusinessProcess::S2C => "S2C",
1610 BusinessProcess::Mfg => "MFG",
1611 BusinessProcess::Bank => "BANK",
1612 BusinessProcess::Audit => "AUDIT",
1613 BusinessProcess::Treasury => "TREASURY",
1614 BusinessProcess::Tax => "TAX",
1615 BusinessProcess::Intercompany => "INTERCOMPANY",
1616 BusinessProcess::ProjectAccounting => "PROJECT",
1617 BusinessProcess::Esg => "ESG",
1618 };
1619 let effective_rate = self
1620 .fraud_config
1621 .per_process_rates
1622 .get(process_slug)
1623 .copied()
1624 .unwrap_or(self.fraud_config.fraud_rate);
1625
1626 if self.rng.random::<f64>() >= effective_rate {
1628 return None;
1629 }
1630
1631 Some(self.select_fraud_type())
1633 }
1634
1635 fn select_fraud_type(&mut self) -> FraudType {
1637 let dist = &self.fraud_config.fraud_type_distribution;
1638 let roll: f64 = self.rng.random();
1639
1640 let mut cumulative = 0.0;
1641
1642 cumulative += dist.suspense_account_abuse;
1643 if roll < cumulative {
1644 return FraudType::SuspenseAccountAbuse;
1645 }
1646
1647 cumulative += dist.fictitious_transaction;
1648 if roll < cumulative {
1649 return FraudType::FictitiousTransaction;
1650 }
1651
1652 cumulative += dist.revenue_manipulation;
1653 if roll < cumulative {
1654 return FraudType::RevenueManipulation;
1655 }
1656
1657 cumulative += dist.expense_capitalization;
1658 if roll < cumulative {
1659 return FraudType::ExpenseCapitalization;
1660 }
1661
1662 cumulative += dist.split_transaction;
1663 if roll < cumulative {
1664 return FraudType::SplitTransaction;
1665 }
1666
1667 cumulative += dist.timing_anomaly;
1668 if roll < cumulative {
1669 return FraudType::TimingAnomaly;
1670 }
1671
1672 cumulative += dist.unauthorized_access;
1673 if roll < cumulative {
1674 return FraudType::UnauthorizedAccess;
1675 }
1676
1677 cumulative += dist.duplicate_payment;
1678 if roll < cumulative {
1679 return FraudType::DuplicatePayment;
1680 }
1681
1682 cumulative += dist.kickback_scheme;
1683 if roll < cumulative {
1684 return FraudType::KickbackScheme;
1685 }
1686
1687 cumulative += dist.round_tripping;
1688 if roll < cumulative {
1689 return FraudType::RoundTripping;
1690 }
1691
1692 cumulative += dist.unauthorized_discount;
1693 if roll < cumulative {
1694 return FraudType::UnauthorizedDiscount;
1695 }
1696
1697 FraudType::DuplicatePayment
1699 }
1700
1701 fn fraud_type_to_amount_pattern(&self, fraud_type: FraudType) -> FraudAmountPattern {
1703 match fraud_type {
1704 FraudType::SplitTransaction | FraudType::JustBelowThreshold => {
1705 FraudAmountPattern::ThresholdAdjacent
1706 }
1707 FraudType::FictitiousTransaction
1708 | FraudType::FictitiousEntry
1709 | FraudType::SuspenseAccountAbuse
1710 | FraudType::RoundDollarManipulation => FraudAmountPattern::ObviousRoundNumbers,
1711 FraudType::RevenueManipulation
1712 | FraudType::ExpenseCapitalization
1713 | FraudType::ImproperCapitalization
1714 | FraudType::ReserveManipulation
1715 | FraudType::UnauthorizedAccess
1716 | FraudType::PrematureRevenue
1717 | FraudType::UnderstatedLiabilities
1718 | FraudType::OverstatedAssets
1719 | FraudType::ChannelStuffing => FraudAmountPattern::StatisticallyImprobable,
1720 FraudType::DuplicatePayment
1721 | FraudType::TimingAnomaly
1722 | FraudType::SelfApproval
1723 | FraudType::ExceededApprovalLimit
1724 | FraudType::SegregationOfDutiesViolation
1725 | FraudType::UnauthorizedApproval
1726 | FraudType::CollusiveApproval
1727 | FraudType::FictitiousVendor
1728 | FraudType::ShellCompanyPayment
1729 | FraudType::Kickback
1730 | FraudType::KickbackScheme
1731 | FraudType::UnauthorizedDiscount
1732 | FraudType::RoundTripping
1733 | FraudType::InvoiceManipulation
1734 | FraudType::AssetMisappropriation
1735 | FraudType::InventoryTheft
1736 | FraudType::GhostEmployee => FraudAmountPattern::Normal,
1737 FraudType::ImproperRevenueRecognition
1739 | FraudType::ImproperPoAllocation
1740 | FraudType::VariableConsiderationManipulation
1741 | FraudType::ContractModificationMisstatement => {
1742 FraudAmountPattern::StatisticallyImprobable
1743 }
1744 FraudType::LeaseClassificationManipulation
1746 | FraudType::OffBalanceSheetLease
1747 | FraudType::LeaseLiabilityUnderstatement
1748 | FraudType::RouAssetMisstatement => FraudAmountPattern::StatisticallyImprobable,
1749 FraudType::FairValueHierarchyManipulation
1751 | FraudType::Level3InputManipulation
1752 | FraudType::ValuationTechniqueManipulation => {
1753 FraudAmountPattern::StatisticallyImprobable
1754 }
1755 FraudType::DelayedImpairment
1757 | FraudType::ImpairmentTestAvoidance
1758 | FraudType::CashFlowProjectionManipulation
1759 | FraudType::ImproperImpairmentReversal => FraudAmountPattern::StatisticallyImprobable,
1760 FraudType::BidRigging
1762 | FraudType::PhantomVendorContract
1763 | FraudType::ConflictOfInterestSourcing => FraudAmountPattern::Normal,
1764 FraudType::SplitContractThreshold => FraudAmountPattern::ThresholdAdjacent,
1765 FraudType::GhostEmployeePayroll
1767 | FraudType::PayrollInflation
1768 | FraudType::DuplicateExpenseReport
1769 | FraudType::FictitiousExpense => FraudAmountPattern::Normal,
1770 FraudType::SplitExpenseToAvoidApproval => FraudAmountPattern::ThresholdAdjacent,
1771 FraudType::RevenueTimingManipulation => FraudAmountPattern::StatisticallyImprobable,
1773 FraudType::QuotePriceOverride => FraudAmountPattern::Normal,
1774 }
1775 }
1776
1777 #[inline]
1779 fn generate_deterministic_uuid(&self) -> uuid::Uuid {
1780 self.uuid_factory.next()
1781 }
1782
    /// Built-in cost-center identifiers, used as the fallback pool whenever
    /// `self.cost_center_pool` is empty.
    const COST_CENTER_POOL: &'static [&'static str] =
        &["CC1000", "CC2000", "CC3000", "CC4000", "CC5000"];
1786
1787 fn enrich_line_items(&mut self, entry: &mut JournalEntry) {
1796 let posting_date = entry.header.posting_date;
1797 let company_code = &entry.header.company_code;
1798 let header_text = entry.header.header_text.clone();
1799 let business_process = entry.header.business_process;
1800 let doc_type_key = entry.header.document_type.clone();
1803
1804 let header_sap_code: Option<String> = entry.header.sap_source_code.clone();
1808
1809 let (cc_pc_neighbor_vec, cc_pc_share_prob): (Vec<String>, f64) =
1813 if let Some(priors) = &self.loaded_priors {
1814 if let Some(motifs) = &priors.cross_entity_motifs {
1815 (
1816 motifs.neighbors(&doc_type_key).to_vec(),
1817 motifs.should_share(&doc_type_key),
1818 )
1819 } else {
1820 (Vec::new(), 0.0)
1821 }
1822 } else {
1823 (Vec::new(), 0.0)
1824 };
1825
1826 let doc_id_bytes = entry.header.document_id.as_bytes();
1828 let mut cc_seed: usize = 0;
1829 for &b in doc_id_bytes {
1830 cc_seed = cc_seed.wrapping_add(b as usize);
1831 }
1832
1833 for (i, line) in entry.lines.iter_mut().enumerate() {
1834 if line.account_description.is_none() {
1836 line.account_description = self
1837 .coa
1838 .get_account(&line.gl_account)
1839 .map(|a| a.short_description.clone());
1840 }
1841
1842 if line.cost_center.is_none() {
1861 let priors_opt = &mut self.loaded_priors;
1866 let rng_ref = &mut self.rng;
1867 if let Some(priors) = priors_opt {
1868 let sp37_cc = header_sap_code.as_deref().and_then(|code| {
1869 priors.sample_attribute_for_source(code, "cost_center", rng_ref)
1870 });
1871 if sp37_cc.is_some() {
1872 line.cost_center = sp37_cc;
1873 } else if let Some(sampler) = priors.fanout_samplers.get_mut("CostCenter") {
1874 line.cost_center = Some(sampler.pick_for_with_neighbors(
1875 &doc_type_key,
1876 &cc_pc_neighbor_vec,
1877 cc_pc_share_prob,
1878 rng_ref,
1879 ));
1880 }
1881 }
1882 }
1883 if line.cost_center.is_none() {
1884 let first_char = line.gl_account.chars().next().unwrap_or('0');
1885 if first_char == '5' || first_char == '6' {
1886 if !self.cost_center_pool.is_empty() {
1887 let needle = format!("-{company_code}-");
1888 let candidates: Vec<&String> = self
1889 .cost_center_pool
1890 .iter()
1891 .filter(|id| id.contains(&needle))
1892 .collect();
1893 let pool: Vec<&String> = if candidates.is_empty() {
1894 self.cost_center_pool.iter().collect()
1895 } else {
1896 candidates
1897 };
1898 let idx = cc_seed.wrapping_add(i) % pool.len();
1899 line.cost_center = Some(pool[idx].clone());
1900 } else {
1901 let idx = cc_seed.wrapping_add(i) % Self::COST_CENTER_POOL.len();
1902 line.cost_center = Some(Self::COST_CENTER_POOL[idx].to_string());
1903 }
1904 }
1905 }
1906
1907 if line.profit_center.is_none() {
1915 let priors_opt = &mut self.loaded_priors;
1920 let rng_ref = &mut self.rng;
1921 if let Some(priors) = priors_opt {
1922 let sp37_pc = header_sap_code.as_deref().and_then(|code| {
1923 priors.sample_attribute_for_source(code, "profit_center", rng_ref)
1924 });
1925 if sp37_pc.is_some() {
1926 line.profit_center = sp37_pc;
1927 } else if let Some(sampler) = priors.fanout_samplers.get_mut("ProfitCenter") {
1928 line.profit_center = Some(sampler.pick_for_with_neighbors(
1929 &doc_type_key,
1930 &cc_pc_neighbor_vec,
1931 cc_pc_share_prob,
1932 rng_ref,
1933 ));
1934 }
1935 }
1936 }
1937 if line.profit_center.is_none() {
1938 if !self.profit_center_pool.is_empty() {
1939 let needle = format!("-{company_code}-");
1940 let candidates: Vec<&String> = self
1941 .profit_center_pool
1942 .iter()
1943 .filter(|id| id.contains(&needle))
1944 .collect();
1945 let pool: Vec<&String> = if candidates.is_empty() {
1946 self.profit_center_pool.iter().collect()
1947 } else {
1948 candidates
1949 };
1950 let idx = cc_seed.wrapping_add(i) % pool.len();
1951 line.profit_center = Some(pool[idx].clone());
1952 } else {
1953 let suffix = match business_process {
1954 Some(BusinessProcess::P2P) => "-P2P",
1955 Some(BusinessProcess::O2C) => "-O2C",
1956 Some(BusinessProcess::R2R) => "-R2R",
1957 Some(BusinessProcess::H2R) => "-H2R",
1958 _ => "",
1959 };
1960 line.profit_center = Some(format!("PC-{company_code}{suffix}"));
1961 }
1962 }
1963
1964 if line.business_unit.is_none() && self.config.business_unit_dimension.unwrap_or(true) {
1971 if let Some(dim) = line
1972 .cost_center
1973 .as_deref()
1974 .or(line.profit_center.as_deref())
1975 {
1976 line.business_unit = Some(Self::business_unit_for_dimension(dim));
1977 }
1978 }
1979
1980 if line.trading_partner.is_none() {
1986 line.trading_partner = entry.header.trading_partner.clone();
1987 }
1988
1989 if line.line_text.is_none() {
1991 line.line_text = header_text.clone();
1992 }
1993
1994 if line.value_date.is_none()
1996 && (line.gl_account.starts_with("1100") || line.gl_account.starts_with("2000"))
1997 {
1998 line.value_date = Some(posting_date);
1999 }
2000
2001 if line.assignment.is_none() {
2003 if line.gl_account.starts_with("2000") {
2004 if let Some(ref ht) = header_text {
2006 if let Some(vendor_part) = ht.rsplit(" - ").next() {
2008 if vendor_part.starts_with("V-")
2009 || vendor_part.starts_with("VENDOR")
2010 || vendor_part.starts_with("Vendor")
2011 {
2012 line.assignment = Some(vendor_part.to_string());
2013 }
2014 }
2015 }
2016 } else if line.gl_account.starts_with("1100") {
2017 if let Some(ref ht) = header_text {
2019 if let Some(customer_part) = ht.rsplit(" - ").next() {
2020 if customer_part.starts_with("C-")
2021 || customer_part.starts_with("CUST")
2022 || customer_part.starts_with("Customer")
2023 {
2024 line.assignment = Some(customer_part.to_string());
2025 }
2026 }
2027 }
2028 }
2029 }
2030 }
2031 }
2032
2033 pub fn generate(&mut self) -> JournalEntry {
2035 debug!(
2036 count = self.count,
2037 companies = self.companies.len(),
2038 start_date = %self.start_date,
2039 end_date = %self.end_date,
2040 "Generating journal entry"
2041 );
2042
2043 if let Some(ref state) = self.batch_state {
2045 if state.remaining > 0 {
2046 return self.generate_batched_entry();
2047 }
2048 }
2049
2050 if let Some(rev) = self.maybe_generate_reversal() {
2053 return rev;
2054 }
2055
2056 if let Some(alloc) = self.maybe_generate_allocation_batch() {
2059 return alloc;
2060 }
2061
2062 if self.md_resolver.companies.is_empty()
2065 && self.md_resolver.persons.is_empty()
2066 && self.md_resolver.patients.is_empty()
2067 {
2068 self.refresh_md_resolver();
2069 }
2070
2071 self.count += 1;
2072
2073 let document_id = self.generate_deterministic_uuid();
2075
2076 let mut posting_date = if self.loaded_priors.is_none() {
2092 let mut d = self
2093 .temporal_sampler
2094 .sample_date(self.start_date, self.end_date);
2095 if let Some(ref calc) = self.business_day_calculator {
2097 if !calc.is_business_day(d) {
2098 use chrono::Datelike;
2105 d = if d.weekday() == chrono::Weekday::Sat {
2106 calc.prev_business_day(d, false)
2107 } else {
2108 calc.next_business_day(d, false)
2109 };
2110 if d > self.end_date {
2111 d = calc.prev_business_day(self.end_date, true);
2112 } else if d < self.start_date {
2113 d = calc.next_business_day(self.start_date, true);
2114 }
2115 }
2116 }
2117 d
2118 } else {
2119 self.start_date
2122 };
2123
2124 let company_code = self.company_selector.select(&mut self.rng).to_string();
2126
2127 let copula_uv: Option<(f64, f64)> =
2131 self.correlation_copula.as_mut().map(|cop| cop.sample());
2132
2133 let mut line_spec = self.line_sampler.sample();
2142 if let Some((_u, v)) = copula_uv {
2143 let new_total = 2 + ((v * 10.0).floor() as usize).min(9);
2144 let old_debit = line_spec.debit_count.max(1);
2145 let old_credit = line_spec.credit_count.max(1);
2146 let new_debit = (new_total as f64 * old_debit as f64 / (old_debit + old_credit) as f64)
2147 .round() as usize;
2148 let new_debit = new_debit.clamp(1, new_total - 1);
2149 let new_credit = new_total - new_debit;
2150 line_spec.total_count = new_total;
2151 line_spec.debit_count = new_debit;
2152 line_spec.credit_count = new_credit;
2153 }
2154
2155 if let Some(cap) = self.config.lines_per_je_cap {
2159 let cap = cap.max(2);
2160 let total = line_spec.debit_count + line_spec.credit_count;
2161 if total > cap {
2162 let new_debit =
2163 ((line_spec.debit_count as f64 / total as f64) * cap as f64).round() as usize;
2164 let new_debit = new_debit.clamp(1, cap - 1);
2165 let new_credit = cap - new_debit;
2166 line_spec.total_count = cap;
2167 line_spec.debit_count = new_debit;
2168 line_spec.credit_count = new_credit;
2169 }
2170 }
2171
2172 let source = self.select_source();
2174 let is_automated = matches!(
2175 source,
2176 TransactionSource::Automated | TransactionSource::Recurring
2177 );
2178
2179 let sap_source_code: Option<String> = self.sample_sap_source_code();
2185 self.current_je_source = sap_source_code.clone();
2188
2189 let business_process = self.select_business_process();
2191
2192 {
2212 let priors_opt = &mut self.loaded_priors;
2214 let rng_ref = &mut self.rng;
2215 let iet_accum_ref = &mut self.iet_day_accum;
2216 let burst_ref = &mut self.iet_burst_remaining;
2217 if let Some(priors) = priors_opt {
2218 let iet_key = sap_source_code
2222 .as_deref()
2223 .unwrap_or_else(|| Self::document_type_for_process(business_process))
2224 .to_string();
2225 let period_days = (self.end_date - self.start_date).num_days().max(1) as f64;
2226
2227 const BURST_THRESHOLD_DAYS: f64 = 2.0;
2253 const BURST_PROB: f64 = 0.30;
2254 const BURST_LEN_MIN: u8 = 2;
2255 const BURST_LEN_MAX: u8 = 4;
2256
2257 let sampled_iet = priors.iet_sampler.sample_next(&iet_key, rng_ref).max(0.001);
2258
2259 let remaining = burst_ref.get(&iet_key).copied().unwrap_or(0);
2261 let iet = if remaining > 0 {
2262 burst_ref.insert(iet_key.clone(), remaining - 1);
2264 rng_ref.random_range(0.25..=1.5)
2265 } else if sampled_iet < BURST_THRESHOLD_DAYS
2266 && rng_ref.random_range(0.0..1.0) < BURST_PROB
2267 {
2268 let len = rng_ref.random_range(BURST_LEN_MIN..=BURST_LEN_MAX);
2272 burst_ref.insert(iet_key.clone(), len);
2273 sampled_iet
2274 } else {
2275 sampled_iet
2276 };
2277
2278 let accum = iet_accum_ref.entry(iet_key).or_insert(0.0);
2279 *accum += iet;
2280 if *accum >= period_days {
2282 *accum %= period_days;
2283 }
2284 let day_offset =
2285 (*accum as i64).clamp(0, (self.end_date - self.start_date).num_days());
2286 posting_date = self.start_date + chrono::Duration::days(day_offset);
2287 if let Some(ref calc) = self.business_day_calculator {
2290 if !calc.is_business_day(posting_date) {
2291 posting_date = calc.next_business_day(posting_date, false);
2292 if posting_date > self.end_date {
2293 posting_date = calc.prev_business_day(self.end_date, true);
2294 }
2295 }
2296 }
2297 } } if let Some(ref priors) = self.loaded_priors {
2314 let doc_type = Self::document_type_for_process(business_process);
2315 let day_in_period = (posting_date - self.start_date).num_days();
2316 let active = match &priors.multi_segment_window {
2317 Some(msw) => msw.is_active(doc_type, day_in_period),
2318 None => priors.active_window.is_active(doc_type, day_in_period),
2319 };
2320 if !active {
2321 posting_date = self
2326 .temporal_sampler
2327 .sample_date(self.start_date, self.end_date);
2328 if let Some(ref calc) = self.business_day_calculator {
2329 if !calc.is_business_day(posting_date) {
2330 posting_date = calc.next_business_day(posting_date, false);
2331 if posting_date > self.end_date {
2332 posting_date = calc.prev_business_day(self.end_date, true);
2333 }
2334 }
2335 }
2336 }
2337 }
2338
2339 if let Some(ref priors) = self.loaded_priors {
2348 let doc_type = Self::document_type_for_process(business_process);
2349 let hist = priors
2350 .lines_per_je
2351 .by_source
2352 .get(doc_type)
2353 .unwrap_or(&priors.lines_per_je.overall);
2354 let n_total = (hist.sample_bucket(&mut self.rng) as usize).max(2);
2355 let old_debit = line_spec.debit_count.max(1);
2356 let old_credit = line_spec.credit_count.max(1);
2357 let new_debit = (n_total as f64 * old_debit as f64 / (old_debit + old_credit) as f64)
2358 .round() as usize;
2359 let new_debit = new_debit.clamp(1, n_total - 1);
2360 line_spec.total_count = n_total;
2361 line_spec.debit_count = new_debit;
2362 line_spec.credit_count = n_total - new_debit;
2363 }
2364
2365 let fraud_type = self.determine_fraud(business_process);
2369 let is_fraud = fraud_type.is_some();
2370
2371 let time = self.temporal_sampler.sample_time(!is_automated);
2373 let created_at = posting_date.and_time(time).and_utc();
2374
2375 let (created_by, user_persona) = self.select_user(is_automated);
2377
2378 let mut header =
2380 JournalEntryHeader::with_deterministic_id(company_code, posting_date, document_id);
2381 header.created_at = created_at;
2382 header.source = source;
2383 header.sap_source_code = sap_source_code;
2384
2385 self.apply_trading_partner_motif(&mut header);
2389
2390 let (created_by, created_at) = {
2395 let sap_code_for_user = header.sap_source_code.clone();
2396 if let (Some(ref code), Some(ref priors)) = (sap_code_for_user, &self.loaded_priors) {
2397 if let Some(uid) = priors.sample_user_for_source(code, &mut self.rng) {
2398 let new_created_at = if let Some((hour, _)) =
2399 priors.sample_timestamp_for_user(&uid, &mut self.rng)
2400 {
2401 let base = header.created_at;
2402 base.date_naive()
2403 .and_hms_opt(hour, 0, 0)
2404 .map(|naive| naive.and_utc())
2405 .unwrap_or(base)
2406 } else {
2407 header.created_at
2408 };
2409 (uid, new_created_at)
2410 } else {
2411 (created_by, header.created_at)
2412 }
2413 } else {
2414 (created_by, header.created_at)
2415 }
2416 };
2417
2418 header.created_by = created_by;
2419 header.created_at = created_at;
2420 header.user_persona = user_persona;
2421 header.business_process = Some(business_process);
2422 header.document_type = Self::document_type_for_process(business_process).to_string();
2423 header.is_fraud = is_fraud;
2424 header.fraud_type = fraud_type;
2425
2426 let is_manual = matches!(source, TransactionSource::Manual);
2428 header.is_manual = is_manual;
2429
2430 header.source_system = Self::pick_source_system(&mut self.rng, is_manual, business_process);
2444
2445 let is_post_close = posting_date.month() == self.end_date.month()
2448 && posting_date.year() == self.end_date.year()
2449 && posting_date.day() > 25;
2450 header.is_post_close = is_post_close;
2451
2452 let created_date = if is_manual {
2455 posting_date.and_hms_opt(time.hour().min(23), time.minute(), time.second())
2456 } else {
2457 let lag_days = self.rng.random_range(0i64..=3);
2458 let created_naive_date = posting_date
2459 .checked_sub_signed(chrono::Duration::days(lag_days))
2460 .unwrap_or(posting_date);
2461 created_naive_date.and_hms_opt(
2462 self.rng.random_range(8u32..=17),
2463 self.rng.random_range(0u32..=59),
2464 self.rng.random_range(0u32..=59),
2465 )
2466 };
2467 header.created_date = created_date;
2468
2469 let context = self.build_description_context(business_process, posting_date);
2472
2473 self.apply_header_text_and_reference(&mut header, business_process, &context, posting_date);
2476
2477 let mut entry = JournalEntry::new(header);
2479
2480 let base_amount = if let Some(ft) = fraud_type {
2486 let pattern = self.fraud_type_to_amount_pattern(ft);
2487 self.amount_sampler.sample_fraud(pattern)
2488 } else if let Some(ref mut adv) = self.advanced_amount_sampler {
2489 adv.sample_decimal()
2490 } else {
2491 self.amount_sampler.sample()
2492 };
2493 let base_amount = if fraud_type.is_none() {
2499 let input = self.conditional_input_value(posting_date);
2503 if let Some(ref mut cond) = self.conditional_amount_override {
2504 cond.sample_decimal(input)
2505 } else {
2506 base_amount
2507 }
2508 } else {
2509 base_amount
2510 };
2511
2512 const PRIORS_AMOUNT_BYPASS_SHARE: f64 = 0.25;
2534 let base_amount = if fraud_type.is_none() {
2535 if let Some(src) = entry.header.sap_source_code.as_deref() {
2536 let src_owned = src.to_string();
2537 let use_conditional = self.loaded_priors.is_some()
2540 && self.rng.random_range(0.0..1.0) >= PRIORS_AMOUNT_BYPASS_SHARE;
2541 if use_conditional {
2542 let priors_ref = &mut self.loaded_priors;
2543 let rng_ref = &mut self.rng;
2544 if let Some(priors) = priors_ref {
2545 priors
2546 .sample_amount_for_source(&src_owned, "", rng_ref)
2547 .and_then(|v| {
2548 if v.is_finite() && v > 0.0 {
2549 Decimal::from_f64_retain(v)
2550 } else {
2551 None
2552 }
2553 })
2554 .unwrap_or(base_amount)
2555 } else {
2556 base_amount
2557 }
2558 } else {
2559 base_amount
2560 }
2561 } else {
2562 base_amount
2563 }
2564 } else {
2565 base_amount
2566 };
2567
2568 let base_amount = if fraud_type.is_none() {
2579 if let Some((u, _v)) = copula_uv {
2580 if let Some(ref adv) = self.advanced_amount_sampler {
2581 adv.ppf_decimal(u)
2582 } else {
2583 let log_mult = 4.0 * (u - 0.5);
2584 let adjusted = base_amount.to_f64().unwrap_or(1.0) * log_mult.exp();
2585 Decimal::from_f64_retain(adjusted).unwrap_or(base_amount)
2586 }
2587 } else {
2588 base_amount
2589 }
2590 } else {
2591 base_amount
2592 };
2593
2594 let drift_adjusted_amount = {
2596 let drift = self.get_drift_adjustments(posting_date);
2597 if drift.amount_mean_multiplier != 1.0 {
2598 let multiplier = drift.amount_mean_multiplier * drift.seasonal_factor;
2600 let adjusted = base_amount.to_f64().unwrap_or(1.0) * multiplier;
2601 Decimal::from_f64_retain(adjusted).unwrap_or(base_amount)
2602 } else {
2603 base_amount
2604 }
2605 };
2606
2607 let total_amount = if is_automated {
2609 drift_adjusted_amount } else {
2611 self.apply_human_variation(drift_adjusted_amount)
2612 };
2613
2614 let total_amount = if copula_uv.is_none()
2627 && fraud_type.is_none()
2628 && !self.config.disable_line_count_amount_coupling
2629 {
2630 const LINE_COUNT_AMOUNT_EXPONENT: f64 = 0.85;
2631 let per_side = (line_spec.total_count as f64 / 2.0).max(1.0);
2632 if per_side > 1.0 {
2633 let scaled = (total_amount.to_f64().unwrap_or(0.0)
2636 * per_side.powf(LINE_COUNT_AMOUNT_EXPONENT))
2637 .min(self.config.amounts.max_amount);
2638 Decimal::from_f64_retain(scaled)
2639 .map(|d| d.round_dp(2))
2640 .unwrap_or(total_amount)
2641 } else {
2642 total_amount
2643 }
2644 } else {
2645 total_amount
2646 };
2647
2648 let total_amount = {
2654 let v = total_amount.to_f64().unwrap_or(0.0).clamp(
2655 self.config.amounts.min_amount,
2656 self.config.amounts.max_amount,
2657 );
2658 Decimal::from_f64_retain(v)
2659 .map(|d| d.round_dp(2))
2660 .unwrap_or(total_amount)
2661 };
2662
2663 let doc_type_for_fanout = Self::document_type_for_process(business_process).to_string();
2667
2668 let (gl_neighbor_vec, gl_share_prob): (Vec<String>, f64) =
2673 if let Some(priors) = &self.loaded_priors {
2674 if let Some(motifs) = &priors.cross_entity_motifs {
2675 (
2676 motifs.neighbors(&doc_type_for_fanout).to_vec(),
2677 motifs.should_share(&doc_type_for_fanout),
2678 )
2679 } else {
2680 (Vec::new(), 0.0)
2681 }
2682 } else {
2683 (Vec::new(), 0.0)
2684 };
2685
2686 let reuse_archetype = self.pick_recurring_archetype(
2693 &entry.header.company_code,
2694 &doc_type_for_fanout,
2695 line_spec.debit_count,
2696 line_spec.credit_count,
2697 );
2698 let mut fresh_debit_accts: Vec<String> = Vec::new();
2699 let mut fresh_credit_accts: Vec<String> = Vec::new();
2700 let sota8_active = self.config.source_conditional_account_pair.enabled;
2703
2704 let debit_amounts = self
2706 .amount_sampler
2707 .sample_summing_to(line_spec.debit_count, total_amount);
2708 for (i, amount) in debit_amounts.into_iter().enumerate() {
2709 let debit_fallback = self.select_debit_account().account_number.clone();
2718 let account_number = if sota8_active {
2724 debit_fallback
2725 } else {
2726 let priors_opt = &mut self.loaded_priors;
2727 let rng_ref = &mut self.rng;
2728 if let Some(priors) = priors_opt {
2729 let sp46_gl = entry
2733 .header
2734 .sap_source_code
2735 .as_deref()
2736 .and_then(|code| priors.sample_gl_for_source_role(code, "DR", rng_ref));
2737 if let Some(gl) = sp46_gl {
2738 gl
2739 } else {
2740 let sp37_gl = entry.header.sap_source_code.as_deref().and_then(|code| {
2742 priors.sample_attribute_for_source(code, "gl_account", rng_ref)
2743 });
2744 if let Some(gl) = sp37_gl {
2745 gl
2746 } else if let Some(sampler) = priors.fanout_samplers.get_mut("GLAccount") {
2747 sampler.pick_for_with_neighbors(
2749 &doc_type_for_fanout,
2750 &gl_neighbor_vec,
2751 gl_share_prob,
2752 rng_ref,
2753 )
2754 } else {
2755 debit_fallback
2756 }
2757 }
2758 } else {
2759 debit_fallback
2760 }
2761 };
2762 let mut line = JournalEntryLine::debit(
2763 entry.header.document_id,
2764 (i + 1) as u32,
2765 account_number.clone(),
2766 amount,
2767 );
2768
2769 if self.template_config.descriptions.generate_line_text {
2772 let src = entry.header.sap_source_code.as_deref();
2773 let priors_line = if let Some(s) = src {
2774 if let Some(p) = self.loaded_priors.as_ref() {
2775 let account_class = p
2776 .coa_semantic
2777 .as_ref()
2778 .and_then(|c| c.accounts.get(&account_number))
2779 .and_then(|a| a.account_class.as_deref())
2780 .unwrap_or(
2781 datasynth_core::distributions::text_taxonomy::TextTaxonomyPrior::UNKNOWN_CLASS,
2782 );
2783 p.sample_line_template(
2785 s,
2786 account_class,
2787 &mut self.md_resolver,
2788 &mut self.rng,
2789 )
2790 } else {
2791 None
2792 }
2793 } else {
2794 None
2795 };
2796 line.line_text = Some(priors_line.unwrap_or_else(|| {
2797 self.description_generator.generate_line_text(
2798 &account_number,
2799 &context,
2800 &mut self.rng,
2801 )
2802 }));
2803 }
2804
2805 if let Some((ref d, _)) = reuse_archetype {
2813 if let Some(a) = d.get(i) {
2814 line.gl_account = a.clone();
2815 }
2816 } else if self.loaded_priors.is_none() {
2817 fresh_debit_accts.push(line.gl_account.clone());
2818 }
2819 entry.add_line(line);
2820 }
2821
2822 let credit_amounts = self
2824 .amount_sampler
2825 .sample_summing_to(line_spec.credit_count, total_amount);
2826 for (i, amount) in credit_amounts.into_iter().enumerate() {
2827 let credit_fallback = self.select_credit_account().account_number.clone();
2829 let account_number = if sota8_active {
2831 credit_fallback
2832 } else {
2833 let priors_opt = &mut self.loaded_priors;
2834 let rng_ref = &mut self.rng;
2835 if let Some(priors) = priors_opt {
2836 let sp46_gl = entry
2837 .header
2838 .sap_source_code
2839 .as_deref()
2840 .and_then(|code| priors.sample_gl_for_source_role(code, "CR", rng_ref));
2841 if let Some(gl) = sp46_gl {
2842 gl
2843 } else {
2844 let sp37_gl = entry.header.sap_source_code.as_deref().and_then(|code| {
2845 priors.sample_attribute_for_source(code, "gl_account", rng_ref)
2846 });
2847 if let Some(gl) = sp37_gl {
2848 gl
2849 } else if let Some(sampler) = priors.fanout_samplers.get_mut("GLAccount") {
2850 sampler.pick_for_with_neighbors(
2851 &doc_type_for_fanout,
2852 &gl_neighbor_vec,
2853 gl_share_prob,
2854 rng_ref,
2855 )
2856 } else {
2857 credit_fallback
2858 }
2859 }
2860 } else {
2861 credit_fallback
2862 }
2863 };
2864 let mut line = JournalEntryLine::credit(
2865 entry.header.document_id,
2866 (line_spec.debit_count + i + 1) as u32,
2867 account_number.clone(),
2868 amount,
2869 );
2870
2871 if self.template_config.descriptions.generate_line_text {
2874 let src = entry.header.sap_source_code.as_deref();
2875 let priors_line = if let Some(s) = src {
2876 if let Some(p) = self.loaded_priors.as_ref() {
2877 let account_class = p
2878 .coa_semantic
2879 .as_ref()
2880 .and_then(|c| c.accounts.get(&account_number))
2881 .and_then(|a| a.account_class.as_deref())
2882 .unwrap_or(
2883 datasynth_core::distributions::text_taxonomy::TextTaxonomyPrior::UNKNOWN_CLASS,
2884 );
2885 p.sample_line_template(
2887 s,
2888 account_class,
2889 &mut self.md_resolver,
2890 &mut self.rng,
2891 )
2892 } else {
2893 None
2894 }
2895 } else {
2896 None
2897 };
2898 line.line_text = Some(priors_line.unwrap_or_else(|| {
2899 self.description_generator.generate_line_text(
2900 &account_number,
2901 &context,
2902 &mut self.rng,
2903 )
2904 }));
2905 }
2906
2907 if let Some((_, ref c)) = reuse_archetype {
2911 if let Some(a) = c.get(i) {
2912 line.gl_account = a.clone();
2913 }
2914 } else if self.loaded_priors.is_none() {
2915 fresh_credit_accts.push(line.gl_account.clone());
2916 }
2917 entry.add_line(line);
2918 }
2919
2920 if reuse_archetype.is_none() {
2923 self.cache_recurring_archetype(
2924 &entry.header.company_code,
2925 &doc_type_for_fanout,
2926 std::mem::take(&mut fresh_debit_accts),
2927 std::mem::take(&mut fresh_credit_accts),
2928 );
2929 }
2930
2931 self.enrich_line_items(&mut entry);
2933
2934 if self.persona_errors_enabled && !is_automated {
2936 self.maybe_inject_persona_error(&mut entry);
2937 }
2938
2939 if self.approval_enabled {
2941 self.maybe_apply_approval_workflow(&mut entry, posting_date);
2942 }
2943
2944 self.populate_approval_fields(&mut entry, posting_date);
2946
2947 self.maybe_start_batch(&entry);
2949
2950 if self.velocity_calibrator.is_some() {
2953 let mut pending: Vec<crate::velocity_calibrator::CalibrationStep> = Vec::new();
2954 for line in &entry.lines {
2955 if let Some(step) = self
2956 .velocity_calibrator
2957 .as_mut()
2958 .and_then(|cal| cal.observe_line(line))
2959 {
2960 pending.push(step);
2961 }
2962 }
2963 for step in pending {
2964 self.apply_calibration_step(&step);
2965 }
2966 }
2967
2968 self.apply_company_currency(&mut entry.header);
2971
2972 self.maybe_apply_foreign_currency(&mut entry);
2975
2976 self.record_for_reversal(&entry);
2978
2979 entry
2980 }
2981
2982 fn apply_calibration_step(&mut self, step: &crate::velocity_calibrator::CalibrationStep) {
2990 match step.parameter.as_str() {
2991 "amounts.lognormal_sigma" => {
2992 self.amount_sampler.set_lognormal_sigma(step.new_value);
2993 }
2994 "amounts.round_dollar_share" => {
2995 self.amount_sampler
2996 .set_round_number_probability(step.new_value);
2997 }
2998 _ => {
2999 }
3002 }
3003 }
3004
3005 pub fn with_persona_errors(mut self, enabled: bool) -> Self {
3010 self.persona_errors_enabled = enabled;
3011 self
3012 }
3013
3014 pub fn with_fraud_config(mut self, config: FraudConfig) -> Self {
3019 self.fraud_config = config;
3020 self
3021 }
3022
3023 pub fn persona_errors_enabled(&self) -> bool {
3025 self.persona_errors_enabled
3026 }
3027
3028 pub fn with_batching(mut self, enabled: bool) -> Self {
3033 self.batching_enabled = enabled;
3034 if !enabled {
3035 self.batch_state = None;
3036 }
3037 self
3038 }
3039
3040 pub fn batching_enabled(&self) -> bool {
3042 self.batching_enabled
3043 }
3044
3045 pub fn with_company_currencies(
3050 mut self,
3051 currencies: std::collections::HashMap<String, String>,
3052 ) -> Self {
3053 self.company_currencies = currencies;
3054 self
3055 }
3056
3057 fn apply_company_currency(&self, header: &mut JournalEntryHeader) {
3060 if let Some(ccy) = self.company_currencies.get(&header.company_code) {
3061 header.currency = ccy.clone();
3062 }
3063 }
3064
3065 fn apply_trading_partner_motif(&mut self, header: &mut JournalEntryHeader) {
3077 let code_opt = header.sap_source_code.clone();
3078 if let Some(ref code) = code_opt {
3079 let rng_ref = &mut self.rng;
3080 let tp_neighbors: Vec<String> = if let Some(ref priors) = self.loaded_priors {
3081 if let Some(ref motifs) = priors.tp_motif_sampler {
3082 if let Some(last_tp) = self.last_tp_by_source.get(code.as_str()) {
3083 motifs.neighbors(last_tp).to_vec()
3084 } else {
3085 Vec::new()
3086 }
3087 } else {
3088 Vec::new()
3089 }
3090 } else {
3091 Vec::new()
3092 };
3093 let tp_share_prob: f64 = if let Some(ref priors) = self.loaded_priors {
3094 if let Some(ref motifs) = priors.tp_motif_sampler {
3095 if let Some(last_tp) = self.last_tp_by_source.get(code.as_str()) {
3096 motifs.should_share(last_tp)
3097 } else {
3098 0.0
3099 }
3100 } else {
3101 0.0
3102 }
3103 } else {
3104 0.0
3105 };
3106 if let Some(ref mut priors) = self.loaded_priors {
3107 use datasynth_core::distributions::behavioral_priors::CategoricalDistribution;
3108 let tp = if !tp_neighbors.is_empty()
3109 && tp_share_prob > 0.0
3110 && rng_ref.random_range(0.0..1.0) < tp_share_prob
3111 {
3112 let filtered: std::collections::BTreeMap<String, f64> = priors
3113 .per_source_attribute
3114 .as_ref()
3115 .and_then(|psa| psa.conditional(code, "trading_partner"))
3116 .map(|dist| {
3117 dist.probabilities
3118 .iter()
3119 .filter(|(v, _)| tp_neighbors.contains(v))
3120 .map(|(v, p)| (v.clone(), *p))
3121 .collect()
3122 })
3123 .unwrap_or_default();
3124 if filtered.is_empty() {
3125 priors.sample_attribute_for_source(code, "trading_partner", rng_ref)
3126 } else {
3127 let neighbour_dist = CategoricalDistribution {
3128 probabilities: filtered,
3129 n: 0,
3130 };
3131 neighbour_dist.sample(rng_ref).or_else(|| {
3132 priors.sample_attribute_for_source(code, "trading_partner", rng_ref)
3133 })
3134 }
3135 } else {
3136 priors.sample_attribute_for_source(code, "trading_partner", rng_ref)
3137 };
3138 header.trading_partner = tp;
3139 }
3140 if let Some(ref tp) = header.trading_partner {
3141 self.last_tp_by_source.insert(code.clone(), tp.clone());
3142 }
3143 }
3144 }
3145
3146 fn build_description_context(
3149 &mut self,
3150 business_process: BusinessProcess,
3151 posting_date: chrono::NaiveDate,
3152 ) -> DescriptionContext {
3153 let mut context =
3154 DescriptionContext::with_period(posting_date.month(), posting_date.year());
3155 match business_process {
3156 BusinessProcess::P2P => {
3157 if let Some(vendor) = self.vendor_pool.random_vendor(&mut self.rng) {
3158 context.vendor_name = Some(vendor.name.clone());
3159 }
3160 }
3161 BusinessProcess::O2C => {
3162 if let Some(customer) = self.customer_pool.random_customer(&mut self.rng) {
3163 context.customer_name = Some(customer.name.clone());
3164 }
3165 }
3166 _ => {}
3167 }
3168 context
3169 }
3170
3171 fn apply_header_text_and_reference(
3175 &mut self,
3176 header: &mut JournalEntryHeader,
3177 business_process: BusinessProcess,
3178 context: &DescriptionContext,
3179 posting_date: chrono::NaiveDate,
3180 ) {
3181 if self.template_config.descriptions.generate_header_text {
3182 let priors_header = if let Some(src) = header.sap_source_code.as_deref() {
3183 if let Some(p) = self.loaded_priors.as_ref() {
3184 p.sample_header_template(src, &mut self.md_resolver, &mut self.rng)
3185 } else {
3186 None
3187 }
3188 } else {
3189 None
3190 };
3191 header.header_text = Some(priors_header.unwrap_or_else(|| {
3192 self.description_generator.generate_header_text(
3193 business_process,
3194 context,
3195 &mut self.rng,
3196 )
3197 }));
3198 }
3199 if self.template_config.references.generate_references {
3200 let priors_ref = header.sap_source_code.as_deref().and_then(|src| {
3201 self.loaded_priors
3202 .as_ref()
3203 .and_then(|p| p.sample_reference(src, &mut self.rng))
3204 });
3205 header.reference = Some(priors_ref.unwrap_or_else(|| {
3206 self.reference_generator
3207 .generate_for_process_year(business_process, posting_date.year())
3208 }));
3209 }
3210 header.source_document = header
3211 .reference
3212 .as_deref()
3213 .and_then(DocumentRef::parse)
3214 .or_else(|| {
3215 if header.source == TransactionSource::Manual {
3216 Some(DocumentRef::Manual)
3217 } else {
3218 None
3219 }
3220 });
3221 }
3222
3223 fn maybe_start_batch(&mut self, entry: &JournalEntry) {
3228 if !self.batching_enabled {
3230 return;
3231 }
3232 if entry.header.source == TransactionSource::Automated || entry.header.is_fraud {
3234 return;
3235 }
3236
3237 if self.rng.random::<f64>() > 0.15 {
3239 return;
3240 }
3241
3242 let base_account = entry
3244 .lines
3245 .first()
3246 .map(|l| l.gl_account.clone())
3247 .unwrap_or_default();
3248
3249 let base_amount = entry.total_debit();
3250
3251 self.batch_state = Some(BatchState {
3252 base_account_number: base_account,
3253 base_amount,
3254 base_business_process: entry.header.business_process,
3255 base_posting_date: entry.header.posting_date,
3256 remaining: self.rng.random_range(2..7), });
3258 }
3259
3260 fn generate_batched_entry(&mut self) -> JournalEntry {
3268 use rust_decimal::Decimal;
3269
3270 if let Some(ref mut state) = self.batch_state {
3272 state.remaining = state.remaining.saturating_sub(1);
3273 }
3274
3275 let Some(batch) = self.batch_state.clone() else {
3276 tracing::warn!(
3279 "generate_batched_entry called without batch_state; generating standard entry"
3280 );
3281 self.batch_state = None;
3282 return self.generate();
3283 };
3284
3285 let posting_date = batch.base_posting_date;
3287
3288 self.count += 1;
3289 let document_id = self.generate_deterministic_uuid();
3290
3291 let company_code = self.company_selector.select(&mut self.rng).to_string();
3293
3294 let _line_spec = LineItemSpec {
3296 total_count: 2,
3297 debit_count: 1,
3298 credit_count: 1,
3299 split_type: DebitCreditSplit::Equal,
3300 };
3301
3302 let source = TransactionSource::Manual;
3304
3305 let sap_source_code: Option<String> = self.sample_sap_source_code();
3307 self.current_je_source = sap_source_code.clone();
3309
3310 let business_process = batch.base_business_process.unwrap_or(BusinessProcess::R2R);
3312
3313 let time = self.temporal_sampler.sample_time(true);
3315 let created_at = posting_date.and_time(time).and_utc();
3316
3317 let (created_by, user_persona) = self.select_user(false);
3319
3320 let mut header =
3322 JournalEntryHeader::with_deterministic_id(company_code, posting_date, document_id);
3323 header.created_at = created_at;
3324 header.source = source;
3325 header.sap_source_code = sap_source_code;
3326
3327 self.apply_trading_partner_motif(&mut header);
3330
3331 let (created_by, created_at) = {
3333 let sap_code_for_user = header.sap_source_code.clone();
3334 if let (Some(ref code), Some(ref priors)) = (sap_code_for_user, &self.loaded_priors) {
3335 if let Some(uid) = priors.sample_user_for_source(code, &mut self.rng) {
3336 let new_created_at = if let Some((hour, _)) =
3337 priors.sample_timestamp_for_user(&uid, &mut self.rng)
3338 {
3339 let base = header.created_at;
3340 base.date_naive()
3341 .and_hms_opt(hour, 0, 0)
3342 .map(|naive| naive.and_utc())
3343 .unwrap_or(base)
3344 } else {
3345 header.created_at
3346 };
3347 (uid, new_created_at)
3348 } else {
3349 (created_by, header.created_at)
3350 }
3351 } else {
3352 (created_by, header.created_at)
3353 }
3354 };
3355
3356 header.created_by = created_by;
3357 header.created_at = created_at;
3358 header.user_persona = user_persona;
3359 header.business_process = Some(business_process);
3360 header.document_type = Self::document_type_for_process(business_process).to_string();
3361
3362 header.source_document = Some(DocumentRef::Manual);
3364
3365 header.is_manual = true;
3367 header.source_system = if self.rng.random::<f64>() < 0.70 {
3368 "manual".to_string()
3369 } else {
3370 "spreadsheet".to_string()
3371 };
3372 header.is_post_close = posting_date.month() == self.end_date.month()
3373 && posting_date.year() == self.end_date.year()
3374 && posting_date.day() > 25;
3375 header.created_date =
3376 posting_date.and_hms_opt(time.hour().min(23), time.minute(), time.second());
3377
3378 let variation = self.rng.random_range(-0.15..0.15);
3380 let varied_amount =
3381 batch.base_amount * (Decimal::ONE + Decimal::try_from(variation).unwrap_or_default());
3382 let total_amount = varied_amount.round_dp(2).max(Decimal::from(1));
3383
3384 let context = self.build_description_context(business_process, posting_date);
3388 self.apply_header_text_and_reference(&mut header, business_process, &context, posting_date);
3389
3390 let mut entry = JournalEntry::new(header);
3392
3393 let debit_line = JournalEntryLine::debit(
3395 entry.header.document_id,
3396 1,
3397 batch.base_account_number.clone(),
3398 total_amount,
3399 );
3400 entry.add_line(debit_line);
3401
3402 let credit_fallback = self.select_credit_account().account_number.clone();
3409 let credit_account = {
3410 let priors_opt = &mut self.loaded_priors;
3411 let rng_ref = &mut self.rng;
3412 if let Some(priors) = priors_opt {
3413 let sp46_gl = entry
3416 .header
3417 .sap_source_code
3418 .as_deref()
3419 .and_then(|code| priors.sample_gl_for_source_role(code, "CR", rng_ref));
3420 if let Some(gl) = sp46_gl {
3421 gl
3422 } else {
3423 let sp37_gl = entry.header.sap_source_code.as_deref().and_then(|code| {
3424 priors.sample_attribute_for_source(code, "gl_account", rng_ref)
3425 });
3426 sp37_gl.unwrap_or(credit_fallback)
3427 }
3428 } else {
3429 credit_fallback
3430 }
3431 };
3432 let credit_line =
3433 JournalEntryLine::credit(entry.header.document_id, 2, credit_account, total_amount);
3434 entry.add_line(credit_line);
3435
3436 self.enrich_line_items(&mut entry);
3438
3439 if self.persona_errors_enabled {
3441 self.maybe_inject_persona_error(&mut entry);
3442 }
3443
3444 if self.approval_enabled {
3446 self.maybe_apply_approval_workflow(&mut entry, posting_date);
3447 }
3448
3449 self.populate_approval_fields(&mut entry, posting_date);
3451
3452 if batch.remaining <= 1 {
3454 self.batch_state = None;
3455 }
3456
3457 self.apply_company_currency(&mut entry.header);
3459
3460 entry
3461 }
3462
3463 fn maybe_inject_persona_error(&mut self, entry: &mut JournalEntry) {
3465 let persona_str = &entry.header.user_persona;
3467 let persona = match persona_str.to_lowercase().as_str() {
3468 s if s.contains("junior") => UserPersona::JuniorAccountant,
3469 s if s.contains("senior") => UserPersona::SeniorAccountant,
3470 s if s.contains("controller") => UserPersona::Controller,
3471 s if s.contains("manager") => UserPersona::Manager,
3472 s if s.contains("executive") => UserPersona::Executive,
3473 _ => return, };
3475
3476 let base_error_rate = persona.error_rate();
3478
3479 let adjusted_rate = self.apply_stress_factors(base_error_rate, entry.header.posting_date);
3481
3482 if self.rng.random::<f64>() >= adjusted_rate {
3484 return; }
3486
3487 self.inject_human_error(entry, persona);
3489 }
3490
3491 fn apply_stress_factors(&self, base_rate: f64, posting_date: chrono::NaiveDate) -> f64 {
3500 use chrono::Datelike;
3501
3502 let mut rate = base_rate;
3503 let day = posting_date.day();
3504 let month = posting_date.month();
3505
3506 if month == 12 && day >= 28 {
3508 rate *= 2.0;
3509 return rate.min(0.5); }
3511
3512 if matches!(month, 3 | 6 | 9 | 12) && day >= 28 {
3514 rate *= 1.75; return rate.min(0.4);
3516 }
3517
3518 if day >= 28 {
3520 rate *= 1.5; }
3522
3523 let weekday = posting_date.weekday();
3525 match weekday {
3526 chrono::Weekday::Mon => {
3527 rate *= 1.2;
3529 }
3530 chrono::Weekday::Fri => {
3531 rate *= 1.3;
3533 }
3534 _ => {}
3535 }
3536
3537 rate.min(0.4)
3539 }
3540
3541 fn apply_human_variation(&mut self, amount: rust_decimal::Decimal) -> rust_decimal::Decimal {
3550 use rust_decimal::Decimal;
3551
3552 if amount < Decimal::from(10) {
3554 return amount;
3555 }
3556
3557 if self.rng.random::<f64>() > 0.70 {
3559 return amount;
3560 }
3561
3562 let variation_type: u8 = self.rng.random_range(0..4);
3564
3565 match variation_type {
3566 0 => {
3567 let variation_pct = self.rng.random_range(-0.02..0.02);
3569 let variation = amount * Decimal::try_from(variation_pct).unwrap_or_default();
3570 (amount + variation).round_dp(2)
3571 }
3572 1 => {
3573 let ten = Decimal::from(10);
3575 (amount / ten).round() * ten
3576 }
3577 2 => {
3578 if amount >= Decimal::from(500) {
3580 let hundred = Decimal::from(100);
3581 (amount / hundred).round() * hundred
3582 } else {
3583 amount
3584 }
3585 }
3586 3 => {
3587 let cents = Decimal::new(self.rng.random_range(-100..100), 2);
3589 (amount + cents).max(Decimal::ZERO).round_dp(2)
3590 }
3591 _ => amount,
3592 }
3593 }
3594
3595 fn rebalance_entry(entry: &mut JournalEntry, modified_was_debit: bool, impact: Decimal) {
3601 let balancing_idx = entry.lines.iter().position(|l| {
3603 if modified_was_debit {
3604 l.credit_amount > Decimal::ZERO
3605 } else {
3606 l.debit_amount > Decimal::ZERO
3607 }
3608 });
3609
3610 if let Some(idx) = balancing_idx {
3611 if modified_was_debit {
3612 entry.lines[idx].credit_amount += impact;
3613 } else {
3614 entry.lines[idx].debit_amount += impact;
3615 }
3616 }
3617 }
3618
3619 fn inject_human_error(&mut self, entry: &mut JournalEntry, persona: UserPersona) {
3624 use rust_decimal::Decimal;
3625
3626 let error_type: u8 = match persona {
3628 UserPersona::JuniorAccountant => {
3629 self.rng.random_range(0..5)
3631 }
3632 UserPersona::SeniorAccountant => {
3633 self.rng.random_range(0..3)
3635 }
3636 UserPersona::Controller | UserPersona::Manager => {
3637 self.rng.random_range(3..5)
3639 }
3640 _ => return,
3641 };
3642
3643 match error_type {
3644 0 => {
3645 if let Some(line) = entry.lines.get_mut(0) {
3647 let is_debit = line.debit_amount > Decimal::ZERO;
3648 let original_amount = if is_debit {
3649 line.debit_amount
3650 } else {
3651 line.credit_amount
3652 };
3653
3654 let s = original_amount.to_string();
3656 if s.len() >= 2 {
3657 let chars: Vec<char> = s.chars().collect();
3658 let pos = self.rng.random_range(0..chars.len().saturating_sub(1));
3659 if chars[pos].is_ascii_digit()
3660 && chars.get(pos + 1).is_some_and(char::is_ascii_digit)
3661 {
3662 let mut new_chars = chars;
3663 new_chars.swap(pos, pos + 1);
3664 if let Ok(new_amount) =
3665 new_chars.into_iter().collect::<String>().parse::<Decimal>()
3666 {
3667 let impact = new_amount - original_amount;
3668
3669 if is_debit {
3671 entry.lines[0].debit_amount = new_amount;
3672 } else {
3673 entry.lines[0].credit_amount = new_amount;
3674 }
3675
3676 Self::rebalance_entry(entry, is_debit, impact);
3678
3679 entry.header.header_text = Some(
3680 entry.header.header_text.clone().unwrap_or_default()
3681 + " [HUMAN_ERROR:TRANSPOSITION]",
3682 );
3683 }
3684 }
3685 }
3686 }
3687 }
3688 1 => {
3689 if let Some(line) = entry.lines.get_mut(0) {
3691 let is_debit = line.debit_amount > Decimal::ZERO;
3692 let original_amount = if is_debit {
3693 line.debit_amount
3694 } else {
3695 line.credit_amount
3696 };
3697
3698 let new_amount = original_amount * Decimal::new(10, 0);
3699 let impact = new_amount - original_amount;
3700
3701 if is_debit {
3703 entry.lines[0].debit_amount = new_amount;
3704 } else {
3705 entry.lines[0].credit_amount = new_amount;
3706 }
3707
3708 Self::rebalance_entry(entry, is_debit, impact);
3710
3711 entry.header.header_text = Some(
3712 entry.header.header_text.clone().unwrap_or_default()
3713 + " [HUMAN_ERROR:DECIMAL_SHIFT]",
3714 );
3715 }
3716 }
3717 2 => {
3718 if let Some(ref mut text) = entry.header.header_text {
3720 let typos = ["teh", "adn", "wiht", "taht", "recieve"];
3721 let correct = ["the", "and", "with", "that", "receive"];
3722 let idx = self.rng.random_range(0..typos.len());
3723 if text.to_lowercase().contains(correct[idx]) {
3724 *text = text.replace(correct[idx], typos[idx]);
3725 *text = format!("{text} [HUMAN_ERROR:TYPO]");
3726 }
3727 }
3728 }
3729 3 => {
3730 if let Some(line) = entry.lines.get_mut(0) {
3732 let is_debit = line.debit_amount > Decimal::ZERO;
3733 let original_amount = if is_debit {
3734 line.debit_amount
3735 } else {
3736 line.credit_amount
3737 };
3738
3739 let new_amount =
3740 (original_amount / Decimal::new(100, 0)).round() * Decimal::new(100, 0);
3741 let impact = new_amount - original_amount;
3742
3743 if is_debit {
3745 entry.lines[0].debit_amount = new_amount;
3746 } else {
3747 entry.lines[0].credit_amount = new_amount;
3748 }
3749
3750 Self::rebalance_entry(entry, is_debit, impact);
3752
3753 entry.header.header_text = Some(
3754 entry.header.header_text.clone().unwrap_or_default()
3755 + " [HUMAN_ERROR:ROUNDED]",
3756 );
3757 }
3758 }
3759 4 if entry.header.document_date == entry.header.posting_date => {
3762 let days_late = self.rng.random_range(5..15);
3763 entry.header.document_date =
3764 entry.header.posting_date - chrono::Duration::days(days_late);
3765 entry.header.header_text = Some(
3766 entry.header.header_text.clone().unwrap_or_default()
3767 + " [HUMAN_ERROR:LATE_POSTING]",
3768 );
3769 }
3770 _ => {}
3771 }
3772 }
3773
3774 fn maybe_apply_approval_workflow(
3779 &mut self,
3780 entry: &mut JournalEntry,
3781 _posting_date: NaiveDate,
3782 ) {
3783 use rust_decimal::Decimal;
3784
3785 let amount = entry.total_debit();
3786
3787 if amount <= self.approval_threshold {
3789 let workflow = ApprovalWorkflow::auto_approved(
3791 entry.header.created_by.clone(),
3792 entry.header.user_persona.clone(),
3793 amount,
3794 entry.header.created_at,
3795 );
3796 entry.header.approval_workflow = Some(workflow);
3797 return;
3798 }
3799
3800 entry.header.sox_relevant = true;
3802
3803 let required_levels = if amount > Decimal::new(100000, 0) {
3805 3 } else if amount > Decimal::new(50000, 0) {
3807 2 } else {
3809 1 };
3811
3812 let mut workflow = ApprovalWorkflow::new(
3814 entry.header.created_by.clone(),
3815 entry.header.user_persona.clone(),
3816 amount,
3817 );
3818 workflow.required_levels = required_levels;
3819
3820 let submit_time = entry.header.created_at;
3822 let submit_action = ApprovalAction::new(
3823 entry.header.created_by.clone(),
3824 entry.header.user_persona.clone(),
3825 self.parse_persona(&entry.header.user_persona),
3826 ApprovalActionType::Submit,
3827 0,
3828 )
3829 .with_timestamp(submit_time);
3830
3831 workflow.actions.push(submit_action);
3832 workflow.status = ApprovalStatus::Pending;
3833 workflow.submitted_at = Some(submit_time);
3834
3835 let mut current_time = submit_time;
3837 for level in 1..=required_levels {
3838 let delay_hours = self.rng.random_range(1..4);
3840 current_time += chrono::Duration::hours(delay_hours);
3841
3842 while current_time.weekday() == chrono::Weekday::Sat
3844 || current_time.weekday() == chrono::Weekday::Sun
3845 {
3846 current_time += chrono::Duration::days(1);
3847 }
3848
3849 let (approver_id, approver_role) = self.select_approver(level);
3851
3852 let approve_action = ApprovalAction::new(
3853 approver_id.clone(),
3854 approver_role.to_string(),
3855 approver_role,
3856 ApprovalActionType::Approve,
3857 level,
3858 )
3859 .with_timestamp(current_time);
3860
3861 workflow.actions.push(approve_action);
3862 workflow.current_level = level;
3863 }
3864
3865 workflow.status = ApprovalStatus::Approved;
3867 workflow.approved_at = Some(current_time);
3868
3869 entry.header.approval_workflow = Some(workflow);
3870 }
3871
3872 fn select_approver(&mut self, level: u8) -> (String, UserPersona) {
3874 let persona = match level {
3875 1 => UserPersona::Manager,
3876 2 => UserPersona::Controller,
3877 _ => UserPersona::Executive,
3878 };
3879
3880 if let Some(ref pool) = self.user_pool {
3882 if let Some(user) = pool.get_random_user(persona, &mut self.rng) {
3883 return (user.user_id.clone(), persona);
3884 }
3885 }
3886
3887 let approver_id = match persona {
3889 UserPersona::Manager => format!("MGR{:04}", self.rng.random_range(1..100)),
3890 UserPersona::Controller => format!("CTRL{:04}", self.rng.random_range(1..20)),
3891 UserPersona::Executive => format!("EXEC{:04}", self.rng.random_range(1..10)),
3892 _ => format!("USR{:04}", self.rng.random_range(1..1000)),
3893 };
3894
3895 (approver_id, persona)
3896 }
3897
3898 fn parse_persona(&self, persona_str: &str) -> UserPersona {
3900 match persona_str.to_lowercase().as_str() {
3901 s if s.contains("junior") => UserPersona::JuniorAccountant,
3902 s if s.contains("senior") => UserPersona::SeniorAccountant,
3903 s if s.contains("controller") => UserPersona::Controller,
3904 s if s.contains("manager") => UserPersona::Manager,
3905 s if s.contains("executive") => UserPersona::Executive,
3906 s if s.contains("automated") || s.contains("system") => UserPersona::AutomatedSystem,
3907 _ => UserPersona::JuniorAccountant, }
3909 }
3910
3911 pub fn with_approval(mut self, enabled: bool) -> Self {
3913 self.approval_enabled = enabled;
3914 self
3915 }
3916
3917 pub fn with_approval_threshold(mut self, threshold: rust_decimal::Decimal) -> Self {
3919 self.approval_threshold = threshold;
3920 self
3921 }
3922
3923 pub fn with_sod_violation_rate(mut self, rate: f64) -> Self {
3929 self.sod_violation_rate = rate;
3930 self
3931 }
3932
3933 fn populate_approval_fields(&mut self, entry: &mut JournalEntry, posting_date: NaiveDate) {
3936 if let Some(ref workflow) = entry.header.approval_workflow {
3937 let last_approver = workflow
3939 .actions
3940 .iter()
3941 .rev()
3942 .find(|a| matches!(a.action, ApprovalActionType::Approve));
3943
3944 if let Some(approver_action) = last_approver {
3945 entry.header.approved_by = Some(approver_action.actor_id.clone());
3946 entry.header.approval_date = Some(approver_action.action_timestamp.date_naive());
3947 } else {
3948 entry.header.approved_by = Some(workflow.preparer_id.clone());
3950 entry.header.approval_date = Some(posting_date);
3951 }
3952
3953 if self.rng.random::<f64>() < self.sod_violation_rate {
3955 let creator = entry.header.created_by.clone();
3956 entry.header.approved_by = Some(creator);
3957 entry.header.sod_violation = true;
3958 entry.header.sod_conflict_type = Some(SodConflictType::PreparerApprover);
3959 }
3960 }
3961 }
3962
3963 pub fn with_drift_controller(mut self, controller: DriftController) -> Self {
3969 self.drift_controller = Some(controller);
3970 self
3971 }
3972
3973 pub fn with_drift_config(mut self, config: DriftConfig, seed: u64) -> Self {
3978 if config.enabled {
3979 let total_periods = self.calculate_total_periods();
3980 self.drift_controller = Some(DriftController::new(config, seed, total_periods));
3981 }
3982 self
3983 }
3984
3985 fn calculate_total_periods(&self) -> u32 {
3987 let start_year = self.start_date.year();
3988 let start_month = self.start_date.month();
3989 let end_year = self.end_date.year();
3990 let end_month = self.end_date.month();
3991
3992 ((end_year - start_year) * 12 + (end_month as i32 - start_month as i32) + 1).max(1) as u32
3993 }
3994
3995 fn date_to_period(&self, date: NaiveDate) -> u32 {
3997 let start_year = self.start_date.year();
3998 let start_month = self.start_date.month() as i32;
3999 let date_year = date.year();
4000 let date_month = date.month() as i32;
4001
4002 ((date_year - start_year) * 12 + (date_month - start_month)).max(0) as u32
4003 }
4004
4005 fn get_drift_adjustments(&self, date: NaiveDate) -> DriftAdjustments {
4007 if let Some(ref controller) = self.drift_controller {
4008 let period = self.date_to_period(date);
4009 controller.compute_adjustments(period)
4010 } else {
4011 DriftAdjustments::none()
4012 }
4013 }
4014
4015 #[inline]
4017 fn select_user(&mut self, is_automated: bool) -> (String, String) {
4018 if let Some(ref pool) = self.user_pool {
4019 let persona = if is_automated {
4020 UserPersona::AutomatedSystem
4021 } else {
4022 let roll: f64 = self.rng.random();
4024 if roll < 0.4 {
4025 UserPersona::JuniorAccountant
4026 } else if roll < 0.7 {
4027 UserPersona::SeniorAccountant
4028 } else if roll < 0.85 {
4029 UserPersona::Controller
4030 } else {
4031 UserPersona::Manager
4032 }
4033 };
4034
4035 if let Some(user) = pool.get_random_user(persona, &mut self.rng) {
4036 return (user.user_id.clone(), user.persona.to_string());
4037 }
4038 }
4039
4040 if is_automated {
4042 (
4043 format!("BATCH{:04}", self.rng.random_range(1..=20)),
4044 "automated_system".to_string(),
4045 )
4046 } else {
4047 (
4048 format!("USER{:04}", self.rng.random_range(1..=40)),
4049 "senior_accountant".to_string(),
4050 )
4051 }
4052 }
4053
4054 #[inline]
4056 fn select_source(&mut self) -> TransactionSource {
4057 let roll: f64 = self.rng.random();
4058 let dist = &self.config.source_distribution;
4059
4060 if roll < dist.manual {
4061 TransactionSource::Manual
4062 } else if roll < dist.manual + dist.automated {
4063 TransactionSource::Automated
4064 } else if roll < dist.manual + dist.automated + dist.recurring {
4065 TransactionSource::Recurring
4066 } else {
4067 TransactionSource::Adjustment
4068 }
4069 }
4070
4071 #[inline]
4073 fn document_type_for_process(process: BusinessProcess) -> &'static str {
4082 match process {
4083 BusinessProcess::P2P => "KR",
4084 BusinessProcess::O2C => "DR",
4085 BusinessProcess::R2R => "SA",
4086 BusinessProcess::H2R => "HR",
4087 BusinessProcess::A2R => "AA",
4088 _ => "SA",
4089 }
4090 }
4091
4092 fn select_business_process(&mut self) -> BusinessProcess {
4093 *datasynth_core::utils::weighted_select(&mut self.rng, &self.business_process_weights)
4094 }
4095
4096 #[inline]
4101 fn power_law_index(n: usize, rng: &mut ChaCha8Rng) -> Option<usize> {
4102 if n == 0 || n > ZIPF_CAP {
4103 return None;
4104 }
4105 let total = ZIPF_CUM[n];
4106 let r = rng.random::<f64>() * total;
4107 let k = ZIPF_CUM[..=n]
4109 .binary_search_by(|v| v.partial_cmp(&r).unwrap_or(std::cmp::Ordering::Less))
4110 .unwrap_or_else(|e| e);
4111 Some(k.saturating_sub(1).min(n - 1))
4112 }
4113
4114 #[inline]
4121 fn concentrate<'a>(
4122 enabled: bool,
4123 rng: &mut ChaCha8Rng,
4124 all: &[&'a GLAccount],
4125 uniform: Option<&'a GLAccount>,
4126 ) -> Option<&'a GLAccount> {
4127 if enabled {
4128 Self::power_law_index(all.len(), rng)
4129 .map(|i| all[i])
4130 .or(uniform)
4131 } else {
4132 uniform
4133 }
4134 }
4135
4136 fn ensure_cond_pair_pool(&mut self, source: &str) {
4139 let cfg = &self.config.source_conditional_account_pair;
4140 if !cfg.enabled {
4141 return;
4142 }
4143 if self.cond_pair_sampler.is_none() {
4144 self.cond_pair_sampler = Some(Default::default());
4145 }
4146 let sampler = self
4147 .cond_pair_sampler
4148 .as_mut()
4149 .expect("just-initialised above");
4150 if sampler.pool(source).is_some() {
4151 return;
4152 }
4153 let all_accounts: Vec<String> = self
4154 .coa
4155 .accounts
4156 .iter()
4157 .map(|a| a.account_number.clone())
4158 .collect();
4159 if all_accounts.is_empty() {
4160 return;
4161 }
4162 let weights: Vec<f64> = vec![1.0; all_accounts.len()];
4165 sampler.ensure_pool(
4166 source,
4167 &all_accounts,
4168 &weights,
4169 cfg.accts_per_source_target,
4170 cfg.concentration,
4171 &mut self.cond_pair_rng,
4172 );
4173 }
4174
4175 #[inline]
4180 fn try_cond_pick_account_number(&mut self) -> Option<String> {
4181 let cfg = &self.config.source_conditional_account_pair;
4182 if !cfg.enabled {
4183 return None;
4184 }
4185 let src = self.current_je_source.clone()?;
4186 self.ensure_cond_pair_pool(&src);
4187 let sampler = self.cond_pair_sampler.as_ref()?;
4188 let pool = sampler.pool(&src)?;
4189 Some(pool.sample_one(&mut self.cond_pair_rng).to_string())
4190 }
4191
4192 #[inline]
4193 fn select_debit_account(&mut self) -> &GLAccount {
4194 if let Some(acct_num) = self.try_cond_pick_account_number() {
4196 if let Some(a) = self
4197 .coa
4198 .accounts
4199 .iter()
4200 .find(|a| a.account_number == acct_num)
4201 {
4202 return a;
4203 }
4204 }
4206 let accounts = self.coa.get_accounts_by_type(AccountType::Asset);
4207 let expense_accounts = self.coa.get_accounts_by_type(AccountType::Expense);
4208
4209 let all: Vec<_> = if self.rng.random::<f64>() < 0.6 {
4211 accounts
4212 } else {
4213 expense_accounts
4214 };
4215
4216 let uniform = all.choose(&mut self.rng).copied();
4217 let enabled = self.config.account_concentration.unwrap_or(true);
4218 Self::concentrate(enabled, &mut self.account_rng, &all, uniform).unwrap_or_else(|| {
4219 tracing::warn!(
4220 "Account selection returned empty list, falling back to first COA account"
4221 );
4222 &self.coa.accounts[0]
4223 })
4224 }
4225
4226 #[inline]
4227 fn select_credit_account(&mut self) -> &GLAccount {
4228 if let Some(acct_num) = self.try_cond_pick_account_number() {
4230 if let Some(a) = self
4231 .coa
4232 .accounts
4233 .iter()
4234 .find(|a| a.account_number == acct_num)
4235 {
4236 return a;
4237 }
4238 }
4239 let liability_accounts = self.coa.get_accounts_by_type(AccountType::Liability);
4240 let revenue_accounts = self.coa.get_accounts_by_type(AccountType::Revenue);
4241
4242 let all: Vec<_> = if self.rng.random::<f64>() < 0.6 {
4244 liability_accounts
4245 } else {
4246 revenue_accounts
4247 };
4248
4249 let uniform = all.choose(&mut self.rng).copied();
4250 let enabled = self.config.account_concentration.unwrap_or(true);
4251 Self::concentrate(enabled, &mut self.account_rng, &all, uniform).unwrap_or_else(|| {
4252 tracing::warn!(
4253 "Account selection returned empty list, falling back to first COA account"
4254 );
4255 &self.coa.accounts[0]
4256 })
4257 }
4258}
4259
4260impl Generator for JournalEntryGenerator {
4261 type Item = JournalEntry;
4262 type Config = (
4263 TransactionConfig,
4264 Arc<ChartOfAccounts>,
4265 Vec<String>,
4266 NaiveDate,
4267 NaiveDate,
4268 );
4269
4270 fn new(config: Self::Config, seed: u64) -> Self {
4271 Self::new_with_params(config.0, config.1, config.2, config.3, config.4, seed)
4272 }
4273
4274 fn generate_one(&mut self) -> Self::Item {
4275 self.generate()
4276 }
4277
4278 fn reset(&mut self) {
4279 self.rng = seeded_rng(self.seed, 0);
4280 self.source_mix_rng = seeded_rng(self.seed, 50_063);
4281 self.template_rng = seeded_rng(self.seed, 70_081);
4282 self.recurring_archetypes.clear();
4283 self.reversal_rng = seeded_rng(self.seed, 90_017);
4284 self.reversal_buffer.clear();
4285 self.account_rng = seeded_rng(self.seed, 60_071);
4286 self.allocation_rng = seeded_rng(self.seed, 80_023);
4287 self.fx_rng = seeded_rng(self.seed, 70_093);
4288 self.line_sampler.reset(self.seed + 1);
4289 self.amount_sampler.reset(self.seed + 2);
4290 self.temporal_sampler.reset(self.seed + 3);
4291 if let Some(ref mut adv) = self.advanced_amount_sampler {
4292 adv.reset(self.seed + 2);
4293 }
4294 self.count = 0;
4295 self.uuid_factory.reset();
4296
4297 let mut ref_gen = ReferenceGenerator::new(
4299 self.start_date.year(),
4300 self.companies
4301 .first()
4302 .map(std::string::String::as_str)
4303 .unwrap_or("1000"),
4304 );
4305 ref_gen.set_prefix(
4306 ReferenceType::Invoice,
4307 &self.template_config.references.invoice_prefix,
4308 );
4309 ref_gen.set_prefix(
4310 ReferenceType::PurchaseOrder,
4311 &self.template_config.references.po_prefix,
4312 );
4313 ref_gen.set_prefix(
4314 ReferenceType::SalesOrder,
4315 &self.template_config.references.so_prefix,
4316 );
4317 self.reference_generator = ref_gen;
4318 }
4319
4320 fn count(&self) -> u64 {
4321 self.count
4322 }
4323
4324 fn seed(&self) -> u64 {
4325 self.seed
4326 }
4327}
4328
4329use datasynth_core::traits::ParallelGenerator;
4330
4331impl ParallelGenerator for JournalEntryGenerator {
4332 fn split(self, parts: usize) -> Vec<Self> {
4338 let parts = parts.max(1);
4339 (0..parts)
4340 .map(|i| {
4341 let sub_seed = self
4343 .seed
4344 .wrapping_add((i as u64).wrapping_mul(0x9E3779B97F4A7C15));
4345
4346 let mut gen = JournalEntryGenerator::new_with_full_config(
4347 self.config.clone(),
4348 Arc::clone(&self.coa),
4349 self.companies.clone(),
4350 self.start_date,
4351 self.end_date,
4352 sub_seed,
4353 self.template_config.clone(),
4354 self.user_pool.clone(),
4355 );
4356
4357 gen.company_selector = self.company_selector.clone();
4359 gen.vendor_pool = self.vendor_pool.clone();
4360 gen.customer_pool = self.customer_pool.clone();
4361 gen.material_pool = self.material_pool.clone();
4362 gen.cost_center_pool = self.cost_center_pool.clone();
4368 gen.profit_center_pool = self.profit_center_pool.clone();
4369 gen.using_real_master_data = self.using_real_master_data;
4370 gen.fraud_config = self.fraud_config.clone();
4371 gen.persona_errors_enabled = self.persona_errors_enabled;
4372 gen.company_currencies = self.company_currencies.clone();
4375 gen.batching_enabled = self.batching_enabled;
4377 gen.approval_enabled = self.approval_enabled;
4378 gen.approval_threshold = self.approval_threshold;
4379 gen.sod_violation_rate = self.sod_violation_rate;
4380 if let Some(mut adv) = self.advanced_amount_sampler.clone() {
4385 adv.reset(sub_seed.wrapping_add(2));
4386 gen.advanced_amount_sampler = Some(adv);
4387 }
4388 if let Some(mut cond) = self.conditional_amount_override.clone() {
4391 cond.reset(sub_seed.wrapping_add(17));
4392 gen.conditional_amount_override = Some(cond);
4393 }
4394 if let Some(mut cop) = self.correlation_copula.clone() {
4396 cop.reset(sub_seed.wrapping_add(31));
4397 gen.correlation_copula = Some(cop);
4398 }
4399
4400 gen.uuid_factory = DeterministicUuidFactory::for_partition(
4402 sub_seed,
4403 GeneratorType::JournalEntry,
4404 i as u8,
4405 );
4406
4407 if let Some(ref config) = self.temporal_patterns_config {
4409 gen.temporal_patterns_config = Some(config.clone());
4410 if config.business_days.enabled {
4412 if let Some(ref bdc) = self.business_day_calculator {
4413 gen.business_day_calculator = Some(bdc.clone());
4414 }
4415 }
4416 if config.processing_lags.enabled {
4418 let lag_config =
4419 Self::convert_processing_lag_config(&config.processing_lags);
4420 gen.processing_lag_calculator =
4421 Some(ProcessingLagCalculator::with_config(sub_seed, lag_config));
4422 }
4423 }
4424
4425 if let Some(ref dc) = self.drift_controller {
4427 gen.drift_controller = Some(dc.clone());
4428 }
4429
4430 gen.loaded_priors = self.loaded_priors.clone();
4433
4434 if let Some(ref cal) = self.velocity_calibrator {
4439 let mut fresh = crate::velocity_calibrator::VelocityCalibrator::new(
4440 cal.target_trigger_rates.clone(),
4441 cal.n_lines_between_calibrations,
4442 );
4443 fresh.current_values = cal.current_values.clone();
4444 gen.velocity_calibrator = Some(fresh);
4445 }
4446
4447 gen
4448 })
4449 .collect()
4450 }
4451}
4452
4453#[cfg(test)]
4454mod tests {
4455 use super::*;
4456 use crate::ChartOfAccountsGenerator;
4457
/// Generates 100 entries with default settings and checks that every entry
/// without a `[HUMAN_ERROR:` marker in its header text is balanced, that all
/// entries have at least two lines, and that at least 80 entries were clean.
#[test]
fn test_generate_balanced_entries() {
    let coa = Arc::new(
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42)
            .generate(),
    );
    let period_start = NaiveDate::from_ymd_opt(2024, 1, 1).unwrap();
    let period_end = NaiveDate::from_ymd_opt(2024, 12, 31).unwrap();

    let mut je_gen = JournalEntryGenerator::new_with_params(
        TransactionConfig::default(),
        coa,
        vec!["1000".to_string()],
        period_start,
        period_end,
        42,
    );

    let mut balanced_count = 0;
    for _ in 0..100 {
        let entry = je_gen.generate();

        // Entries tagged as human errors may be intentionally unbalanced.
        let tagged_as_error = entry
            .header
            .header_text
            .as_deref()
            .is_some_and(|text| text.contains("[HUMAN_ERROR:"));

        if !tagged_as_error {
            assert!(
                entry.is_balanced(),
                "Entry {:?} is not balanced",
                entry.header.document_id
            );
            balanced_count += 1;
        }
        assert!(entry.line_count() >= 2, "Entry has fewer than 2 lines");
    }

    assert!(
        balanced_count >= 80,
        "Expected at least 80 balanced entries, got {}",
        balanced_count
    );
}
4503
/// Two generators built from identical inputs and the same seed must emit
/// the same document ids and debit totals for their first 50 entries.
#[test]
fn test_deterministic_generation() {
    let coa = Arc::new(
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42)
            .generate(),
    );
    let period_start = NaiveDate::from_ymd_opt(2024, 1, 1).unwrap();
    let period_end = NaiveDate::from_ymd_opt(2024, 12, 31).unwrap();

    let mut first = JournalEntryGenerator::new_with_params(
        TransactionConfig::default(),
        Arc::clone(&coa),
        vec!["1000".to_string()],
        period_start,
        period_end,
        42,
    );
    let mut second = JournalEntryGenerator::new_with_params(
        TransactionConfig::default(),
        coa,
        vec!["1000".to_string()],
        period_start,
        period_end,
        42,
    );

    for _ in 0..50 {
        let from_first = first.generate();
        let from_second = second.generate();
        assert_eq!(from_first.header.document_id, from_second.header.document_id);
        assert_eq!(from_first.total_debit(), from_second.total_debit());
    }
}
4535
/// With header-text, line-text and reference generation switched on (and
/// persona errors off), every generated entry must carry a header text, a
/// reference, a business process, line texts on all lines, and be balanced.
#[test]
fn test_templates_generate_descriptions() {
    let coa = Arc::new(
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42)
            .generate(),
    );

    let names = datasynth_config::schema::NameTemplateConfig {
        generate_realistic_names: true,
        email_domain: "test.com".to_string(),
        culture_distribution: datasynth_config::schema::CultureDistribution::default(),
    };
    let descriptions = datasynth_config::schema::DescriptionTemplateConfig {
        generate_header_text: true,
        generate_line_text: true,
    };
    let references = datasynth_config::schema::ReferenceTemplateConfig {
        generate_references: true,
        invoice_prefix: "TEST-INV".to_string(),
        po_prefix: "TEST-PO".to_string(),
        so_prefix: "TEST-SO".to_string(),
    };
    let template_config = TemplateConfig {
        names,
        descriptions,
        references,
        path: None,
        merge_strategy: datasynth_config::TemplateMergeStrategy::default(),
    };

    let mut je_gen = JournalEntryGenerator::new_with_full_config(
        TransactionConfig::default(),
        coa,
        vec!["1000".to_string()],
        NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
        NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
        42,
        template_config,
        None,
    )
    .with_persona_errors(false);

    for _ in 0..10 {
        let entry = je_gen.generate();
        let header = &entry.header;

        assert!(
            header.header_text.is_some(),
            "Header text should be populated"
        );
        assert!(header.reference.is_some(), "Reference should be populated");
        assert!(
            header.business_process.is_some(),
            "Business process should be set"
        );
        assert!(
            entry.lines.iter().all(|line| line.line_text.is_some()),
            "Line text should be populated"
        );
        assert!(entry.is_balanced());
    }
}
4608
/// Entries generated with an explicit user pool must always name a creator.
#[test]
fn test_user_pool_integration() {
    let coa = Arc::new(
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42)
            .generate(),
    );
    let companies = vec!["1000".to_string()];

    // Standard user pool for the single test company.
    let mut user_gen = crate::UserGenerator::new(42);
    let user_pool = user_gen.generate_standard(&companies);

    let mut je_gen = JournalEntryGenerator::new_with_full_config(
        TransactionConfig::default(),
        coa,
        companies,
        NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
        NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
        42,
        TemplateConfig::default(),
        Some(user_pool),
    );

    let mut remaining = 20;
    while remaining > 0 {
        let entry = je_gen.generate();
        assert!(!entry.header.created_by.is_empty());
        remaining -= 1;
    }
}
4641
/// A freshly built generator reports no real master data; after attaching
/// vendors, customers and materials through the individual builders it does.
#[test]
fn test_master_data_connection() {
    let coa = Arc::new(
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42)
            .generate(),
    );

    let vendors = vec![
        Vendor::new("V-TEST-001", "Test Vendor Alpha", VendorType::Supplier),
        Vendor::new("V-TEST-002", "Test Vendor Beta", VendorType::Technology),
    ];
    let customers = vec![
        Customer::new("C-TEST-001", "Test Customer One", CustomerType::Corporate),
        Customer::new(
            "C-TEST-002",
            "Test Customer Two",
            CustomerType::SmallBusiness,
        ),
    ];
    let materials = vec![Material::new(
        "MAT-TEST-001",
        "Test Material A",
        MaterialType::RawMaterial,
    )];

    let bare = JournalEntryGenerator::new_with_params(
        TransactionConfig::default(),
        coa,
        vec!["1000".to_string()],
        NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
        NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
        42,
    );
    assert!(!bare.is_using_real_master_data());

    let enriched = bare
        .with_vendors(&vendors)
        .with_customers(&customers)
        .with_materials(&materials);
    assert!(enriched.is_using_real_master_data());
}
4693
/// `with_master_data` attaches vendors, customers and materials in one call
/// and flips the generator into real-master-data mode.
#[test]
fn test_with_master_data_convenience_method() {
    let coa = Arc::new(
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42)
            .generate(),
    );

    let vendors = vec![Vendor::new("V-001", "Vendor One", VendorType::Supplier)];
    let customers = vec![Customer::new(
        "C-001",
        "Customer One",
        CustomerType::Corporate,
    )];
    let materials = vec![Material::new(
        "MAT-001",
        "Material One",
        MaterialType::RawMaterial,
    )];

    let base = JournalEntryGenerator::new_with_params(
        TransactionConfig::default(),
        coa,
        vec!["1000".to_string()],
        NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
        NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
        42,
    );
    let generator = base.with_master_data(&vendors, &customers, &materials);

    assert!(generator.is_using_real_master_data());
}
4724
/// Checks the relative ordering of `apply_stress_factors` results for a
/// baseline day, a month-end day, a year-end day, a Friday and a Monday.
///
/// Calendar facts for the dates used: 2024-06-15 and 2024-06-29 are
/// Saturdays, 2024-06-14 is a Friday, 2024-06-17 and 2024-12-30 are Mondays.
#[test]
fn test_stress_factors_increase_error_rate() {
    let coa = Arc::new(
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42)
            .generate(),
    );

    let generator = JournalEntryGenerator::new_with_params(
        TransactionConfig::default(),
        coa,
        vec!["1000".to_string()],
        NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
        NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
        42,
    );

    let base_rate = 0.1;
    // Stressed rate for a calendar day, always starting from `base_rate`.
    let rate_on = |year: i32, month: u32, day: u32| {
        let date = NaiveDate::from_ymd_opt(year, month, day).unwrap();
        generator.apply_stress_factors(base_rate, date)
    };

    let regular_rate = rate_on(2024, 6, 15);
    assert!(
        (regular_rate - base_rate).abs() < 0.01,
        "Regular day should have minimal stress factor adjustment"
    );

    let month_end_rate = rate_on(2024, 6, 29);
    assert!(
        month_end_rate > regular_rate,
        "Month end should have higher error rate than regular day"
    );

    let year_end_rate = rate_on(2024, 12, 30);
    assert!(
        year_end_rate > month_end_rate,
        "Year end should have highest error rate"
    );

    let friday_rate = rate_on(2024, 6, 14);
    assert!(
        friday_rate > regular_rate,
        "Friday should have higher error rate than mid-week"
    );

    let monday_rate = rate_on(2024, 6, 17);
    assert!(
        monday_rate > regular_rate,
        "Monday should have higher error rate than mid-week"
    );
}
4782
/// Generates 200 entries with persona errors disabled and checks that all
/// are balanced and that at least one posting date is shared by several
/// entries.
#[test]
fn test_batching_produces_similar_entries() {
    let coa = Arc::new(
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42)
            .generate(),
    );

    let mut je_gen = JournalEntryGenerator::new_with_params(
        TransactionConfig::default(),
        coa,
        vec!["1000".to_string()],
        NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
        NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
        123,
    )
    .with_persona_errors(false);

    let mut entries: Vec<JournalEntry> = Vec::with_capacity(200);
    for _ in 0..200 {
        entries.push(je_gen.generate());
    }

    assert!(
        entries.iter().all(|entry| entry.is_balanced()),
        "All entries including batched should be balanced"
    );

    // Tally how many entries landed on each posting date.
    let per_date = entries.iter().fold(
        std::collections::HashMap::<NaiveDate, usize>::new(),
        |mut tally, entry| {
            *tally.entry(entry.header.posting_date).or_default() += 1;
            tally
        },
    );

    let dates_with_multiple = per_date.values().filter(|count| **count > 1).count();
    assert!(
        dates_with_multiple > 0,
        "With batching, should see some dates with multiple entries"
    );
}
4825
/// With temporal patterns and business-day handling enabled for the "US"
/// calendar region, none of 100 generated entries may post on a weekend.
#[test]
fn test_temporal_patterns_business_days() {
    use datasynth_config::schema::{
        BusinessDaySchemaConfig, CalendarSchemaConfig, TemporalPatternsConfig,
    };

    let coa = Arc::new(
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42)
            .generate(),
    );

    let business_days = BusinessDaySchemaConfig {
        enabled: true,
        ..Default::default()
    };
    let calendars = CalendarSchemaConfig {
        regions: vec!["US".to_string()],
        custom_holidays: vec![],
    };
    let temporal_config = TemporalPatternsConfig {
        enabled: true,
        business_days,
        calendars,
        ..Default::default()
    };

    // First quarter of 2024 only.
    let mut je_gen = JournalEntryGenerator::new_with_params(
        TransactionConfig::default(),
        coa,
        vec!["1000".to_string()],
        NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
        NaiveDate::from_ymd_opt(2024, 3, 31).unwrap(),
        42,
    )
    .with_temporal_patterns(temporal_config, 42)
    .with_persona_errors(false);

    let entries: Vec<JournalEntry> = (0..100).map(|_| je_gen.generate()).collect();

    for entry in entries.iter() {
        let on_weekend = matches!(
            entry.header.posting_date.weekday(),
            chrono::Weekday::Sat | chrono::Weekday::Sun
        );
        assert!(
            !on_weekend,
            "Posting date {:?} should not be a weekend",
            entry.header.posting_date
        );
    }
}
4873
/// Without any temporal-pattern configuration, fewer than 5% of 500
/// generated entries may carry a Saturday or Sunday posting date.
#[test]
fn test_default_generation_filters_weekends() {
    let coa = Arc::new(
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42)
            .generate(),
    );

    let mut je_gen = JournalEntryGenerator::new_with_params(
        TransactionConfig::default(),
        coa,
        vec!["1000".to_string()],
        NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
        NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
        42,
    )
    .with_persona_errors(false);

    let total = 500;
    let entries: Vec<JournalEntry> = (0..total).map(|_| je_gen.generate()).collect();

    let mut weekend_count = 0_usize;
    for entry in &entries {
        if matches!(
            entry.header.posting_date.weekday(),
            chrono::Weekday::Sat | chrono::Weekday::Sun
        ) {
            weekend_count += 1;
        }
    }

    let weekend_pct = weekend_count as f64 / total as f64;
    assert!(
        weekend_pct < 0.05,
        "Expected weekend entries <5% of total without temporal_patterns enabled, \
         but got {:.1}% ({}/{})",
        weekend_pct * 100.0,
        weekend_count,
        total
    );
}
4914
/// Over 200 entries (persona errors and batching off) more than three
/// distinct document types must appear and "SA" must stay below 50%.
#[test]
fn test_document_type_derived_from_business_process() {
    let coa = Arc::new(
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42)
            .generate(),
    );

    let mut je_gen = JournalEntryGenerator::new_with_params(
        TransactionConfig::default(),
        coa,
        vec!["1000".to_string()],
        NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
        NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
        99,
    )
    .with_persona_errors(false)
    .with_batching(false);

    let total = 200;
    let mut sa_count = 0_usize;
    let mut doc_types = std::collections::HashSet::new();

    for _ in 0..total {
        let entry = je_gen.generate();
        let doc_type = &entry.header.document_type;
        sa_count += usize::from(doc_type == "SA");
        doc_types.insert(doc_type.clone());
    }

    assert!(
        doc_types.len() > 3,
        "Expected >3 distinct document types, got {} ({:?})",
        doc_types.len(),
        doc_types,
    );

    let sa_pct = sa_count as f64 / total as f64;
    assert!(
        sa_pct < 0.50,
        "Expected SA <50%, got {:.1}% ({}/{})",
        sa_pct * 100.0,
        sa_count,
        total,
    );
}
4963
/// More than 95% of all lines across 200 generated entries must carry an
/// `account_description`.
#[test]
fn test_enrich_line_items_account_description() {
    let coa = Arc::new(
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42)
            .generate(),
    );

    let mut je_gen = JournalEntryGenerator::new_with_params(
        TransactionConfig::default(),
        coa,
        vec!["1000".to_string()],
        NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
        NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
        42,
    )
    .with_persona_errors(false);

    let total = 200;
    let entries: Vec<JournalEntry> = (0..total).map(|_| je_gen.generate()).collect();

    // Single pass over every line, counting all lines and described lines.
    let mut total_lines = 0_usize;
    let mut lines_with_desc = 0_usize;
    for line in entries.iter().flat_map(|entry| entry.lines.iter()) {
        total_lines += 1;
        if line.account_description.is_some() {
            lines_with_desc += 1;
        }
    }

    let desc_pct = lines_with_desc as f64 / total_lines as f64;
    assert!(
        desc_pct > 0.95,
        "Expected >95% of lines to have account_description, got {:.1}% ({}/{})",
        desc_pct * 100.0,
        lines_with_desc,
        total_lines,
    );
}
5000
/// Among lines whose GL account number starts with '5' or '6', more than
/// 80% must carry a cost center. The check is skipped when the 300 generated
/// entries contain no such line.
#[test]
fn test_enrich_line_items_cost_center_for_expense_accounts() {
    let coa = Arc::new(
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42)
            .generate(),
    );

    let mut je_gen = JournalEntryGenerator::new_with_params(
        TransactionConfig::default(),
        coa,
        vec!["1000".to_string()],
        NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
        NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
        42,
    )
    .with_persona_errors(false);

    let total = 300;
    let entries: Vec<JournalEntry> = (0..total).map(|_| je_gen.generate()).collect();

    // Lines selected by the leading digit of the account number.
    let expense_lines: Vec<&JournalEntryLine> = entries
        .iter()
        .flat_map(|entry| entry.lines.iter())
        .filter(|line| matches!(line.gl_account.chars().next(), Some('5' | '6')))
        .collect();

    if expense_lines.is_empty() {
        return;
    }

    let with_cc = expense_lines
        .iter()
        .filter(|line| line.cost_center.is_some())
        .count();
    let cc_pct = with_cc as f64 / expense_lines.len() as f64;
    assert!(
        cc_pct > 0.80,
        "Expected >80% of expense lines to have cost_center, got {:.1}% ({}/{})",
        cc_pct * 100.0,
        with_cc,
        expense_lines.len(),
    );
}
5045
/// More than 95% of all lines across 100 generated entries must carry a
/// profit center, and more than 95% must carry a line text.
#[test]
fn test_enrich_line_items_profit_center_and_line_text() {
    let coa = Arc::new(
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42)
            .generate(),
    );

    let mut je_gen = JournalEntryGenerator::new_with_params(
        TransactionConfig::default(),
        coa,
        vec!["1000".to_string()],
        NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
        NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
        42,
    )
    .with_persona_errors(false);

    let total = 100;
    let entries: Vec<JournalEntry> = (0..total).map(|_| je_gen.generate()).collect();

    // One pass collects all three counters.
    let mut total_lines = 0_usize;
    let mut with_pc = 0_usize;
    let mut with_text = 0_usize;
    for line in entries.iter().flat_map(|entry| entry.lines.iter()) {
        total_lines += 1;
        if line.profit_center.is_some() {
            with_pc += 1;
        }
        if line.line_text.is_some() {
            with_text += 1;
        }
    }

    let pc_pct = with_pc as f64 / total_lines as f64;
    assert!(
        pc_pct > 0.95,
        "Expected >95% of lines to have profit_center, got {:.1}% ({}/{})",
        pc_pct * 100.0,
        with_pc,
        total_lines,
    );

    let text_pct = with_text as f64 / total_lines as f64;
    assert!(
        text_pct > 0.95,
        "Expected >95% of lines to have line_text, got {:.1}% ({}/{})",
        text_pct * 100.0,
        with_text,
        total_lines,
    );
}
5099
/// Every generated header must have a non-empty `source_system`, a
/// non-empty `created_by` and a populated `created_date`.
#[test]
fn test_je_has_audit_flags() {
    let coa = Arc::new(
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42)
            .generate(),
    );

    let mut je_gen = JournalEntryGenerator::new_with_params(
        TransactionConfig::default(),
        coa,
        vec!["1000".to_string()],
        NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
        NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
        42,
    )
    .with_persona_errors(false);

    for _ in 0..100 {
        let entry = je_gen.generate();
        let header = &entry.header;

        assert!(
            !header.source_system.is_empty(),
            "source_system should be populated, got empty string"
        );
        assert!(
            !header.created_by.is_empty(),
            "created_by should be populated"
        );
        assert!(
            header.created_date.is_some(),
            "created_date should be populated"
        );
    }
}
5140
/// Over 1000 entries the share flagged `is_manual` must lie strictly between
/// 1% and 50%, and the flag must agree with `source == Manual` on every entry.
#[test]
fn test_manual_entry_rate() {
    let coa = Arc::new(
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42)
            .generate(),
    );

    let mut je_gen = JournalEntryGenerator::new_with_params(
        TransactionConfig::default(),
        coa,
        vec!["1000".to_string()],
        NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
        NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
        42,
    )
    .with_persona_errors(false)
    .with_batching(false);

    let total = 1000;
    let entries: Vec<JournalEntry> = (0..total).map(|_| je_gen.generate()).collect();

    let mut manual_count = 0_usize;
    for entry in &entries {
        if entry.header.is_manual {
            manual_count += 1;
        }
    }
    let manual_rate = manual_count as f64 / total as f64;

    assert!(
        manual_rate > 0.01 && manual_rate < 0.50,
        "Manual entry rate should be reasonable (1%-50%), got {:.1}% ({}/{})",
        manual_rate * 100.0,
        manual_count,
        total,
    );

    // The boolean flag and the source enum must never disagree.
    for entry in entries.iter() {
        let header = &entry.header;
        assert_eq!(
            header.is_manual,
            header.source == TransactionSource::Manual,
            "is_manual should match source == Manual"
        );
    }
}
5183
/// Manual entries must have a `source_system` in the `manual` /
/// `spreadsheet` family (exact name or `name/` prefix); non-manual entries
/// must not.
#[test]
fn test_manual_source_consistency() {
    let coa = Arc::new(
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42)
            .generate(),
    );

    let mut je_gen = JournalEntryGenerator::new_with_params(
        TransactionConfig::default(),
        coa,
        vec!["1000".to_string()],
        NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
        NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
        42,
    )
    .with_persona_errors(false)
    .with_batching(false);

    // Membership test shared by both branches below.
    let in_manual_family = |code: &str| {
        matches!(code, "manual" | "spreadsheet")
            || code.starts_with("manual/")
            || code.starts_with("spreadsheet/")
    };

    for _ in 0..500 {
        let entry = je_gen.generate();
        let s = entry.header.source_system.as_str();

        if entry.header.is_manual {
            assert!(
                in_manual_family(s),
                "Manual entry should have source_system in `manual` / `spreadsheet` family, got '{s}'",
            );
        } else {
            assert!(
                !in_manual_family(s),
                "Non-manual entry should not be in `manual` / `spreadsheet` family, got '{s}'",
            );
        }
    }
}
5230
/// With the default configuration every entry must carry an
/// `sap_source_code`, and 500 entries must cover at least 10 distinct codes.
#[test]
fn test_default_source_codes_breadth() {
    let coa = Arc::new(
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 7)
            .generate(),
    );
    let mut je_gen = JournalEntryGenerator::new_with_params(
        TransactionConfig::default(),
        coa,
        vec!["1000".to_string()],
        NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
        NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
        7,
    )
    .with_persona_errors(false)
    .with_batching(false);

    let codes: std::collections::HashSet<_> = (0..500)
        .map(|_| {
            let e = je_gen.generate();
            e.header
                .sap_source_code
                .expect("default config should populate sap_source_code")
        })
        .collect();

    assert!(
        codes.len() >= 10,
        "default source-mix should be broad (>=10 distinct codes), got {}",
        codes.len()
    );
}
5266
/// Setting `synthetic_source_codes` to `Some(false)` must leave
/// `sap_source_code` unset on every generated entry.
#[test]
fn test_source_codes_opt_out() {
    let coa = Arc::new(
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 9)
            .generate(),
    );
    let opt_out_cfg = TransactionConfig {
        synthetic_source_codes: Some(false),
        ..TransactionConfig::default()
    };
    let mut je_gen = JournalEntryGenerator::new_with_params(
        opt_out_cfg,
        coa,
        vec!["1000".to_string()],
        NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
        NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
        9,
    )
    .with_persona_errors(false)
    .with_batching(false);

    let mut remaining = 50;
    while remaining > 0 {
        let entry = je_gen.generate();
        assert!(
            entry.header.sap_source_code.is_none(),
            "opt-out should leave sap_source_code None (legacy enum source)"
        );
        remaining -= 1;
    }
}
5296
/// Compares runs with `recurring_templates` on and off. Both must stay
/// balanced; the templated run must yield fewer distinct (account, side)
/// signatures than the untemplated run and fewer than half as many
/// signatures as entries.
#[test]
fn test_recurring_templates_reuse_archetypes() {
    /// Generates 800 entries and returns
    /// `(entry count, distinct line signatures, all entries balanced)`.
    fn run(recurring: Option<bool>) -> (usize, usize, bool) {
        let coa = Arc::new(
            ChartOfAccountsGenerator::new(
                CoAComplexity::Medium,
                IndustrySector::Manufacturing,
                11,
            )
            .generate(),
        );
        let cfg = TransactionConfig {
            recurring_templates: recurring,
            ..TransactionConfig::default()
        };
        let mut generator = JournalEntryGenerator::new_with_params(
            cfg,
            coa,
            vec!["1000".to_string()],
            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
            11,
        )
        .with_persona_errors(false)
        .with_batching(false);

        let n = 800;
        let mut all_balanced = true;
        let mut signatures = std::collections::HashSet::new();
        for _ in 0..n {
            let entry = generator.generate();
            all_balanced &= entry.is_balanced();

            // Order-independent signature: sorted (account, is-debit) pairs.
            let mut signature: Vec<(String, bool)> = Vec::with_capacity(entry.lines.len());
            for line in entry.lines.iter() {
                signature.push((line.gl_account.clone(), line.debit_amount > Decimal::ZERO));
            }
            signature.sort();
            signatures.insert(signature);
        }
        (n, signatures.len(), all_balanced)
    }

    let (n, distinct_on, bal_on) = run(Some(true));
    let (_, distinct_off, bal_off) = run(Some(false));
    assert!(bal_on && bal_off, "balance preserved in both modes");
    assert!(
        distinct_on < distinct_off,
        "templating should reduce distinct archetypes ({distinct_on} on vs {distinct_off} off)"
    );
    assert!(
        distinct_on * 2 < n,
        "templating should reuse heavily: {distinct_on} distinct archetypes over {n} JEs"
    );
}
5353
/// With `reversal_rate` 0.05 some entries must have a header text starting
/// with "Reversal of"; with rate 0.0 none may. All entries must be balanced
/// in both runs.
#[test]
fn test_reversal_process_emits_balanced_reversals() {
    /// Generates 1000 entries and returns
    /// `(number of reversal entries, all entries balanced)`.
    fn run(rate: Option<f64>) -> (usize, bool) {
        let coa = Arc::new(
            ChartOfAccountsGenerator::new(
                CoAComplexity::Small,
                IndustrySector::Manufacturing,
                13,
            )
            .generate(),
        );
        let cfg = TransactionConfig {
            reversal_rate: rate,
            ..TransactionConfig::default()
        };
        let mut generator = JournalEntryGenerator::new_with_params(
            cfg,
            coa,
            vec!["1000".to_string()],
            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
            13,
        )
        .with_persona_errors(false)
        .with_batching(false);

        let mut reversal_total = 0;
        let mut all_balanced = true;
        for _ in 0..1000 {
            let entry = generator.generate();
            all_balanced &= entry.is_balanced();

            let is_reversal = entry
                .header
                .header_text
                .as_deref()
                .is_some_and(|text| text.starts_with("Reversal of"));
            if is_reversal {
                reversal_total += 1;
            }
        }
        (reversal_total, all_balanced)
    }

    let (rev_on, bal_on) = run(Some(0.05));
    let (rev_off, bal_off) = run(Some(0.0));
    assert!(bal_on && bal_off, "all entries balanced incl. reversals");
    assert_eq!(rev_off, 0, "rate 0.0 emits no reversals, got {rev_off}");
    assert!(rev_on > 0, "rate 0.05 should emit reversals, got {rev_on}");
}
5402
5403 #[test]
5404 fn test_account_concentration_creates_pareto() {
5405 fn run(concentration: Option<bool>) -> (f64, bool) {
5410 let mut coa_gen = ChartOfAccountsGenerator::new(
5411 CoAComplexity::Medium,
5412 IndustrySector::Manufacturing,
5413 17,
5414 );
5415 let coa = Arc::new(coa_gen.generate());
5416 let cfg = TransactionConfig {
5417 account_concentration: concentration,
5418 recurring_templates: Some(false),
5419 reversal_rate: Some(0.0),
5420 ..TransactionConfig::default()
5421 };
5422 let mut g = JournalEntryGenerator::new_with_params(
5423 cfg,
5424 coa,
5425 vec!["1000".to_string()],
5426 NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
5427 NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
5428 17,
5429 )
5430 .with_persona_errors(false)
5431 .with_batching(false);
5432 let mut counts: std::collections::HashMap<String, usize> =
5433 std::collections::HashMap::new();
5434 let mut total_lines = 0usize;
5435 let mut balanced = true;
5436 for _ in 0..1000 {
5437 let e = g.generate();
5438 if !e.is_balanced() {
5439 balanced = false;
5440 }
5441 for l in &e.lines {
5442 *counts.entry(l.gl_account.clone()).or_default() += 1;
5443 total_lines += 1;
5444 }
5445 }
5446 let mut v: Vec<usize> = counts.values().copied().collect();
5449 v.sort_unstable_by(|a, b| b.cmp(a));
5450 let top_k = ((v.len() as f64 * 0.10).ceil() as usize).max(1);
5451 let top_share = v.iter().take(top_k).sum::<usize>() as f64 / total_lines as f64;
5452 (top_share, balanced)
5453 }
5454 let (share_on, bal_on) = run(Some(true));
5455 let (share_off, bal_off) = run(Some(false));
5456 assert!(bal_on && bal_off, "balance preserved in both modes");
5457 assert!(
5458 share_on > share_off + 0.20,
5459 "concentration should raise the top-10% line share ({share_on:.3} on vs {share_off:.3} off)"
5460 );
5461 assert!(
5462 share_on > 0.50,
5463 "hot accounts should dominate: top-10% line share {share_on:.3}"
5464 );
5465 }
5466
5467 #[test]
5468 fn test_allocation_batch_emits_large_balanced_postings() {
5469 fn run(rate: Option<f64>) -> (usize, bool, usize) {
5474 let mut coa_gen = ChartOfAccountsGenerator::new(
5475 CoAComplexity::Small,
5476 IndustrySector::Manufacturing,
5477 23,
5478 );
5479 let coa = Arc::new(coa_gen.generate());
5480 let cfg = TransactionConfig {
5481 allocation_batch_rate: rate,
5482 reversal_rate: Some(0.0),
5483 ..TransactionConfig::default()
5484 };
5485 let mut g = JournalEntryGenerator::new_with_params(
5486 cfg,
5487 coa,
5488 vec!["1000".to_string()],
5489 NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
5490 NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
5491 23,
5492 )
5493 .with_persona_errors(false)
5494 .with_batching(false);
5495 let mut batches = 0usize;
5496 let mut balanced = true;
5497 let mut max_distinct_cc = 0usize;
5498 for _ in 0..2000 {
5499 let e = g.generate();
5500 if !e.is_balanced() {
5501 balanced = false;
5502 }
5503 if e.header.sap_source_code.as_deref() == Some("AB") {
5504 batches += 1;
5505 assert!(
5506 e.lines.len() >= ALLOCATION_MIN_TARGETS as usize,
5507 "allocation batch should be large, got {} lines",
5508 e.lines.len()
5509 );
5510 let ccs: std::collections::HashSet<String> = e
5511 .lines
5512 .iter()
5513 .filter_map(|l| l.cost_center.clone())
5514 .collect();
5515 max_distinct_cc = max_distinct_cc.max(ccs.len());
5516 }
5517 }
5518 (batches, balanced, max_distinct_cc)
5519 }
5520 let (on, bal_on, cc) = run(Some(0.10));
5521 let (off, bal_off, _) = run(Some(0.0));
5522 assert!(
5523 bal_on && bal_off,
5524 "all entries balanced incl. allocation batches"
5525 );
5526 assert_eq!(off, 0, "rate 0.0 emits no allocation batches, got {off}");
5527 assert!(on > 0, "rate 0.10 should emit allocation batches, got {on}");
5528 assert!(
5529 cc > 1,
5530 "allocation should spread across multiple cost centers, got {cc}"
5531 );
5532 }
5533
5534 #[test]
5535 fn test_derived_id_processes_keep_document_ids_unique() {
5536 let mut coa_gen =
5541 ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 31);
5542 let coa = Arc::new(coa_gen.generate());
5543 let cfg = TransactionConfig {
5544 reversal_rate: Some(0.15),
5545 allocation_batch_rate: Some(0.10),
5546 ..TransactionConfig::default()
5547 };
5548 let mut g = JournalEntryGenerator::new_with_params(
5549 cfg,
5550 coa,
5551 vec!["1000".to_string()],
5552 NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
5553 NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
5554 31,
5555 )
5556 .with_persona_errors(false)
5557 .with_batching(false);
5558 let mut ids = std::collections::HashSet::new();
5559 let n = 3000;
5560 for _ in 0..n {
5561 let e = g.generate();
5562 assert!(
5563 ids.insert(e.header.document_id),
5564 "duplicate document id {} (derived-id collision)",
5565 e.header.document_id
5566 );
5567 }
5568 assert_eq!(ids.len(), n, "all {n} document ids unique");
5569 }
5570
5571 #[test]
5572 fn test_business_unit_rolls_up_from_cost_center() {
5573 fn run(enabled: Option<bool>) -> (usize, usize, bool, bool) {
5578 let mut coa_gen = ChartOfAccountsGenerator::new(
5579 CoAComplexity::Medium,
5580 IndustrySector::Manufacturing,
5581 19,
5582 );
5583 let coa = Arc::new(coa_gen.generate());
5584 let cfg = TransactionConfig {
5585 business_unit_dimension: enabled,
5586 ..TransactionConfig::default()
5587 };
5588 let mut g = JournalEntryGenerator::new_with_params(
5589 cfg,
5590 coa,
5591 vec!["1000".to_string()],
5592 NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
5593 NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
5594 19,
5595 )
5596 .with_persona_errors(false)
5597 .with_batching(false);
5598 let mut dim_lines = 0usize;
5599 let mut bu_lines = 0usize;
5600 let mut consistent = true; let mut well_formed = true; let mut dim_to_bu: std::collections::HashMap<String, String> =
5603 std::collections::HashMap::new();
5604 for _ in 0..600 {
5605 let e = g.generate();
5606 for l in &e.lines {
5607 let dim = l.cost_center.as_deref().or(l.profit_center.as_deref());
5609 if dim.is_some() {
5610 dim_lines += 1;
5611 }
5612 if let Some(bu) = &l.business_unit {
5613 bu_lines += 1;
5614 let d = dim.unwrap_or_default().to_string();
5615 if bu != &JournalEntryGenerator::business_unit_for_dimension(&d) {
5616 consistent = false;
5617 }
5618 if dim_to_bu
5620 .insert(d, bu.clone())
5621 .is_some_and(|prev| &prev != bu)
5622 {
5623 consistent = false;
5624 }
5625 let n_ok = bu.strip_prefix("BU").and_then(|d| d.parse::<u32>().ok());
5626 if !matches!(n_ok, Some(1..=11)) {
5627 well_formed = false;
5628 }
5629 }
5630 }
5631 }
5632 (dim_lines, bu_lines, consistent, well_formed)
5633 }
5634 let (dim_on, bu_on, consistent, well_formed) = run(Some(true));
5635 let (_, bu_off, _, _) = run(Some(false));
5636 assert!(
5637 dim_on > 0 && bu_on > 0,
5638 "BU should be populated where CC/PC is"
5639 );
5640 assert_eq!(
5641 dim_on, bu_on,
5642 "every CC/PC-bearing line gets a BU ({dim_on} dim vs {bu_on} BU)"
5643 );
5644 assert!(
5645 consistent,
5646 "BU must be the deterministic roll-up of its CC/PC"
5647 );
5648 assert!(well_formed, "BU codes must be BU01..BU11");
5649 assert_eq!(bu_off, 0, "dimension off ⇒ no business_unit, got {bu_off}");
5650 }
5651
5652 #[test]
5653 fn test_foreign_currency_sap_style() {
5654 fn run(rate: Option<f64>) -> (usize, bool, bool) {
5660 let mut coa_gen = ChartOfAccountsGenerator::new(
5661 CoAComplexity::Small,
5662 IndustrySector::Manufacturing,
5663 29,
5664 );
5665 let coa = Arc::new(coa_gen.generate());
5666 let cfg = TransactionConfig {
5667 foreign_currency_rate: rate,
5668 reversal_rate: Some(0.0),
5669 allocation_batch_rate: Some(0.0),
5670 ..TransactionConfig::default()
5671 };
5672 let mut g = JournalEntryGenerator::new_with_params(
5673 cfg,
5674 coa,
5675 vec!["1000".to_string()],
5676 NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
5677 NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
5678 29,
5679 )
5680 .with_persona_errors(false)
5681 .with_batching(false);
5682 let mut foreign = 0usize;
5683 let mut ledger_ok = true; let mut txn_ok = true; for _ in 0..1500 {
5686 let e = g.generate();
5687 if !e.is_balanced() {
5688 ledger_ok = false;
5689 }
5690 if e.header.currency != "USD" {
5691 foreign += 1;
5692 if !e.lines.iter().all(|l| l.transaction_amount.is_some()) {
5693 txn_ok = false;
5694 }
5695 let td: Decimal = e
5696 .lines
5697 .iter()
5698 .filter(|l| l.debit_amount > Decimal::ZERO)
5699 .filter_map(|l| l.transaction_amount)
5700 .sum();
5701 let tc: Decimal = e
5702 .lines
5703 .iter()
5704 .filter(|l| l.credit_amount > Decimal::ZERO)
5705 .filter_map(|l| l.transaction_amount)
5706 .sum();
5707 let tol = Decimal::new(e.lines.len() as i64, 2);
5709 if (td - tc).abs() > tol {
5710 txn_ok = false;
5711 }
5712 }
5713 }
5714 (foreign, ledger_ok, txn_ok)
5715 }
5716 let (fon, lbal_on, tbal_on) = run(Some(0.20));
5717 let (foff, lbal_off, _) = run(Some(0.0));
5718 assert!(
5719 lbal_on && lbal_off,
5720 "ledger balance (debit==credit) preserved in both modes"
5721 );
5722 assert!(
5723 fon > 0,
5724 "rate 0.20 should produce foreign-currency JEs, got {fon}"
5725 );
5726 assert_eq!(foff, 0, "rate 0.0 ⇒ no foreign JEs, got {foff}");
5727 assert!(
5728 tbal_on,
5729 "foreign JEs carry transaction_amount + balance in the transaction currency"
5730 );
5731 }
5732
5733 #[test]
5734 fn test_created_date_before_posting() {
5735 let mut coa_gen =
5736 ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
5737 let coa = Arc::new(coa_gen.generate());
5738
5739 let mut je_gen = JournalEntryGenerator::new_with_params(
5740 TransactionConfig::default(),
5741 coa,
5742 vec!["1000".to_string()],
5743 NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
5744 NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
5745 42,
5746 )
5747 .with_persona_errors(false);
5748
5749 for _ in 0..500 {
5750 let entry = je_gen.generate();
5751
5752 if let Some(created_date) = entry.header.created_date {
5753 let created_naive_date = created_date.date();
5754 assert!(
5755 created_naive_date <= entry.header.posting_date,
5756 "created_date ({}) should be <= posting_date ({})",
5757 created_naive_date,
5758 entry.header.posting_date,
5759 );
5760 }
5761 }
5762 }
5763
5764 #[test]
5768 fn apply_calibration_step_updates_lognormal_sigma() {
5769 let mut coa_gen =
5770 ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
5771 let coa = Arc::new(coa_gen.generate());
5772
5773 let mut gen = JournalEntryGenerator::new_with_params(
5774 TransactionConfig::default(),
5775 coa,
5776 vec!["1000".to_string()],
5777 NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
5778 NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
5779 42,
5780 );
5781
5782 let baseline_sigma = gen.amount_sampler.lognormal_sigma();
5783
5784 let step_sigma = crate::velocity_calibrator::CalibrationStep {
5785 rule_id: "R6".to_string(),
5786 parameter: "amounts.lognormal_sigma".to_string(),
5787 delta: 0.01,
5788 new_value: baseline_sigma + 0.01,
5789 };
5790 gen.apply_calibration_step(&step_sigma);
5791 assert!(
5792 (gen.amount_sampler.lognormal_sigma() - (baseline_sigma + 0.01)).abs() < 1e-9,
5793 "lognormal_sigma should be updated to {}",
5794 baseline_sigma + 0.01
5795 );
5796
5797 let baseline_round = gen.amount_sampler.round_number_probability();
5798 let step_round = crate::velocity_calibrator::CalibrationStep {
5799 rule_id: "R9".to_string(),
5800 parameter: "amounts.round_dollar_share".to_string(),
5801 delta: -0.005,
5802 new_value: (baseline_round - 0.005).max(0.0),
5803 };
5804 gen.apply_calibration_step(&step_round);
5805 let expected = (baseline_round - 0.005).max(0.0).clamp(0.0, 1.0);
5806 assert!(
5807 (gen.amount_sampler.round_number_probability() - expected).abs() < 1e-9,
5808 "round_number_probability should be updated to {}",
5809 expected
5810 );
5811 }
5812
5813 #[test]
5814 fn master_data_resolver_fills_every_pii_kind() {
5815 use datasynth_core::distributions::text_taxonomy::{
5816 PiiPlaceholderKind, PlaceholderResolver,
5817 };
5818 let mut r = MasterDataResolver {
5819 companies: vec!["Acme AG".to_string()],
5820 persons: vec!["Hans Muster".to_string()],
5821 streets: vec!["Hauptstrasse 1".to_string()],
5822 patients: vec!["Patient X".to_string()],
5823 };
5824 let mut rng = rand::rng();
5825 assert_eq!(r.resolve(PiiPlaceholderKind::Company, &mut rng), "Acme AG");
5826 assert_eq!(
5827 r.resolve(PiiPlaceholderKind::Person, &mut rng),
5828 "Hans Muster"
5829 );
5830 assert_eq!(
5831 r.resolve(PiiPlaceholderKind::Street, &mut rng),
5832 "Hauptstrasse 1"
5833 );
5834 assert_eq!(
5835 r.resolve(PiiPlaceholderKind::Patient, &mut rng),
5836 "Patient X"
5837 );
5838 }
5839
5840 #[test]
5841 fn master_data_resolver_empty_pool_falls_back() {
5842 use datasynth_core::distributions::text_taxonomy::{
5843 PiiPlaceholderKind, PlaceholderResolver,
5844 };
5845 let mut r = MasterDataResolver::default();
5846 let mut rng = rand::rng();
5847 let v = r.resolve(PiiPlaceholderKind::Company, &mut rng);
5848 assert!(!v.is_empty());
5849 }
5850
5851 #[test]
5863 fn synthetic_patient_pool_entries_pass_residual_scan() {
5864 use datasynth_core::distributions::text_taxonomy::PlaceholderGrammar;
5865 for name in synthetic_patient_pool("de_CH") {
5866 let filled = format!("*{name} G:2024-01-15 E:2024-01-20 A:2024-02-01");
5867 let structural: Vec<_> = PlaceholderGrammar::residual_pii_scan(&filled)
5868 .into_iter()
5869 .filter(|h| h.pattern != "given_name")
5870 .collect();
5871 assert!(
5872 structural.is_empty(),
5873 "synthetic patient name {name:?} fills to PII-shaped {filled:?}: {structural:?}"
5874 );
5875 }
5876 }
5877
5878 #[test]
5879 fn master_data_resolver_fallbacks_are_non_empty_and_placeholder_free() {
5880 use datasynth_core::distributions::text_taxonomy::{
5881 PiiPlaceholderKind, PlaceholderResolver,
5882 };
5883 let mut r = MasterDataResolver::default();
5887 let mut rng = rand::rng();
5888 for kind in [
5889 PiiPlaceholderKind::Company,
5890 PiiPlaceholderKind::Person,
5891 PiiPlaceholderKind::Street,
5892 PiiPlaceholderKind::Patient,
5893 ] {
5894 let v = r.resolve(kind, &mut rng);
5895 assert!(!v.is_empty(), "fallback for {kind:?} must be non-empty");
5896 assert!(
5897 !v.contains('{'),
5898 "fallback for {kind:?} must not contain a placeholder token"
5899 );
5900 }
5901 }
5902}