1use chrono::{Datelike, NaiveDate, Timelike};
4use datasynth_core::utils::seeded_rng;
5use rand::prelude::*;
6use rand_chacha::ChaCha8Rng;
7use rust_decimal::prelude::*;
8use rust_decimal::Decimal;
9use std::sync::{Arc, LazyLock};
10
11use tracing::debug;
12
13use datasynth_config::schema::{
14 AdvancedDistributionConfig, FraudConfig, GeneratorConfig, MixtureDistributionType,
15 TemplateConfig, TemporalPatternsConfig, TransactionConfig,
16};
17use datasynth_core::distributions::{
18 AdvancedAmountSampler, BusinessDayCalculator, CrossDayConfig, DriftAdjustments, DriftConfig,
19 DriftController, EventType, IndustryAmountProfile, IndustryType, LagDistribution,
20 PeriodEndConfig, PeriodEndDynamics, PeriodEndModel, ProcessingLagCalculator,
21 ProcessingLagConfig, *,
22};
23use datasynth_core::models::*;
24use datasynth_core::templates::{
25 descriptions::DescriptionContext, DescriptionGenerator, ReferenceGenerator, ReferenceType,
26};
27use datasynth_core::traits::Generator;
28use datasynth_core::uuid_factory::{DeterministicUuidFactory, GeneratorType};
29use datasynth_core::CountryPack;
30
31use crate::company_selector::WeightedCompanySelector;
32use crate::user_generator::{UserGenerator, UserGeneratorConfig};
33
34use datasynth_core::distributions::text_taxonomy::{PiiPlaceholderKind, PlaceholderResolver};
35
/// Shared source-mix prior, built on first access by calling
/// `SourceMixPrior::sap_default`.
static DEFAULT_SOURCE_MIX: LazyLock<
    datasynth_core::distributions::behavioral_priors::SourceMixPrior,
> = LazyLock::new(datasynth_core::distributions::behavioral_priors::SourceMixPrior::sap_default);

/// Probability used for reversals when the transaction config leaves
/// `reversal_rate` unset.
const DEFAULT_REVERSAL_RATE: f64 = 0.10;

/// Probability used for allocation batches when the transaction config leaves
/// `allocation_batch_rate` unset.
const DEFAULT_ALLOCATION_RATE: f64 = 0.008;
/// Candidate foreign currencies as `(currency code, rate)` pairs.
///
/// The rate is used as a divisor: a line's ledger amount divided by the rate
/// gives its transaction-currency amount.
const FOREIGN_CCYS: &[(&str, f64)] = &[
    ("EUR", 1.09),
    ("GBP", 1.27),
    ("CHF", 1.12),
    ("CAD", 0.74),
    ("JPY", 0.0068),
    ("AUD", 0.66),
    ("CNY", 0.14),
];
/// Inclusive lower bound on the number of targets drawn for an allocation batch.
const ALLOCATION_MIN_TARGETS: u32 = 30;
/// Inclusive upper bound on the number of targets drawn for an allocation batch.
const ALLOCATION_MAX_TARGETS: u32 = 80;

/// Exponent of the power-law weights tabulated in `ZIPF_CUM`.
const ZIPF_ALPHA: f64 = 2.0;
/// Highest rank tabulated in `ZIPF_CUM`.
const ZIPF_CAP: usize = 16_384;
78static ZIPF_CUM: LazyLock<Vec<f64>> = LazyLock::new(|| {
82 let mut cum = Vec::with_capacity(ZIPF_CAP + 1);
83 cum.push(0.0);
84 let mut acc = 0.0_f64;
85 for i in 1..=ZIPF_CAP {
86 acc += 1.0 / (i as f64).powf(ZIPF_ALPHA);
87 cum.push(acc);
88 }
89 cum
90});
91
/// Pools of concrete strings used to fill PII placeholders.
///
/// Each pool backs one `PiiPlaceholderKind`; an empty pool makes the resolver
/// fall back to a fixed synthetic string for that kind.
#[derive(Debug, Default)]
pub struct MasterDataResolver {
    /// Candidate values for `PiiPlaceholderKind::Company`.
    pub companies: Vec<String>,
    /// Candidate values for `PiiPlaceholderKind::Person`.
    pub persons: Vec<String>,
    /// Candidate values for `PiiPlaceholderKind::Street`.
    pub streets: Vec<String>,
    /// Candidate values for `PiiPlaceholderKind::Patient`.
    pub patients: Vec<String>,
}
105
106impl PlaceholderResolver for MasterDataResolver {
107 fn resolve(&mut self, kind: PiiPlaceholderKind, rng: &mut dyn rand::Rng) -> String {
108 use rand::RngExt;
109 let (pool, fallback): (&Vec<String>, &str) = match kind {
110 PiiPlaceholderKind::Company => (&self.companies, "Synthetic Company AG"),
111 PiiPlaceholderKind::Person => (&self.persons, "Synthetic Person"),
112 PiiPlaceholderKind::Street => (&self.streets, "Synthetic Street 1"),
113 PiiPlaceholderKind::Patient => (&self.patients, "Synthetic Patient"),
114 };
115 if pool.is_empty() {
116 return fallback.to_string();
117 }
118 let idx = rng.random_range(0..pool.len());
119 pool[idx].clone()
120 }
121}
122
/// Returns the fixed list of obviously synthetic patient names.
///
/// `_locale` is currently ignored: every locale gets the same eight names.
fn synthetic_patient_pool(_locale: &str) -> Vec<String> {
    const NAMES: [&str; 8] = [
        "Alex Beispiel",
        "Bea Muster",
        "Cleo Synthetic",
        "Demo Example",
        "Erik Probe",
        "Fred Testperson",
        "Gerda Platzhalter",
        "Hans Demo",
    ];

    let mut pool = Vec::with_capacity(NAMES.len());
    for name in NAMES.iter() {
        pool.push((*name).to_owned());
    }
    pool
}
149
/// Stateful generator of synthetic journal entries.
///
/// Holds the configuration, master-data pools, samplers and several
/// independently seeded RNG streams, all derived from one `seed`.
pub struct JournalEntryGenerator {
    /// Primary RNG stream (`seeded_rng(seed, 0)`).
    rng: ChaCha8Rng,
    /// Stream used when sampling from the built-in default source mix.
    source_mix_rng: ChaCha8Rng,
    /// Cached account archetypes keyed by `(company, doc_type)`; each entry is a
    /// `(debit, credit)` pair of string lists.
    // NOTE(review): the strings are presumably account numbers — confirm.
    recurring_archetypes:
        std::collections::HashMap<(String, String), Vec<(Vec<String>, Vec<String>)>>,
    /// Stream driving archetype reuse decisions and archetype selection.
    template_rng: ChaCha8Rng,
    /// Bounded buffer of recent entries from which reversals and allocation
    /// batches pick (and remove) their source entry.
    reversal_buffer: Vec<JournalEntry>,
    /// Stream driving reversal decisions, source pick and date offset.
    reversal_rng: ChaCha8Rng,
    /// Dedicated stream; its consumers are outside this section.
    // NOTE(review): presumably used for account selection — confirm.
    account_rng: ChaCha8Rng,
    /// Stream driving allocation-batch decisions and amount splitting.
    allocation_rng: ChaCha8Rng,
    /// Stream driving the foreign-currency decision and currency pick.
    fx_rng: ChaCha8Rng,
    /// Dedicated stream; presumably paired with `cond_pair_sampler` — confirm.
    cond_pair_rng: ChaCha8Rng,
    /// Optional source-conditional pair sampler; `None` after construction.
    cond_pair_sampler: Option<
        datasynth_core::distributions::source_conditional_pair::SourceConditionalPairSampler,
    >,
    /// Source attached to the entry currently being built, if any.
    // NOTE(review): lifecycle is not visible in this section — confirm.
    current_je_source: Option<String>,
    /// Base seed from which all RNG streams and sub-generators are derived.
    seed: u64,
    /// Transaction-level configuration (rates, distributions, toggles).
    config: TransactionConfig,
    /// Shared chart of accounts.
    coa: Arc<ChartOfAccounts>,
    /// Company codes entries may be generated for.
    companies: Vec<String>,
    /// Weighted selector over `companies`; uniform unless replaced.
    company_selector: WeightedCompanySelector,
    /// Sampler built from the line-item, even/odd and debit/credit distributions.
    line_sampler: LineItemSampler,
    /// Baseline amount sampler built from `config.amounts`.
    amount_sampler: AmountSampler,
    /// Sampler for posting times; carries seasonality and period-end dynamics.
    temporal_sampler: TemporalSampler,
    /// First date of the generation window.
    start_date: NaiveDate,
    /// Last date of the generation window; reversals are never dated after it.
    end_date: NaiveDate,
    /// Counter initialised to zero.
    // NOTE(review): presumably the number of entries generated so far — confirm.
    count: u64,
    /// Deterministic UUID source for journal-entry documents.
    uuid_factory: DeterministicUuidFactory,
    /// Optional pool of users; supplies person names to `md_resolver`.
    user_pool: Option<UserPool>,
    /// Generator for entry descriptions.
    description_generator: DescriptionGenerator,
    /// Generator for document references, configured with per-type prefixes.
    reference_generator: ReferenceGenerator,
    /// Template settings (names, references) this generator was built with.
    template_config: TemplateConfig,
    /// Vendor master data; a standard pool unless real vendors were supplied.
    vendor_pool: VendorPool,
    /// Customer master data; a standard pool unless real customers were supplied.
    customer_pool: CustomerPool,
    /// Material master data, present only when real materials were supplied.
    material_pool: Option<MaterialPool>,
    /// Cost-center identifiers; empty unless set through the builder.
    cost_center_pool: Vec<String>,
    /// Profit-center identifiers; empty unless set through the builder.
    profit_center_pool: Vec<String>,
    /// `true` once any non-empty vendor, customer or material list was supplied.
    using_real_master_data: bool,
    /// Fraud-injection configuration.
    fraud_config: FraudConfig,
    /// Toggle, `true` by default.
    // NOTE(review): presumably enables persona-driven error injection — confirm.
    persona_errors_enabled: bool,
    /// Toggle for approval handling, `true` by default.
    approval_enabled: bool,
    /// Amount threshold associated with approvals; defaults to 10000.
    approval_threshold: rust_decimal::Decimal,
    /// Rate of segregation-of-duties violations; defaults to 0.10.
    // NOTE(review): "sod" read as segregation of duties — confirm.
    sod_violation_rate: f64,
    /// State of an in-progress batch of related entries, if any.
    batch_state: Option<BatchState>,
    /// Optional drift controller; `None` after construction.
    drift_controller: Option<DriftController>,
    /// Business-day calculator; built for the US calendar of the start year by default.
    business_day_calculator: Option<BusinessDayCalculator>,
    /// Processing-lag calculator, set when processing lags are enabled.
    processing_lag_calculator: Option<ProcessingLagCalculator>,
    /// Temporal-pattern configuration, stored once temporal patterns are applied.
    temporal_patterns_config: Option<TemporalPatternsConfig>,
    /// Weight per business process, in the order O2C, P2P, R2R, H2R, A2R.
    business_process_weights: [(BusinessProcess, f64); 5],
    /// Optional advanced amount sampler (Pareto, log-normal or Gaussian mixture).
    advanced_amount_sampler: Option<AdvancedAmountSampler>,
    /// Optional conditional sampler whose output field is `"amount"`.
    conditional_amount_override: Option<datasynth_core::distributions::ConditionalSampler>,
    /// Optional copula for the `("amount", "line_count")` pair.
    correlation_copula: Option<datasynth_core::distributions::BivariateCopulaSampler>,
    /// Externally loaded priors; when present they take precedence for source
    /// sampling and disable archetype caching.
    pub loaded_priors: Option<crate::priors_loader::LoadedPriors>,
    /// Per-key floating-point accumulator.
    // NOTE(review): key and unit semantics are not visible here — confirm.
    iet_day_accum: std::collections::HashMap<String, f64>,
    /// Last value recorded per source.
    // NOTE(review): meaning of "tp" is not visible here — confirm.
    last_tp_by_source: std::collections::HashMap<String, String>,
    /// Optional velocity calibrator; `None` after construction.
    pub velocity_calibrator: Option<crate::velocity_calibrator::VelocityCalibrator>,
    /// Resolver that fills PII placeholders from the master-data pools.
    md_resolver: MasterDataResolver,
}
290
/// Business-process weights a freshly constructed generator starts with.
///
/// The five weights sum to 1.0.
const DEFAULT_BUSINESS_PROCESS_WEIGHTS: [(BusinessProcess, f64); 5] = [
    (BusinessProcess::O2C, 0.35),
    (BusinessProcess::P2P, 0.30),
    (BusinessProcess::R2R, 0.20),
    (BusinessProcess::H2R, 0.10),
    (BusinessProcess::A2R, 0.05),
];
298
299impl JournalEntryGenerator {
325 fn supported_conditional_input(field: &str) -> bool {
326 matches!(
327 field,
328 "month"
329 | "quarter"
330 | "year"
331 | "day_of_week"
332 | "day_of_month"
333 | "day_of_year"
334 | "week_of_year"
335 | "is_period_end"
336 | "is_quarter_end"
337 | "is_year_end"
338 | "constant"
339 | ""
340 )
341 }
342
343 fn conditional_input_value(&self, posting_date: chrono::NaiveDate) -> f64 {
344 let input_field = match self
345 .conditional_amount_override
346 .as_ref()
347 .map(|s| s.config().input_field.as_str())
348 {
349 Some(f) => f,
350 None => return 0.0,
351 };
352
353 let is_last_business_day = |d: chrono::NaiveDate| -> bool {
354 let next = d.succ_opt();
358 match next {
359 Some(n) => n.month() != d.month(),
360 None => true,
361 }
362 };
363
364 match input_field {
365 "month" => posting_date.month() as f64,
366 "quarter" => ((posting_date.month() - 1) / 3 + 1) as f64,
367 "year" => posting_date.year() as f64,
368 "day_of_week" => posting_date.weekday().number_from_monday() as f64,
369 "day_of_month" => posting_date.day() as f64,
370 "day_of_year" => posting_date.ordinal() as f64,
371 "week_of_year" => posting_date.iso_week().week() as f64,
372 "is_period_end" => f64::from(u8::from(is_last_business_day(posting_date))),
373 "is_quarter_end" => {
374 let m = posting_date.month();
375 let is_q_month = matches!(m, 3 | 6 | 9 | 12);
376 f64::from(u8::from(is_q_month && is_last_business_day(posting_date)))
377 }
378 "is_year_end" => f64::from(u8::from(
379 posting_date.month() == 12 && is_last_business_day(posting_date),
380 )),
381 _ => 0.0,
382 }
383 }
384}
385
386fn industry_profile_to_log_normal(
387 p: datasynth_config::schema::IndustryProfileType,
388) -> datasynth_core::distributions::LogNormalMixtureConfig {
389 use datasynth_config::schema::IndustryProfileType as P;
390 let industry = match p {
391 P::Retail => IndustryType::Retail,
392 P::Manufacturing => IndustryType::Manufacturing,
393 P::FinancialServices => IndustryType::FinancialServices,
394 P::Healthcare => IndustryType::Healthcare,
395 P::Technology => IndustryType::Technology,
396 };
397 IndustryAmountProfile::for_industry(industry).sales_amounts
398}
399
/// Snapshot of the entry a batch of related entries is derived from.
// NOTE(review): the consumers of this state are outside this section; the field
// notes below are inferred from names and types — confirm against usage.
#[derive(Clone)]
struct BatchState {
    /// Account number of the base entry.
    base_account_number: String,
    /// Amount of the base entry.
    base_amount: rust_decimal::Decimal,
    /// Business process of the base entry, when known.
    base_business_process: Option<BusinessProcess>,
    /// Posting date of the base entry.
    base_posting_date: NaiveDate,
    /// Presumably the number of batch entries still to be produced — confirm.
    remaining: u8,
}
414
415impl JournalEntryGenerator {
416 pub fn new_with_params(
418 config: TransactionConfig,
419 coa: Arc<ChartOfAccounts>,
420 companies: Vec<String>,
421 start_date: NaiveDate,
422 end_date: NaiveDate,
423 seed: u64,
424 ) -> Self {
425 Self::new_with_full_config(
426 config,
427 coa,
428 companies,
429 start_date,
430 end_date,
431 seed,
432 TemplateConfig::default(),
433 None,
434 )
435 }
436
437 #[allow(clippy::too_many_arguments)]
439 pub fn new_with_full_config(
440 config: TransactionConfig,
441 coa: Arc<ChartOfAccounts>,
442 companies: Vec<String>,
443 start_date: NaiveDate,
444 end_date: NaiveDate,
445 seed: u64,
446 template_config: TemplateConfig,
447 user_pool: Option<UserPool>,
448 ) -> Self {
449 let user_pool = user_pool.or_else(|| {
451 if template_config.names.generate_realistic_names {
452 let user_gen_config = UserGeneratorConfig {
453 culture_distribution: vec![
454 (
455 datasynth_core::templates::NameCulture::WesternUs,
456 template_config.names.culture_distribution.western_us,
457 ),
458 (
459 datasynth_core::templates::NameCulture::Hispanic,
460 template_config.names.culture_distribution.hispanic,
461 ),
462 (
463 datasynth_core::templates::NameCulture::German,
464 template_config.names.culture_distribution.german,
465 ),
466 (
467 datasynth_core::templates::NameCulture::French,
468 template_config.names.culture_distribution.french,
469 ),
470 (
471 datasynth_core::templates::NameCulture::Chinese,
472 template_config.names.culture_distribution.chinese,
473 ),
474 (
475 datasynth_core::templates::NameCulture::Japanese,
476 template_config.names.culture_distribution.japanese,
477 ),
478 (
479 datasynth_core::templates::NameCulture::Indian,
480 template_config.names.culture_distribution.indian,
481 ),
482 ],
483 email_domain: template_config.names.email_domain.clone(),
484 generate_realistic_names: true,
485 };
486 let mut user_gen = UserGenerator::with_config(seed + 100, user_gen_config);
487 Some(user_gen.generate_standard(&companies))
488 } else {
489 None
490 }
491 });
492
493 let mut ref_gen = ReferenceGenerator::new(
495 start_date.year(),
496 companies
497 .first()
498 .map(std::string::String::as_str)
499 .unwrap_or("1000"),
500 );
501 ref_gen.set_prefix(
502 ReferenceType::Invoice,
503 &template_config.references.invoice_prefix,
504 );
505 ref_gen.set_prefix(
506 ReferenceType::PurchaseOrder,
507 &template_config.references.po_prefix,
508 );
509 ref_gen.set_prefix(
510 ReferenceType::SalesOrder,
511 &template_config.references.so_prefix,
512 );
513
514 let company_selector = WeightedCompanySelector::uniform(companies.clone());
516
517 Self {
518 rng: seeded_rng(seed, 0),
519 source_mix_rng: seeded_rng(seed, 50_063),
520 recurring_archetypes: std::collections::HashMap::new(),
521 template_rng: seeded_rng(seed, 70_081),
522 reversal_buffer: Vec::new(),
523 reversal_rng: seeded_rng(seed, 90_017),
524 account_rng: seeded_rng(seed, 60_071),
525 allocation_rng: seeded_rng(seed, 80_023),
526 fx_rng: seeded_rng(seed, 70_093),
527 cond_pair_rng: seeded_rng(seed, 110_071),
528 cond_pair_sampler: None,
529 current_je_source: None,
530 seed,
531 config: config.clone(),
532 coa,
533 companies,
534 company_selector,
535 line_sampler: LineItemSampler::with_config(
536 seed + 1,
537 config.line_item_distribution.clone(),
538 config.even_odd_distribution.clone(),
539 config.debit_credit_distribution.clone(),
540 ),
541 amount_sampler: AmountSampler::with_config(seed + 2, config.amounts.clone()),
542 temporal_sampler: TemporalSampler::with_config(
543 seed + 3,
544 config.seasonality.clone(),
545 WorkingHoursConfig::default(),
546 Vec::new(),
547 ),
548 start_date,
549 end_date,
550 count: 0,
551 uuid_factory: DeterministicUuidFactory::new(seed, GeneratorType::JournalEntry),
552 user_pool,
553 description_generator: DescriptionGenerator::new(),
554 reference_generator: ref_gen,
555 template_config,
556 vendor_pool: VendorPool::standard(),
557 customer_pool: CustomerPool::standard(),
558 material_pool: None,
559 cost_center_pool: Vec::new(),
560 profit_center_pool: Vec::new(),
561 using_real_master_data: false,
562 fraud_config: FraudConfig::default(),
563 persona_errors_enabled: true, approval_enabled: true, approval_threshold: rust_decimal::Decimal::new(10000, 0), sod_violation_rate: 0.10, batch_state: None,
568 drift_controller: None,
569 business_day_calculator: Some(BusinessDayCalculator::new(HolidayCalendar::new(
572 Region::US,
573 start_date.year(),
574 ))),
575 processing_lag_calculator: None,
576 temporal_patterns_config: None,
577 business_process_weights: DEFAULT_BUSINESS_PROCESS_WEIGHTS,
578 advanced_amount_sampler: None,
579 conditional_amount_override: None,
580 correlation_copula: None,
581 loaded_priors: None,
582 iet_day_accum: std::collections::HashMap::new(),
583 last_tp_by_source: std::collections::HashMap::new(),
584 velocity_calibrator: None,
585 md_resolver: MasterDataResolver::default(),
586 }
587 }
588
589 pub fn set_advanced_distributions(
608 &mut self,
609 config: &AdvancedDistributionConfig,
610 seed: u64,
611 ) -> Result<(), String> {
612 if !config.enabled {
613 return Ok(());
614 }
615
616 self.conditional_amount_override = config
622 .conditional
623 .iter()
624 .find(|c| {
625 c.output_field == "amount" && Self::supported_conditional_input(&c.input_field)
626 })
627 .and_then(|c| {
628 datasynth_core::distributions::ConditionalSampler::new(
629 seed.wrapping_add(17),
630 c.to_core_config(),
631 )
632 .ok()
633 });
634
635 self.correlation_copula = config
641 .correlations
642 .to_core_config_for_pair("amount", "line_count")
643 .and_then(|copula_cfg| {
644 datasynth_core::distributions::BivariateCopulaSampler::new(
645 seed.wrapping_add(31),
646 copula_cfg,
647 )
648 .ok()
649 });
650
651 if let Some(pareto) = &config.pareto {
656 if pareto.enabled {
657 let core_cfg = pareto.to_core_config();
658 self.advanced_amount_sampler =
659 Some(AdvancedAmountSampler::new_pareto(seed, core_cfg)?);
660 return Ok(());
661 }
662 }
663
664 if !config.amounts.enabled {
665 return Ok(());
666 }
667
668 match config.amounts.distribution_type {
669 MixtureDistributionType::LogNormal => {
670 let lognormal_cfg = config.amounts.to_log_normal_config().or_else(|| {
671 config
672 .industry_profile
673 .as_ref()
674 .map(|p| industry_profile_to_log_normal(p.profile_type()))
675 });
676 if let Some(cfg) = lognormal_cfg {
677 self.advanced_amount_sampler =
678 Some(AdvancedAmountSampler::new_log_normal(seed, cfg)?);
679 }
680 }
681 MixtureDistributionType::Gaussian => {
682 if let Some(cfg) = config.amounts.to_gaussian_config() {
683 self.advanced_amount_sampler =
684 Some(AdvancedAmountSampler::new_gaussian(seed, cfg)?);
685 }
686 }
687 }
688
689 Ok(())
690 }
691
692 pub fn set_business_process_weights(
696 &mut self,
697 o2c: f64,
698 p2p: f64,
699 r2r: f64,
700 h2r: f64,
701 a2r: f64,
702 ) {
703 self.business_process_weights = [
704 (BusinessProcess::O2C, o2c),
705 (BusinessProcess::P2P, p2p),
706 (BusinessProcess::R2R, r2r),
707 (BusinessProcess::H2R, h2r),
708 (BusinessProcess::A2R, a2r),
709 ];
710 }
711
712 pub fn from_generator_config(
717 full_config: &GeneratorConfig,
718 coa: Arc<ChartOfAccounts>,
719 start_date: NaiveDate,
720 end_date: NaiveDate,
721 seed: u64,
722 ) -> Self {
723 let companies: Vec<String> = full_config
724 .companies
725 .iter()
726 .map(|c| c.code.clone())
727 .collect();
728
729 let company_selector = WeightedCompanySelector::from_configs(&full_config.companies);
731
732 let mut generator = Self::new_with_full_config(
733 full_config.transactions.clone(),
734 coa,
735 companies,
736 start_date,
737 end_date,
738 seed,
739 full_config.templates.clone(),
740 None,
741 );
742
743 generator.company_selector = company_selector;
745
746 generator.fraud_config = full_config.fraud.clone();
748
749 let temporal_config = &full_config.temporal_patterns;
751 if temporal_config.enabled {
752 generator = generator.with_temporal_patterns(temporal_config.clone(), seed);
753 }
754
755 generator
756 }
757
758 pub fn with_temporal_patterns(mut self, config: TemporalPatternsConfig, seed: u64) -> Self {
765 if config.business_days.enabled {
767 let region = config
768 .calendars
769 .regions
770 .first()
771 .map(|r| Self::parse_region(r))
772 .unwrap_or(Region::US);
773
774 let calendar = HolidayCalendar::new(region, self.start_date.year());
775 self.business_day_calculator = Some(BusinessDayCalculator::new(calendar));
776 }
777
778 if config.processing_lags.enabled {
780 let lag_config = Self::convert_processing_lag_config(&config.processing_lags);
781 self.processing_lag_calculator =
782 Some(ProcessingLagCalculator::with_config(seed, lag_config));
783 }
784
785 let model = config.period_end.model.as_deref().unwrap_or("flat");
787 if model != "flat"
788 || config
789 .period_end
790 .month_end
791 .as_ref()
792 .is_some_and(|m| m.peak_multiplier.unwrap_or(1.0) != 1.0)
793 {
794 let dynamics = Self::convert_period_end_config(&config.period_end);
795 self.temporal_sampler.set_period_end_dynamics(dynamics);
796 }
797
798 self.temporal_patterns_config = Some(config);
799 self
800 }
801
802 pub fn with_country_pack_temporal(
810 mut self,
811 config: TemporalPatternsConfig,
812 seed: u64,
813 pack: &CountryPack,
814 ) -> Self {
815 if config.business_days.enabled {
817 let calendar = HolidayCalendar::from_country_pack(pack, self.start_date.year());
818 self.business_day_calculator = Some(BusinessDayCalculator::new(calendar));
819 }
820
821 if config.processing_lags.enabled {
823 let lag_config = Self::convert_processing_lag_config(&config.processing_lags);
824 self.processing_lag_calculator =
825 Some(ProcessingLagCalculator::with_config(seed, lag_config));
826 }
827
828 let model = config.period_end.model.as_deref().unwrap_or("flat");
830 if model != "flat"
831 || config
832 .period_end
833 .month_end
834 .as_ref()
835 .is_some_and(|m| m.peak_multiplier.unwrap_or(1.0) != 1.0)
836 {
837 let dynamics = Self::convert_period_end_config(&config.period_end);
838 self.temporal_sampler.set_period_end_dynamics(dynamics);
839 }
840
841 self.temporal_patterns_config = Some(config);
842 self
843 }
844
845 fn convert_processing_lag_config(
847 schema: &datasynth_config::schema::ProcessingLagSchemaConfig,
848 ) -> ProcessingLagConfig {
849 let mut config = ProcessingLagConfig {
850 enabled: schema.enabled,
851 ..Default::default()
852 };
853
854 let convert_lag = |lag: &datasynth_config::schema::LagDistributionSchemaConfig| {
856 let mut dist = LagDistribution::log_normal(lag.mu, lag.sigma);
857 if let Some(min) = lag.min_hours {
858 dist.min_lag_hours = min;
859 }
860 if let Some(max) = lag.max_hours {
861 dist.max_lag_hours = max;
862 }
863 dist
864 };
865
866 if let Some(ref lag) = schema.sales_order_lag {
868 config
869 .event_lags
870 .insert(EventType::SalesOrder, convert_lag(lag));
871 }
872 if let Some(ref lag) = schema.purchase_order_lag {
873 config
874 .event_lags
875 .insert(EventType::PurchaseOrder, convert_lag(lag));
876 }
877 if let Some(ref lag) = schema.goods_receipt_lag {
878 config
879 .event_lags
880 .insert(EventType::GoodsReceipt, convert_lag(lag));
881 }
882 if let Some(ref lag) = schema.invoice_receipt_lag {
883 config
884 .event_lags
885 .insert(EventType::InvoiceReceipt, convert_lag(lag));
886 }
887 if let Some(ref lag) = schema.invoice_issue_lag {
888 config
889 .event_lags
890 .insert(EventType::InvoiceIssue, convert_lag(lag));
891 }
892 if let Some(ref lag) = schema.payment_lag {
893 config
894 .event_lags
895 .insert(EventType::Payment, convert_lag(lag));
896 }
897 if let Some(ref lag) = schema.journal_entry_lag {
898 config
899 .event_lags
900 .insert(EventType::JournalEntry, convert_lag(lag));
901 }
902
903 if let Some(ref cross_day) = schema.cross_day_posting {
905 config.cross_day = CrossDayConfig {
906 enabled: cross_day.enabled,
907 probability_by_hour: cross_day.probability_by_hour.clone(),
908 ..Default::default()
909 };
910 }
911
912 config
913 }
914
915 fn convert_period_end_config(
917 schema: &datasynth_config::schema::PeriodEndSchemaConfig,
918 ) -> PeriodEndDynamics {
919 let model_type = schema.model.as_deref().unwrap_or("exponential");
920
921 let convert_period =
923 |period: Option<&datasynth_config::schema::PeriodEndModelSchemaConfig>,
924 default_peak: f64|
925 -> PeriodEndConfig {
926 if let Some(p) = period {
927 let model = match model_type {
928 "flat" => PeriodEndModel::FlatMultiplier {
929 multiplier: p.peak_multiplier.unwrap_or(default_peak),
930 },
931 "extended_crunch" => PeriodEndModel::ExtendedCrunch {
932 start_day: p.start_day.unwrap_or(-10),
933 sustained_high_days: p.sustained_high_days.unwrap_or(3),
934 peak_multiplier: p.peak_multiplier.unwrap_or(default_peak),
935 ramp_up_days: 3, },
937 _ => PeriodEndModel::ExponentialAcceleration {
938 start_day: p.start_day.unwrap_or(-10),
939 base_multiplier: p.base_multiplier.unwrap_or(1.0),
940 peak_multiplier: p.peak_multiplier.unwrap_or(default_peak),
941 decay_rate: p.decay_rate.unwrap_or(0.3),
942 },
943 };
944 PeriodEndConfig {
945 enabled: true,
946 model,
947 additional_multiplier: p.additional_multiplier.unwrap_or(1.0),
948 }
949 } else {
950 PeriodEndConfig {
951 enabled: true,
952 model: PeriodEndModel::ExponentialAcceleration {
953 start_day: -10,
954 base_multiplier: 1.0,
955 peak_multiplier: default_peak,
956 decay_rate: 0.3,
957 },
958 additional_multiplier: 1.0,
959 }
960 }
961 };
962
963 PeriodEndDynamics::new(
964 convert_period(schema.month_end.as_ref(), 2.0),
965 convert_period(schema.quarter_end.as_ref(), 3.5),
966 convert_period(schema.year_end.as_ref(), 5.0),
967 )
968 }
969
970 fn parse_region(region_str: &str) -> Region {
972 match region_str.to_uppercase().as_str() {
973 "US" => Region::US,
974 "DE" => Region::DE,
975 "GB" => Region::GB,
976 "CN" => Region::CN,
977 "JP" => Region::JP,
978 "IN" => Region::IN,
979 "BR" => Region::BR,
980 "MX" => Region::MX,
981 "AU" => Region::AU,
982 "SG" => Region::SG,
983 "KR" => Region::KR,
984 "FR" => Region::FR,
985 "IT" => Region::IT,
986 "ES" => Region::ES,
987 "CA" => Region::CA,
988 _ => Region::US,
989 }
990 }
991
/// Replaces the company selector used by this generator.
pub fn set_company_selector(&mut self, selector: WeightedCompanySelector) {
    self.company_selector = selector;
}
996
/// Returns the company selector currently in use.
pub fn company_selector(&self) -> &WeightedCompanySelector {
    &self.company_selector
}
1001
/// Replaces the fraud configuration used by this generator.
pub fn set_fraud_config(&mut self, config: FraudConfig) {
    self.fraud_config = config;
}
1006
1007 pub fn with_vendors(mut self, vendors: &[Vendor]) -> Self {
1012 if !vendors.is_empty() {
1013 self.vendor_pool = VendorPool::from_vendors(vendors.to_vec());
1014 self.using_real_master_data = true;
1015 }
1016 self
1017 }
1018
1019 pub fn with_customers(mut self, customers: &[Customer]) -> Self {
1024 if !customers.is_empty() {
1025 self.customer_pool = CustomerPool::from_customers(customers.to_vec());
1026 self.using_real_master_data = true;
1027 }
1028 self
1029 }
1030
1031 pub fn with_materials(mut self, materials: &[Material]) -> Self {
1035 if !materials.is_empty() {
1036 self.material_pool = Some(MaterialPool::from_materials(materials.to_vec()));
1037 self.using_real_master_data = true;
1038 }
1039 self
1040 }
1041
1042 pub fn with_master_data(
1047 self,
1048 vendors: &[Vendor],
1049 customers: &[Customer],
1050 materials: &[Material],
1051 ) -> Self {
1052 self.with_vendors(vendors)
1053 .with_customers(customers)
1054 .with_materials(materials)
1055 }
1056
1057 fn refresh_md_resolver(&mut self) {
1063 let companies: Vec<String> = self
1064 .vendor_pool
1065 .vendors
1066 .iter()
1067 .map(|v| v.name.clone())
1068 .chain(self.customer_pool.customers.iter().map(|c| c.name.clone()))
1069 .collect();
1070
1071 let persons: Vec<String> = self
1072 .user_pool
1073 .as_ref()
1074 .map(|p| p.users.iter().map(|u| u.display_name.clone()).collect())
1075 .unwrap_or_default();
1076
1077 let streets: Vec<String> = Vec::new(); let patients = synthetic_patient_pool("de_CH");
1079
1080 self.md_resolver = MasterDataResolver {
1081 companies,
1082 persons,
1083 streets,
1084 patients,
1085 };
1086 }
1087
/// Replaces the cost-center pool with `ids` and returns the generator.
pub fn with_cost_center_pool(mut self, ids: Vec<String>) -> Self {
    self.cost_center_pool = ids;
    self
}
1100
/// Replaces the profit-center pool with `ids` and returns the generator.
pub fn with_profit_center_pool(mut self, ids: Vec<String>) -> Self {
    self.profit_center_pool = ids;
    self
}
1112
/// Replaces the user pool with `pool` and returns the generator.
pub fn with_user_pool(mut self, pool: UserPool) -> Self {
    self.user_pool = Some(pool);
    self
}
1125
1126 pub fn with_country_pack_names(mut self, pack: &CountryPack) -> Self {
1133 let name_gen =
1134 datasynth_core::templates::MultiCultureNameGenerator::from_country_pack(pack);
1135 let config = UserGeneratorConfig {
1136 culture_distribution: Vec::new(),
1139 email_domain: name_gen.email_domain().to_string(),
1140 generate_realistic_names: true,
1141 };
1142 let mut user_gen = UserGenerator::with_name_generator(self.seed + 100, config, name_gen);
1143 self.user_pool = Some(user_gen.generate_standard(&self.companies));
1144 self
1145 }
1146
/// Returns `true` once a non-empty vendor, customer or material list has been
/// supplied to this generator.
pub fn is_using_real_master_data(&self) -> bool {
    self.using_real_master_data
}
1151
1152 fn pick_source_system(rng: &mut ChaCha8Rng, is_manual: bool, bp: BusinessProcess) -> String {
1166 if is_manual {
1167 const MANUAL: &[&str] = &[
1170 "manual/standard",
1171 "manual/adjustment",
1172 "manual/reclassification",
1173 "manual/accrual",
1174 "manual/reversal",
1175 "manual/correction",
1176 "spreadsheet/upload",
1177 "spreadsheet/journal",
1178 ];
1179 let idx = (rng.random::<u32>() as usize) % MANUAL.len();
1180 return MANUAL[idx].to_string();
1181 }
1182
1183 let primary: &[&str] = match bp {
1187 BusinessProcess::P2P => &[
1188 "SAP-MM/PO",
1189 "SAP-MM/IV",
1190 "SAP-MM/IM",
1191 "SAP-FI/AP",
1192 "Interface/EDI",
1193 ],
1194 BusinessProcess::O2C => &[
1195 "SAP-SD/ORD",
1196 "SAP-SD/DEL",
1197 "SAP-SD/IV",
1198 "SAP-FI/AR",
1199 "Interface/Lockbox",
1200 ],
1201 BusinessProcess::H2R => &["SAP-HR/PR", "SAP-HR/TIME", "Interface/PayRun"],
1202 BusinessProcess::A2R => &["SAP-FI/AA", "SAP-FI/GL"],
1203 BusinessProcess::Treasury => &["Treasury/CM", "Treasury/HD", "Interface/Bank"],
1204 BusinessProcess::Tax => &["Tax/RPT", "SAP-FI/GL"],
1205 BusinessProcess::Mfg => &["SAP-MM/IM", "SAP-FI/GL"],
1206 _ => &[
1209 "SAP-FI/GL",
1210 "SAP-FI/AP",
1211 "SAP-FI/AR",
1212 "SAP-FI/AA",
1213 "External/SubL",
1214 ],
1215 };
1216
1217 const CROSS: &[&str] = &[
1220 "SAP-FI/GL",
1221 "SAP-FI/AP",
1222 "SAP-FI/AR",
1223 "Interface/EDI",
1224 "Interface/Bank",
1225 "External/SubL",
1226 ];
1227 let pool = if rng.random::<f64>() < 0.80 {
1228 primary
1229 } else {
1230 CROSS
1231 };
1232 let idx = (rng.random::<u32>() as usize) % pool.len();
1233 pool[idx].to_string()
1234 }
1235
1236 fn sample_sap_source_code(&mut self) -> Option<String> {
1243 if let Some(p) = self.loaded_priors.as_ref() {
1244 return Some(p.source_mix.sample(&mut self.rng));
1245 }
1246 if self.config.synthetic_source_codes.unwrap_or(true) {
1247 return Some(DEFAULT_SOURCE_MIX.sample(&mut self.source_mix_rng));
1250 }
1251 None
1252 }
1253
1254 fn pick_recurring_archetype(
1262 &mut self,
1263 company: &str,
1264 doc_type: &str,
1265 debit_count: usize,
1266 credit_count: usize,
1267 ) -> Option<(Vec<String>, Vec<String>)> {
1268 if !self.config.recurring_templates.unwrap_or(true) {
1269 return None;
1270 }
1271 let p_reuse_opt = self.config.archetype_reuse_probability;
1276 if p_reuse_opt.is_none() && self.loaded_priors.is_some() {
1277 return None;
1278 }
1279 let p_reuse = p_reuse_opt.unwrap_or(0.90);
1280 if self.template_rng.random::<f64>() >= p_reuse {
1281 return None;
1282 }
1283 let lib = self
1284 .recurring_archetypes
1285 .get(&(company.to_string(), doc_type.to_string()))?;
1286 let matching: Vec<&(Vec<String>, Vec<String>)> = lib
1287 .iter()
1288 .filter(|(d, c)| d.len() == debit_count && c.len() == credit_count)
1289 .collect();
1290 if matching.is_empty() {
1291 return None;
1292 }
1293 let idx = Self::power_law_index(matching.len(), &mut self.template_rng).unwrap_or(0);
1301 Some(matching[idx].clone())
1302 }
1303
1304 fn cache_recurring_archetype(
1307 &mut self,
1308 company: &str,
1309 doc_type: &str,
1310 debit: Vec<String>,
1311 credit: Vec<String>,
1312 ) {
1313 if self.loaded_priors.is_some() || !self.config.recurring_templates.unwrap_or(true) {
1314 return;
1315 }
1316 if debit.is_empty() && credit.is_empty() {
1317 return;
1318 }
1319 const CAP: usize = 24; let lib = self
1321 .recurring_archetypes
1322 .entry((company.to_string(), doc_type.to_string()))
1323 .or_default();
1324 if lib.len() < CAP {
1325 lib.push((debit, credit));
1326 }
1327 }
1328
1329 fn maybe_generate_reversal(&mut self) -> Option<JournalEntry> {
1336 let rate = self.config.reversal_rate.unwrap_or(DEFAULT_REVERSAL_RATE);
1337 if rate <= 0.0 || self.reversal_buffer.is_empty() {
1338 return None;
1339 }
1340 if self.reversal_rng.random::<f64>() >= rate {
1341 return None;
1342 }
1343 let pick = (self.reversal_rng.random::<u32>() as usize) % self.reversal_buffer.len();
1344 let mut entry = self.reversal_buffer.remove(pick);
1348 let orig_id = entry.header.document_id;
1349 let offset = 1 + (self.reversal_rng.random::<u32>() % 7) as i64;
1351 let mut rev_date = entry.header.posting_date + chrono::Duration::days(offset);
1352 if let Some(ref calc) = self.business_day_calculator {
1353 if !calc.is_business_day(rev_date) {
1354 rev_date = calc.next_business_day(rev_date, false);
1355 }
1356 }
1357 if rev_date > self.end_date {
1358 rev_date = entry.header.posting_date;
1359 }
1360 let rev_id =
1362 uuid::Uuid::from_u128(orig_id.as_u128() ^ 0x5245_5645_5253_414c_5245_5645_5253_414c);
1363 entry.header.document_id = rev_id;
1366 entry.header.posting_date = rev_date;
1367 entry.header.document_date = rev_date;
1368 entry.header.fiscal_year = rev_date.year() as u16;
1369 entry.header.fiscal_period = rev_date.month() as u8;
1370 entry.header.header_text = Some(format!("Reversal of {orig_id}"));
1371 entry.header.reference = Some(format!("REV-{orig_id}"));
1372 entry.header.batch_id = None;
1373 for line in entry.lines.iter_mut() {
1374 std::mem::swap(&mut line.debit_amount, &mut line.credit_amount);
1375 line.document_id = rev_id;
1376 }
1377 Some(entry)
1378 }
1379
1380 fn record_for_reversal(&mut self, entry: &JournalEntry) {
1384 let reversal_on = self.config.reversal_rate.unwrap_or(DEFAULT_REVERSAL_RATE) > 0.0;
1385 let allocation_on = self
1386 .config
1387 .allocation_batch_rate
1388 .unwrap_or(DEFAULT_ALLOCATION_RATE)
1389 > 0.0;
1390 if (!reversal_on && !allocation_on) || entry.lines.is_empty() {
1391 return;
1392 }
1393 const CAP: usize = 64;
1394 if self.reversal_buffer.len() >= CAP {
1395 self.reversal_buffer.remove(0);
1396 }
1397 self.reversal_buffer.push(entry.clone());
1398 }
1399
1400 fn maybe_apply_foreign_currency(&mut self, entry: &mut JournalEntry) {
1408 let prob = self.config.foreign_currency_rate.unwrap_or(0.0);
1409 if prob <= 0.0 || self.fx_rng.random::<f64>() >= prob {
1410 return;
1411 }
1412 let (code, rate) = FOREIGN_CCYS[self.fx_rng.random_range(0..FOREIGN_CCYS.len())];
1413 let rate_dec = match Decimal::from_f64_retain(rate) {
1414 Some(r) if r > Decimal::ZERO => r,
1415 _ => return,
1416 };
1417 entry.header.currency = code.to_string();
1418 entry.header.exchange_rate = rate_dec;
1419 for line in entry.lines.iter_mut() {
1420 let ledger = line.debit_amount + line.credit_amount; line.transaction_amount = Some((ledger / rate_dec).round_dp(2));
1422 }
1423 }
1424
1425 fn split_amount(total: Decimal, n: usize, rng: &mut ChaCha8Rng) -> Vec<Decimal> {
1430 let n = n.max(1);
1431 let total_cents = (total.round_dp(2) * Decimal::from(100))
1432 .to_i64()
1433 .unwrap_or(0);
1434 if n == 1 || total_cents < n as i64 {
1435 return vec![total];
1436 }
1437 let weights: Vec<f64> = (0..n).map(|_| 0.5 + rng.random::<f64>()).collect();
1438 let sumw: f64 = weights.iter().sum::<f64>().max(f64::EPSILON);
1439 let spare = total_cents - n as i64; let mut cents: Vec<i64> = weights
1441 .iter()
1442 .map(|w| 1 + (spare as f64 * w / sumw).floor() as i64)
1443 .collect();
1444 let assigned: i64 = cents.iter().sum();
1446 let leftover = total_cents - assigned;
1447 if let Some(maxp) = cents.iter_mut().max_by_key(|c| **c) {
1448 *maxp += leftover;
1449 }
1450 cents.into_iter().map(|c| Decimal::new(c, 2)).collect()
1451 }
1452
/// Deterministically maps a dimension identifier to one of eleven business-unit codes,
/// `"BU01"` through `"BU11"`.
///
/// The bytes of `dim` are hashed with 32-bit FNV-1a (XOR the byte in, then multiply by the
/// prime with wrap-around); the hash modulo 11, plus one, becomes the zero-padded two-digit
/// suffix. The same input always yields the same code.
fn business_unit_for_dimension(dim: &str) -> String {
    const N_BU: u32 = 11;
    const FNV_OFFSET_BASIS: u32 = 0x811c_9dc5;
    const FNV_PRIME: u32 = 0x0100_0193;

    let hash = dim.bytes().fold(FNV_OFFSET_BASIS, |acc, byte| {
        (acc ^ u32::from(byte)).wrapping_mul(FNV_PRIME)
    });
    let bucket = hash % N_BU + 1;
    format!("BU{bucket:02}")
}
1467
1468 fn maybe_generate_allocation_batch(&mut self) -> Option<JournalEntry> {
1476 let rate = self
1477 .config
1478 .allocation_batch_rate
1479 .unwrap_or(DEFAULT_ALLOCATION_RATE);
1480 if rate <= 0.0 || self.reversal_buffer.is_empty() {
1481 return None;
1482 }
1483 if self.allocation_rng.random::<f64>() >= rate {
1484 return None;
1485 }
1486 let pick = (self.allocation_rng.random::<u32>() as usize) % self.reversal_buffer.len();
1487 let mut entry = self.reversal_buffer.remove(pick);
1490 let idx = entry
1492 .lines
1493 .iter()
1494 .enumerate()
1495 .filter(|(_, l)| l.debit_amount > Decimal::ZERO)
1496 .max_by(|a, b| a.1.debit_amount.cmp(&b.1.debit_amount))
1497 .map(|(i, _)| i)?;
1498 let template = entry.lines[idx].clone();
1499 let n = self
1500 .allocation_rng
1501 .random_range(ALLOCATION_MIN_TARGETS..=ALLOCATION_MAX_TARGETS) as usize;
1502 let parts = Self::split_amount(template.debit_amount, n, &mut self.allocation_rng);
1503 if parts.len() < ALLOCATION_MIN_TARGETS as usize {
1504 return None;
1506 }
1507 let company_code = entry.header.company_code.clone();
1509 let cc_pool: Vec<String> = if self.cost_center_pool.is_empty() {
1510 Self::COST_CENTER_POOL
1511 .iter()
1512 .map(|s| s.to_string())
1513 .collect()
1514 } else {
1515 let needle = format!("-{company_code}-");
1516 let filtered: Vec<String> = self
1517 .cost_center_pool
1518 .iter()
1519 .filter(|id| id.contains(&needle))
1520 .cloned()
1521 .collect();
1522 if filtered.is_empty() {
1523 self.cost_center_pool.clone()
1524 } else {
1525 filtered
1526 }
1527 };
1528 let mut new_lines: Vec<JournalEntryLine> =
1529 Vec::with_capacity(entry.lines.len() + parts.len() - 1);
1530 for (j, line) in entry.lines.iter().enumerate() {
1531 if j == idx {
1532 let bu_on = self.config.business_unit_dimension.unwrap_or(true);
1533 for (k, part) in parts.iter().enumerate() {
1534 let mut nl = template.clone();
1535 nl.debit_amount = *part;
1536 nl.credit_amount = Decimal::ZERO;
1537 nl.cost_center = Some(cc_pool[k % cc_pool.len()].clone());
1538 if bu_on {
1541 nl.business_unit = nl
1542 .cost_center
1543 .as_deref()
1544 .map(Self::business_unit_for_dimension);
1545 }
1546 new_lines.push(nl);
1547 }
1548 } else {
1549 new_lines.push(line.clone());
1550 }
1551 }
1552 let base_id = entry.header.document_id;
1554 let alloc_id =
1555 uuid::Uuid::from_u128(base_id.as_u128() ^ 0xA110_CA70_A110_CA70_A110_CA70_A110_CA70);
1556 entry.header.document_id = alloc_id;
1557 entry.header.sap_source_code = Some("AB".to_string());
1558 entry.header.header_text = Some("Allocation/assessment cycle".to_string());
1559 entry.header.reference = Some(format!("ALLOC-{base_id}"));
1560 entry.header.batch_id = None;
1561 for (i, line) in new_lines.iter_mut().enumerate() {
1562 line.line_number = (i + 1) as u32;
1563 line.document_id = alloc_id;
1564 }
1565 entry.lines = new_lines.into();
1566 Some(entry)
1567 }
1568
1569 fn determine_fraud(&mut self) -> Option<FraudType> {
1570 if !self.fraud_config.enabled {
1571 return None;
1572 }
1573
1574 if self.rng.random::<f64>() >= self.fraud_config.fraud_rate {
1576 return None;
1577 }
1578
1579 Some(self.select_fraud_type())
1581 }
1582
1583 fn select_fraud_type(&mut self) -> FraudType {
1585 let dist = &self.fraud_config.fraud_type_distribution;
1586 let roll: f64 = self.rng.random();
1587
1588 let mut cumulative = 0.0;
1589
1590 cumulative += dist.suspense_account_abuse;
1591 if roll < cumulative {
1592 return FraudType::SuspenseAccountAbuse;
1593 }
1594
1595 cumulative += dist.fictitious_transaction;
1596 if roll < cumulative {
1597 return FraudType::FictitiousTransaction;
1598 }
1599
1600 cumulative += dist.revenue_manipulation;
1601 if roll < cumulative {
1602 return FraudType::RevenueManipulation;
1603 }
1604
1605 cumulative += dist.expense_capitalization;
1606 if roll < cumulative {
1607 return FraudType::ExpenseCapitalization;
1608 }
1609
1610 cumulative += dist.split_transaction;
1611 if roll < cumulative {
1612 return FraudType::SplitTransaction;
1613 }
1614
1615 cumulative += dist.timing_anomaly;
1616 if roll < cumulative {
1617 return FraudType::TimingAnomaly;
1618 }
1619
1620 cumulative += dist.unauthorized_access;
1621 if roll < cumulative {
1622 return FraudType::UnauthorizedAccess;
1623 }
1624
1625 cumulative += dist.duplicate_payment;
1626 if roll < cumulative {
1627 return FraudType::DuplicatePayment;
1628 }
1629
1630 cumulative += dist.kickback_scheme;
1631 if roll < cumulative {
1632 return FraudType::KickbackScheme;
1633 }
1634
1635 cumulative += dist.round_tripping;
1636 if roll < cumulative {
1637 return FraudType::RoundTripping;
1638 }
1639
1640 cumulative += dist.unauthorized_discount;
1641 if roll < cumulative {
1642 return FraudType::UnauthorizedDiscount;
1643 }
1644
1645 FraudType::DuplicatePayment
1647 }
1648
1649 fn fraud_type_to_amount_pattern(&self, fraud_type: FraudType) -> FraudAmountPattern {
1651 match fraud_type {
1652 FraudType::SplitTransaction | FraudType::JustBelowThreshold => {
1653 FraudAmountPattern::ThresholdAdjacent
1654 }
1655 FraudType::FictitiousTransaction
1656 | FraudType::FictitiousEntry
1657 | FraudType::SuspenseAccountAbuse
1658 | FraudType::RoundDollarManipulation => FraudAmountPattern::ObviousRoundNumbers,
1659 FraudType::RevenueManipulation
1660 | FraudType::ExpenseCapitalization
1661 | FraudType::ImproperCapitalization
1662 | FraudType::ReserveManipulation
1663 | FraudType::UnauthorizedAccess
1664 | FraudType::PrematureRevenue
1665 | FraudType::UnderstatedLiabilities
1666 | FraudType::OverstatedAssets
1667 | FraudType::ChannelStuffing => FraudAmountPattern::StatisticallyImprobable,
1668 FraudType::DuplicatePayment
1669 | FraudType::TimingAnomaly
1670 | FraudType::SelfApproval
1671 | FraudType::ExceededApprovalLimit
1672 | FraudType::SegregationOfDutiesViolation
1673 | FraudType::UnauthorizedApproval
1674 | FraudType::CollusiveApproval
1675 | FraudType::FictitiousVendor
1676 | FraudType::ShellCompanyPayment
1677 | FraudType::Kickback
1678 | FraudType::KickbackScheme
1679 | FraudType::UnauthorizedDiscount
1680 | FraudType::RoundTripping
1681 | FraudType::InvoiceManipulation
1682 | FraudType::AssetMisappropriation
1683 | FraudType::InventoryTheft
1684 | FraudType::GhostEmployee => FraudAmountPattern::Normal,
1685 FraudType::ImproperRevenueRecognition
1687 | FraudType::ImproperPoAllocation
1688 | FraudType::VariableConsiderationManipulation
1689 | FraudType::ContractModificationMisstatement => {
1690 FraudAmountPattern::StatisticallyImprobable
1691 }
1692 FraudType::LeaseClassificationManipulation
1694 | FraudType::OffBalanceSheetLease
1695 | FraudType::LeaseLiabilityUnderstatement
1696 | FraudType::RouAssetMisstatement => FraudAmountPattern::StatisticallyImprobable,
1697 FraudType::FairValueHierarchyManipulation
1699 | FraudType::Level3InputManipulation
1700 | FraudType::ValuationTechniqueManipulation => {
1701 FraudAmountPattern::StatisticallyImprobable
1702 }
1703 FraudType::DelayedImpairment
1705 | FraudType::ImpairmentTestAvoidance
1706 | FraudType::CashFlowProjectionManipulation
1707 | FraudType::ImproperImpairmentReversal => FraudAmountPattern::StatisticallyImprobable,
1708 FraudType::BidRigging
1710 | FraudType::PhantomVendorContract
1711 | FraudType::ConflictOfInterestSourcing => FraudAmountPattern::Normal,
1712 FraudType::SplitContractThreshold => FraudAmountPattern::ThresholdAdjacent,
1713 FraudType::GhostEmployeePayroll
1715 | FraudType::PayrollInflation
1716 | FraudType::DuplicateExpenseReport
1717 | FraudType::FictitiousExpense => FraudAmountPattern::Normal,
1718 FraudType::SplitExpenseToAvoidApproval => FraudAmountPattern::ThresholdAdjacent,
1719 FraudType::RevenueTimingManipulation => FraudAmountPattern::StatisticallyImprobable,
1721 FraudType::QuotePriceOverride => FraudAmountPattern::Normal,
1722 }
1723 }
1724
1725 #[inline]
1727 fn generate_deterministic_uuid(&self) -> uuid::Uuid {
1728 self.uuid_factory.next()
1729 }
1730
/// Built-in cost-center ids, used as the fallback whenever `cost_center_pool` is empty.
const COST_CENTER_POOL: &'static [&'static str] = &[
    "CC1000",
    "CC2000",
    "CC3000",
    "CC4000",
    "CC5000",
];
1734
1735 fn enrich_line_items(&mut self, entry: &mut JournalEntry) {
1744 let posting_date = entry.header.posting_date;
1745 let company_code = &entry.header.company_code;
1746 let header_text = entry.header.header_text.clone();
1747 let business_process = entry.header.business_process;
1748 let doc_type_key = entry.header.document_type.clone();
1751
1752 let header_sap_code: Option<String> = entry.header.sap_source_code.clone();
1756
1757 let (cc_pc_neighbor_vec, cc_pc_share_prob): (Vec<String>, f64) =
1761 if let Some(priors) = &self.loaded_priors {
1762 if let Some(motifs) = &priors.cross_entity_motifs {
1763 (
1764 motifs.neighbors(&doc_type_key).to_vec(),
1765 motifs.should_share(&doc_type_key),
1766 )
1767 } else {
1768 (Vec::new(), 0.0)
1769 }
1770 } else {
1771 (Vec::new(), 0.0)
1772 };
1773
1774 let doc_id_bytes = entry.header.document_id.as_bytes();
1776 let mut cc_seed: usize = 0;
1777 for &b in doc_id_bytes {
1778 cc_seed = cc_seed.wrapping_add(b as usize);
1779 }
1780
1781 for (i, line) in entry.lines.iter_mut().enumerate() {
1782 if line.account_description.is_none() {
1784 line.account_description = self
1785 .coa
1786 .get_account(&line.gl_account)
1787 .map(|a| a.short_description.clone());
1788 }
1789
1790 if line.cost_center.is_none() {
1809 let priors_opt = &mut self.loaded_priors;
1814 let rng_ref = &mut self.rng;
1815 if let Some(priors) = priors_opt {
1816 let sp37_cc = header_sap_code.as_deref().and_then(|code| {
1817 priors.sample_attribute_for_source(code, "cost_center", rng_ref)
1818 });
1819 if sp37_cc.is_some() {
1820 line.cost_center = sp37_cc;
1821 } else if let Some(sampler) = priors.fanout_samplers.get_mut("CostCenter") {
1822 line.cost_center = Some(sampler.pick_for_with_neighbors(
1823 &doc_type_key,
1824 &cc_pc_neighbor_vec,
1825 cc_pc_share_prob,
1826 rng_ref,
1827 ));
1828 }
1829 }
1830 }
1831 if line.cost_center.is_none() {
1832 let first_char = line.gl_account.chars().next().unwrap_or('0');
1833 if first_char == '5' || first_char == '6' {
1834 if !self.cost_center_pool.is_empty() {
1835 let needle = format!("-{company_code}-");
1836 let candidates: Vec<&String> = self
1837 .cost_center_pool
1838 .iter()
1839 .filter(|id| id.contains(&needle))
1840 .collect();
1841 let pool: Vec<&String> = if candidates.is_empty() {
1842 self.cost_center_pool.iter().collect()
1843 } else {
1844 candidates
1845 };
1846 let idx = cc_seed.wrapping_add(i) % pool.len();
1847 line.cost_center = Some(pool[idx].clone());
1848 } else {
1849 let idx = cc_seed.wrapping_add(i) % Self::COST_CENTER_POOL.len();
1850 line.cost_center = Some(Self::COST_CENTER_POOL[idx].to_string());
1851 }
1852 }
1853 }
1854
1855 if line.profit_center.is_none() {
1863 let priors_opt = &mut self.loaded_priors;
1868 let rng_ref = &mut self.rng;
1869 if let Some(priors) = priors_opt {
1870 let sp37_pc = header_sap_code.as_deref().and_then(|code| {
1871 priors.sample_attribute_for_source(code, "profit_center", rng_ref)
1872 });
1873 if sp37_pc.is_some() {
1874 line.profit_center = sp37_pc;
1875 } else if let Some(sampler) = priors.fanout_samplers.get_mut("ProfitCenter") {
1876 line.profit_center = Some(sampler.pick_for_with_neighbors(
1877 &doc_type_key,
1878 &cc_pc_neighbor_vec,
1879 cc_pc_share_prob,
1880 rng_ref,
1881 ));
1882 }
1883 }
1884 }
1885 if line.profit_center.is_none() {
1886 if !self.profit_center_pool.is_empty() {
1887 let needle = format!("-{company_code}-");
1888 let candidates: Vec<&String> = self
1889 .profit_center_pool
1890 .iter()
1891 .filter(|id| id.contains(&needle))
1892 .collect();
1893 let pool: Vec<&String> = if candidates.is_empty() {
1894 self.profit_center_pool.iter().collect()
1895 } else {
1896 candidates
1897 };
1898 let idx = cc_seed.wrapping_add(i) % pool.len();
1899 line.profit_center = Some(pool[idx].clone());
1900 } else {
1901 let suffix = match business_process {
1902 Some(BusinessProcess::P2P) => "-P2P",
1903 Some(BusinessProcess::O2C) => "-O2C",
1904 Some(BusinessProcess::R2R) => "-R2R",
1905 Some(BusinessProcess::H2R) => "-H2R",
1906 _ => "",
1907 };
1908 line.profit_center = Some(format!("PC-{company_code}{suffix}"));
1909 }
1910 }
1911
1912 if line.business_unit.is_none() && self.config.business_unit_dimension.unwrap_or(true) {
1919 if let Some(dim) = line
1920 .cost_center
1921 .as_deref()
1922 .or(line.profit_center.as_deref())
1923 {
1924 line.business_unit = Some(Self::business_unit_for_dimension(dim));
1925 }
1926 }
1927
1928 if line.trading_partner.is_none() {
1934 line.trading_partner = entry.header.trading_partner.clone();
1935 }
1936
1937 if line.line_text.is_none() {
1939 line.line_text = header_text.clone();
1940 }
1941
1942 if line.value_date.is_none()
1944 && (line.gl_account.starts_with("1100") || line.gl_account.starts_with("2000"))
1945 {
1946 line.value_date = Some(posting_date);
1947 }
1948
1949 if line.assignment.is_none() {
1951 if line.gl_account.starts_with("2000") {
1952 if let Some(ref ht) = header_text {
1954 if let Some(vendor_part) = ht.rsplit(" - ").next() {
1956 if vendor_part.starts_with("V-")
1957 || vendor_part.starts_with("VENDOR")
1958 || vendor_part.starts_with("Vendor")
1959 {
1960 line.assignment = Some(vendor_part.to_string());
1961 }
1962 }
1963 }
1964 } else if line.gl_account.starts_with("1100") {
1965 if let Some(ref ht) = header_text {
1967 if let Some(customer_part) = ht.rsplit(" - ").next() {
1968 if customer_part.starts_with("C-")
1969 || customer_part.starts_with("CUST")
1970 || customer_part.starts_with("Customer")
1971 {
1972 line.assignment = Some(customer_part.to_string());
1973 }
1974 }
1975 }
1976 }
1977 }
1978 }
1979 }
1980
1981 pub fn generate(&mut self) -> JournalEntry {
1983 debug!(
1984 count = self.count,
1985 companies = self.companies.len(),
1986 start_date = %self.start_date,
1987 end_date = %self.end_date,
1988 "Generating journal entry"
1989 );
1990
1991 if let Some(ref state) = self.batch_state {
1993 if state.remaining > 0 {
1994 return self.generate_batched_entry();
1995 }
1996 }
1997
1998 if let Some(rev) = self.maybe_generate_reversal() {
2001 return rev;
2002 }
2003
2004 if let Some(alloc) = self.maybe_generate_allocation_batch() {
2007 return alloc;
2008 }
2009
2010 if self.md_resolver.companies.is_empty()
2013 && self.md_resolver.persons.is_empty()
2014 && self.md_resolver.patients.is_empty()
2015 {
2016 self.refresh_md_resolver();
2017 }
2018
2019 self.count += 1;
2020
2021 let document_id = self.generate_deterministic_uuid();
2023
2024 let mut posting_date = if self.loaded_priors.is_none() {
2040 let mut d = self
2041 .temporal_sampler
2042 .sample_date(self.start_date, self.end_date);
2043 if let Some(ref calc) = self.business_day_calculator {
2045 if !calc.is_business_day(d) {
2046 d = calc.next_business_day(d, false);
2047 if d > self.end_date {
2048 d = calc.prev_business_day(self.end_date, true);
2049 }
2050 }
2051 }
2052 d
2053 } else {
2054 self.start_date
2057 };
2058
2059 let company_code = self.company_selector.select(&mut self.rng).to_string();
2061
2062 let copula_uv: Option<(f64, f64)> =
2066 self.correlation_copula.as_mut().map(|cop| cop.sample());
2067
2068 let mut line_spec = self.line_sampler.sample();
2077 if let Some((_u, v)) = copula_uv {
2078 let new_total = 2 + ((v * 10.0).floor() as usize).min(9);
2079 let old_debit = line_spec.debit_count.max(1);
2080 let old_credit = line_spec.credit_count.max(1);
2081 let new_debit = (new_total as f64 * old_debit as f64 / (old_debit + old_credit) as f64)
2082 .round() as usize;
2083 let new_debit = new_debit.clamp(1, new_total - 1);
2084 let new_credit = new_total - new_debit;
2085 line_spec.total_count = new_total;
2086 line_spec.debit_count = new_debit;
2087 line_spec.credit_count = new_credit;
2088 }
2089
2090 if let Some(cap) = self.config.lines_per_je_cap {
2094 let cap = cap.max(2);
2095 let total = line_spec.debit_count + line_spec.credit_count;
2096 if total > cap {
2097 let new_debit =
2098 ((line_spec.debit_count as f64 / total as f64) * cap as f64).round() as usize;
2099 let new_debit = new_debit.clamp(1, cap - 1);
2100 let new_credit = cap - new_debit;
2101 line_spec.total_count = cap;
2102 line_spec.debit_count = new_debit;
2103 line_spec.credit_count = new_credit;
2104 }
2105 }
2106
2107 let source = self.select_source();
2109 let is_automated = matches!(
2110 source,
2111 TransactionSource::Automated | TransactionSource::Recurring
2112 );
2113
2114 let sap_source_code: Option<String> = self.sample_sap_source_code();
2120 self.current_je_source = sap_source_code.clone();
2123
2124 let business_process = self.select_business_process();
2126
2127 {
2137 let priors_opt = &mut self.loaded_priors;
2139 let rng_ref = &mut self.rng;
2140 let iet_accum_ref = &mut self.iet_day_accum;
2141 if let Some(priors) = priors_opt {
2142 let doc_type = Self::document_type_for_process(business_process).to_string();
2143 let period_days = (self.end_date - self.start_date).num_days().max(1) as f64;
2144 let iet = priors
2145 .iet_sampler
2146 .sample_next(&doc_type, rng_ref)
2147 .max(0.001);
2148 let accum = iet_accum_ref.entry(doc_type).or_insert(0.0);
2149 *accum += iet;
2150 if *accum >= period_days {
2152 *accum %= period_days;
2153 }
2154 let day_offset =
2155 (*accum as i64).clamp(0, (self.end_date - self.start_date).num_days());
2156 posting_date = self.start_date + chrono::Duration::days(day_offset);
2157 if let Some(ref calc) = self.business_day_calculator {
2160 if !calc.is_business_day(posting_date) {
2161 posting_date = calc.next_business_day(posting_date, false);
2162 if posting_date > self.end_date {
2163 posting_date = calc.prev_business_day(self.end_date, true);
2164 }
2165 }
2166 }
2167 } } if let Some(ref priors) = self.loaded_priors {
2184 let doc_type = Self::document_type_for_process(business_process);
2185 let day_in_period = (posting_date - self.start_date).num_days();
2186 let active = match &priors.multi_segment_window {
2187 Some(msw) => msw.is_active(doc_type, day_in_period),
2188 None => priors.active_window.is_active(doc_type, day_in_period),
2189 };
2190 if !active {
2191 posting_date = self
2196 .temporal_sampler
2197 .sample_date(self.start_date, self.end_date);
2198 if let Some(ref calc) = self.business_day_calculator {
2199 if !calc.is_business_day(posting_date) {
2200 posting_date = calc.next_business_day(posting_date, false);
2201 if posting_date > self.end_date {
2202 posting_date = calc.prev_business_day(self.end_date, true);
2203 }
2204 }
2205 }
2206 }
2207 }
2208
2209 if let Some(ref priors) = self.loaded_priors {
2218 let doc_type = Self::document_type_for_process(business_process);
2219 let hist = priors
2220 .lines_per_je
2221 .by_source
2222 .get(doc_type)
2223 .unwrap_or(&priors.lines_per_je.overall);
2224 let n_total = (hist.sample_bucket(&mut self.rng) as usize).max(2);
2225 let old_debit = line_spec.debit_count.max(1);
2226 let old_credit = line_spec.credit_count.max(1);
2227 let new_debit = (n_total as f64 * old_debit as f64 / (old_debit + old_credit) as f64)
2228 .round() as usize;
2229 let new_debit = new_debit.clamp(1, n_total - 1);
2230 line_spec.total_count = n_total;
2231 line_spec.debit_count = new_debit;
2232 line_spec.credit_count = n_total - new_debit;
2233 }
2234
2235 let fraud_type = self.determine_fraud();
2237 let is_fraud = fraud_type.is_some();
2238
2239 let time = self.temporal_sampler.sample_time(!is_automated);
2241 let created_at = posting_date.and_time(time).and_utc();
2242
2243 let (created_by, user_persona) = self.select_user(is_automated);
2245
2246 let mut header =
2248 JournalEntryHeader::with_deterministic_id(company_code, posting_date, document_id);
2249 header.created_at = created_at;
2250 header.source = source;
2251 header.sap_source_code = sap_source_code;
2252
2253 {
2261 let code_opt = header.sap_source_code.clone();
2262 if let Some(ref code) = code_opt {
2263 let rng_ref = &mut self.rng;
2264 let tp_neighbors: Vec<String> = if let Some(ref priors) = self.loaded_priors {
2268 if let Some(ref motifs) = priors.tp_motif_sampler {
2269 if let Some(last_tp) = self.last_tp_by_source.get(code.as_str()) {
2270 motifs.neighbors(last_tp).to_vec()
2271 } else {
2272 Vec::new()
2273 }
2274 } else {
2275 Vec::new()
2276 }
2277 } else {
2278 Vec::new()
2279 };
2280 let tp_share_prob: f64 = if let Some(ref priors) = self.loaded_priors {
2281 if let Some(ref motifs) = priors.tp_motif_sampler {
2282 if let Some(last_tp) = self.last_tp_by_source.get(code.as_str()) {
2283 motifs.should_share(last_tp)
2284 } else {
2285 0.0
2286 }
2287 } else {
2288 0.0
2289 }
2290 } else {
2291 0.0
2292 };
2293
2294 if let Some(ref mut priors) = self.loaded_priors {
2295 let tp = if !tp_neighbors.is_empty()
2299 && tp_share_prob > 0.0
2300 && rng_ref.random_range(0.0..1.0) < tp_share_prob
2301 {
2302 use datasynth_core::distributions::behavioral_priors::CategoricalDistribution;
2306 let filtered: std::collections::BTreeMap<String, f64> = priors
2307 .per_source_attribute
2308 .as_ref()
2309 .and_then(|psa| psa.conditional(code, "trading_partner"))
2310 .map(|dist| {
2311 dist.probabilities
2312 .iter()
2313 .filter(|(v, _)| tp_neighbors.contains(v))
2314 .map(|(v, p)| (v.clone(), *p))
2315 .collect()
2316 })
2317 .unwrap_or_default();
2318 if filtered.is_empty() {
2319 priors.sample_attribute_for_source(code, "trading_partner", rng_ref)
2320 } else {
2321 let neighbour_dist = CategoricalDistribution {
2322 probabilities: filtered,
2323 n: 0, };
2325 neighbour_dist.sample(rng_ref).or_else(|| {
2326 priors.sample_attribute_for_source(code, "trading_partner", rng_ref)
2327 })
2328 }
2329 } else {
2330 priors.sample_attribute_for_source(code, "trading_partner", rng_ref)
2331 };
2332 header.trading_partner = tp;
2333 }
2334 if let Some(ref tp) = header.trading_partner {
2337 self.last_tp_by_source.insert(code.clone(), tp.clone());
2338 }
2339 }
2340 }
2341
2342 let (created_by, created_at) = {
2347 let sap_code_for_user = header.sap_source_code.clone();
2348 if let (Some(ref code), Some(ref priors)) = (sap_code_for_user, &self.loaded_priors) {
2349 if let Some(uid) = priors.sample_user_for_source(code, &mut self.rng) {
2350 let new_created_at = if let Some((hour, _)) =
2351 priors.sample_timestamp_for_user(&uid, &mut self.rng)
2352 {
2353 let base = header.created_at;
2354 base.date_naive()
2355 .and_hms_opt(hour, 0, 0)
2356 .map(|naive| naive.and_utc())
2357 .unwrap_or(base)
2358 } else {
2359 header.created_at
2360 };
2361 (uid, new_created_at)
2362 } else {
2363 (created_by, header.created_at)
2364 }
2365 } else {
2366 (created_by, header.created_at)
2367 }
2368 };
2369
2370 header.created_by = created_by;
2371 header.created_at = created_at;
2372 header.user_persona = user_persona;
2373 header.business_process = Some(business_process);
2374 header.document_type = Self::document_type_for_process(business_process).to_string();
2375 header.is_fraud = is_fraud;
2376 header.fraud_type = fraud_type;
2377
2378 let is_manual = matches!(source, TransactionSource::Manual);
2380 header.is_manual = is_manual;
2381
2382 header.source_system = Self::pick_source_system(&mut self.rng, is_manual, business_process);
2396
2397 let is_post_close = posting_date.month() == self.end_date.month()
2400 && posting_date.year() == self.end_date.year()
2401 && posting_date.day() > 25;
2402 header.is_post_close = is_post_close;
2403
2404 let created_date = if is_manual {
2407 posting_date.and_hms_opt(time.hour().min(23), time.minute(), time.second())
2408 } else {
2409 let lag_days = self.rng.random_range(0i64..=3);
2410 let created_naive_date = posting_date
2411 .checked_sub_signed(chrono::Duration::days(lag_days))
2412 .unwrap_or(posting_date);
2413 created_naive_date.and_hms_opt(
2414 self.rng.random_range(8u32..=17),
2415 self.rng.random_range(0u32..=59),
2416 self.rng.random_range(0u32..=59),
2417 )
2418 };
2419 header.created_date = created_date;
2420
2421 let mut context =
2423 DescriptionContext::with_period(posting_date.month(), posting_date.year());
2424
2425 match business_process {
2427 BusinessProcess::P2P => {
2428 if let Some(vendor) = self.vendor_pool.random_vendor(&mut self.rng) {
2429 context.vendor_name = Some(vendor.name.clone());
2430 }
2431 }
2432 BusinessProcess::O2C => {
2433 if let Some(customer) = self.customer_pool.random_customer(&mut self.rng) {
2434 context.customer_name = Some(customer.name.clone());
2435 }
2436 }
2437 _ => {}
2438 }
2439
2440 if self.template_config.descriptions.generate_header_text {
2444 let priors_header = if let Some(src) = header.sap_source_code.as_deref() {
2445 if let Some(p) = self.loaded_priors.as_ref() {
2446 p.sample_header_template(src, &mut self.md_resolver, &mut self.rng)
2448 } else {
2449 None
2450 }
2451 } else {
2452 None
2453 };
2454 header.header_text = Some(priors_header.unwrap_or_else(|| {
2455 self.description_generator.generate_header_text(
2456 business_process,
2457 &context,
2458 &mut self.rng,
2459 )
2460 }));
2461 }
2462
2463 if self.template_config.references.generate_references {
2470 let priors_ref = header.sap_source_code.as_deref().and_then(|src| {
2471 self.loaded_priors
2472 .as_ref()
2473 .and_then(|p| p.sample_reference(src, &mut self.rng))
2474 });
2475 header.reference = Some(priors_ref.unwrap_or_else(|| {
2476 self.reference_generator
2477 .generate_for_process_year(business_process, posting_date.year())
2478 }));
2479 }
2480
2481 header.source_document = header
2483 .reference
2484 .as_deref()
2485 .and_then(DocumentRef::parse)
2486 .or_else(|| {
2487 if header.source == TransactionSource::Manual {
2488 Some(DocumentRef::Manual)
2489 } else {
2490 None
2491 }
2492 });
2493
2494 let mut entry = JournalEntry::new(header);
2496
2497 let base_amount = if let Some(ft) = fraud_type {
2503 let pattern = self.fraud_type_to_amount_pattern(ft);
2504 self.amount_sampler.sample_fraud(pattern)
2505 } else if let Some(ref mut adv) = self.advanced_amount_sampler {
2506 adv.sample_decimal()
2507 } else {
2508 self.amount_sampler.sample()
2509 };
2510 let base_amount = if fraud_type.is_none() {
2516 let input = self.conditional_input_value(posting_date);
2520 if let Some(ref mut cond) = self.conditional_amount_override {
2521 cond.sample_decimal(input)
2522 } else {
2523 base_amount
2524 }
2525 } else {
2526 base_amount
2527 };
2528
2529 const PRIORS_AMOUNT_BYPASS_SHARE: f64 = 0.25;
2551 let base_amount = if fraud_type.is_none() {
2552 if let Some(src) = entry.header.sap_source_code.as_deref() {
2553 let src_owned = src.to_string();
2554 let use_conditional = self.loaded_priors.is_some()
2557 && self.rng.random_range(0.0..1.0) >= PRIORS_AMOUNT_BYPASS_SHARE;
2558 if use_conditional {
2559 let priors_ref = &mut self.loaded_priors;
2560 let rng_ref = &mut self.rng;
2561 if let Some(priors) = priors_ref {
2562 priors
2563 .sample_amount_for_source(&src_owned, "", rng_ref)
2564 .and_then(|v| {
2565 if v.is_finite() && v > 0.0 {
2566 Decimal::from_f64_retain(v)
2567 } else {
2568 None
2569 }
2570 })
2571 .unwrap_or(base_amount)
2572 } else {
2573 base_amount
2574 }
2575 } else {
2576 base_amount
2577 }
2578 } else {
2579 base_amount
2580 }
2581 } else {
2582 base_amount
2583 };
2584
2585 let base_amount = if fraud_type.is_none() {
2596 if let Some((u, _v)) = copula_uv {
2597 if let Some(ref adv) = self.advanced_amount_sampler {
2598 adv.ppf_decimal(u)
2599 } else {
2600 let log_mult = 4.0 * (u - 0.5);
2601 let adjusted = base_amount.to_f64().unwrap_or(1.0) * log_mult.exp();
2602 Decimal::from_f64_retain(adjusted).unwrap_or(base_amount)
2603 }
2604 } else {
2605 base_amount
2606 }
2607 } else {
2608 base_amount
2609 };
2610
2611 let drift_adjusted_amount = {
2613 let drift = self.get_drift_adjustments(posting_date);
2614 if drift.amount_mean_multiplier != 1.0 {
2615 let multiplier = drift.amount_mean_multiplier * drift.seasonal_factor;
2617 let adjusted = base_amount.to_f64().unwrap_or(1.0) * multiplier;
2618 Decimal::from_f64_retain(adjusted).unwrap_or(base_amount)
2619 } else {
2620 base_amount
2621 }
2622 };
2623
2624 let total_amount = if is_automated {
2626 drift_adjusted_amount } else {
2628 self.apply_human_variation(drift_adjusted_amount)
2629 };
2630
2631 let doc_type_for_fanout = Self::document_type_for_process(business_process).to_string();
2635
2636 let (gl_neighbor_vec, gl_share_prob): (Vec<String>, f64) =
2641 if let Some(priors) = &self.loaded_priors {
2642 if let Some(motifs) = &priors.cross_entity_motifs {
2643 (
2644 motifs.neighbors(&doc_type_for_fanout).to_vec(),
2645 motifs.should_share(&doc_type_for_fanout),
2646 )
2647 } else {
2648 (Vec::new(), 0.0)
2649 }
2650 } else {
2651 (Vec::new(), 0.0)
2652 };
2653
2654 let reuse_archetype = self.pick_recurring_archetype(
2661 &entry.header.company_code,
2662 &doc_type_for_fanout,
2663 line_spec.debit_count,
2664 line_spec.credit_count,
2665 );
2666 let mut fresh_debit_accts: Vec<String> = Vec::new();
2667 let mut fresh_credit_accts: Vec<String> = Vec::new();
2668 let sota8_active = self.config.source_conditional_account_pair.enabled;
2671
2672 let debit_amounts = self
2674 .amount_sampler
2675 .sample_summing_to(line_spec.debit_count, total_amount);
2676 for (i, amount) in debit_amounts.into_iter().enumerate() {
2677 let debit_fallback = self.select_debit_account().account_number.clone();
2686 let account_number = if sota8_active {
2692 debit_fallback
2693 } else {
2694 let priors_opt = &mut self.loaded_priors;
2695 let rng_ref = &mut self.rng;
2696 if let Some(priors) = priors_opt {
2697 let sp46_gl = entry
2701 .header
2702 .sap_source_code
2703 .as_deref()
2704 .and_then(|code| priors.sample_gl_for_source_role(code, "DR", rng_ref));
2705 if let Some(gl) = sp46_gl {
2706 gl
2707 } else {
2708 let sp37_gl = entry.header.sap_source_code.as_deref().and_then(|code| {
2710 priors.sample_attribute_for_source(code, "gl_account", rng_ref)
2711 });
2712 if let Some(gl) = sp37_gl {
2713 gl
2714 } else if let Some(sampler) = priors.fanout_samplers.get_mut("GLAccount") {
2715 sampler.pick_for_with_neighbors(
2717 &doc_type_for_fanout,
2718 &gl_neighbor_vec,
2719 gl_share_prob,
2720 rng_ref,
2721 )
2722 } else {
2723 debit_fallback
2724 }
2725 }
2726 } else {
2727 debit_fallback
2728 }
2729 };
2730 let mut line = JournalEntryLine::debit(
2731 entry.header.document_id,
2732 (i + 1) as u32,
2733 account_number.clone(),
2734 amount,
2735 );
2736
2737 if self.template_config.descriptions.generate_line_text {
2740 let src = entry.header.sap_source_code.as_deref();
2741 let priors_line = if let Some(s) = src {
2742 if let Some(p) = self.loaded_priors.as_ref() {
2743 let account_class = p
2744 .coa_semantic
2745 .as_ref()
2746 .and_then(|c| c.accounts.get(&account_number))
2747 .and_then(|a| a.account_class.as_deref())
2748 .unwrap_or(
2749 datasynth_core::distributions::text_taxonomy::TextTaxonomyPrior::UNKNOWN_CLASS,
2750 );
2751 p.sample_line_template(
2753 s,
2754 account_class,
2755 &mut self.md_resolver,
2756 &mut self.rng,
2757 )
2758 } else {
2759 None
2760 }
2761 } else {
2762 None
2763 };
2764 line.line_text = Some(priors_line.unwrap_or_else(|| {
2765 self.description_generator.generate_line_text(
2766 &account_number,
2767 &context,
2768 &mut self.rng,
2769 )
2770 }));
2771 }
2772
2773 if let Some((ref d, _)) = reuse_archetype {
2781 if let Some(a) = d.get(i) {
2782 line.gl_account = a.clone();
2783 }
2784 } else if self.loaded_priors.is_none() {
2785 fresh_debit_accts.push(line.gl_account.clone());
2786 }
2787 entry.add_line(line);
2788 }
2789
2790 let credit_amounts = self
2792 .amount_sampler
2793 .sample_summing_to(line_spec.credit_count, total_amount);
2794 for (i, amount) in credit_amounts.into_iter().enumerate() {
2795 let credit_fallback = self.select_credit_account().account_number.clone();
2797 let account_number = if sota8_active {
2799 credit_fallback
2800 } else {
2801 let priors_opt = &mut self.loaded_priors;
2802 let rng_ref = &mut self.rng;
2803 if let Some(priors) = priors_opt {
2804 let sp46_gl = entry
2805 .header
2806 .sap_source_code
2807 .as_deref()
2808 .and_then(|code| priors.sample_gl_for_source_role(code, "CR", rng_ref));
2809 if let Some(gl) = sp46_gl {
2810 gl
2811 } else {
2812 let sp37_gl = entry.header.sap_source_code.as_deref().and_then(|code| {
2813 priors.sample_attribute_for_source(code, "gl_account", rng_ref)
2814 });
2815 if let Some(gl) = sp37_gl {
2816 gl
2817 } else if let Some(sampler) = priors.fanout_samplers.get_mut("GLAccount") {
2818 sampler.pick_for_with_neighbors(
2819 &doc_type_for_fanout,
2820 &gl_neighbor_vec,
2821 gl_share_prob,
2822 rng_ref,
2823 )
2824 } else {
2825 credit_fallback
2826 }
2827 }
2828 } else {
2829 credit_fallback
2830 }
2831 };
2832 let mut line = JournalEntryLine::credit(
2833 entry.header.document_id,
2834 (line_spec.debit_count + i + 1) as u32,
2835 account_number.clone(),
2836 amount,
2837 );
2838
2839 if self.template_config.descriptions.generate_line_text {
2842 let src = entry.header.sap_source_code.as_deref();
2843 let priors_line = if let Some(s) = src {
2844 if let Some(p) = self.loaded_priors.as_ref() {
2845 let account_class = p
2846 .coa_semantic
2847 .as_ref()
2848 .and_then(|c| c.accounts.get(&account_number))
2849 .and_then(|a| a.account_class.as_deref())
2850 .unwrap_or(
2851 datasynth_core::distributions::text_taxonomy::TextTaxonomyPrior::UNKNOWN_CLASS,
2852 );
2853 p.sample_line_template(
2855 s,
2856 account_class,
2857 &mut self.md_resolver,
2858 &mut self.rng,
2859 )
2860 } else {
2861 None
2862 }
2863 } else {
2864 None
2865 };
2866 line.line_text = Some(priors_line.unwrap_or_else(|| {
2867 self.description_generator.generate_line_text(
2868 &account_number,
2869 &context,
2870 &mut self.rng,
2871 )
2872 }));
2873 }
2874
2875 if let Some((_, ref c)) = reuse_archetype {
2879 if let Some(a) = c.get(i) {
2880 line.gl_account = a.clone();
2881 }
2882 } else if self.loaded_priors.is_none() {
2883 fresh_credit_accts.push(line.gl_account.clone());
2884 }
2885 entry.add_line(line);
2886 }
2887
2888 if reuse_archetype.is_none() {
2891 self.cache_recurring_archetype(
2892 &entry.header.company_code,
2893 &doc_type_for_fanout,
2894 std::mem::take(&mut fresh_debit_accts),
2895 std::mem::take(&mut fresh_credit_accts),
2896 );
2897 }
2898
2899 self.enrich_line_items(&mut entry);
2901
2902 if self.persona_errors_enabled && !is_automated {
2904 self.maybe_inject_persona_error(&mut entry);
2905 }
2906
2907 if self.approval_enabled {
2909 self.maybe_apply_approval_workflow(&mut entry, posting_date);
2910 }
2911
2912 self.populate_approval_fields(&mut entry, posting_date);
2914
2915 self.maybe_start_batch(&entry);
2917
2918 if self.velocity_calibrator.is_some() {
2921 let mut pending: Vec<crate::velocity_calibrator::CalibrationStep> = Vec::new();
2922 for line in &entry.lines {
2923 if let Some(step) = self
2924 .velocity_calibrator
2925 .as_mut()
2926 .and_then(|cal| cal.observe_line(line))
2927 {
2928 pending.push(step);
2929 }
2930 }
2931 for step in pending {
2932 self.apply_calibration_step(&step);
2933 }
2934 }
2935
2936 self.maybe_apply_foreign_currency(&mut entry);
2939
2940 self.record_for_reversal(&entry);
2942
2943 entry
2944 }
2945
2946 fn apply_calibration_step(&mut self, step: &crate::velocity_calibrator::CalibrationStep) {
2954 match step.parameter.as_str() {
2955 "amounts.lognormal_sigma" => {
2956 self.amount_sampler.set_lognormal_sigma(step.new_value);
2957 }
2958 "amounts.round_dollar_share" => {
2959 self.amount_sampler
2960 .set_round_number_probability(step.new_value);
2961 }
2962 _ => {
2963 }
2966 }
2967 }
2968
    /// Builder: switches persona-driven error injection on or off.
    ///
    /// Stores `enabled` in `persona_errors_enabled` and returns the generator
    /// so calls can be chained.
    pub fn with_persona_errors(mut self, enabled: bool) -> Self {
        self.persona_errors_enabled = enabled;
        self
    }
2977
    /// Builder: replaces the generator's fraud configuration with `config`.
    ///
    /// Returns the generator so calls can be chained.
    pub fn with_fraud_config(mut self, config: FraudConfig) -> Self {
        self.fraud_config = config;
        self
    }
2986
    /// Returns the current value of the `persona_errors_enabled` flag.
    pub fn persona_errors_enabled(&self) -> bool {
        self.persona_errors_enabled
    }
2991
    /// Builder: when `enabled` is `false`, drops any pending `batch_state`.
    ///
    /// Passing `true` leaves the generator unchanged. Returns the generator so
    /// calls can be chained.
    pub fn with_batching(mut self, enabled: bool) -> Self {
        if !enabled {
            self.batch_state = None;
        }
        self
    }
3002
    /// Reports whether batching is enabled.
    ///
    /// NOTE(review): this returns `true` unconditionally and does not consult
    /// any generator state — confirm that this is intended.
    pub fn batching_enabled(&self) -> bool {
        true
    }
3008
3009 fn maybe_start_batch(&mut self, entry: &JournalEntry) {
3014 if entry.header.source == TransactionSource::Automated || entry.header.is_fraud {
3016 return;
3017 }
3018
3019 if self.rng.random::<f64>() > 0.15 {
3021 return;
3022 }
3023
3024 let base_account = entry
3026 .lines
3027 .first()
3028 .map(|l| l.gl_account.clone())
3029 .unwrap_or_default();
3030
3031 let base_amount = entry.total_debit();
3032
3033 self.batch_state = Some(BatchState {
3034 base_account_number: base_account,
3035 base_amount,
3036 base_business_process: entry.header.business_process,
3037 base_posting_date: entry.header.posting_date,
3038 remaining: self.rng.random_range(2..7), });
3040 }
3041
    /// Generates one two-line manual entry derived from the active batch.
    ///
    /// The entry reuses the batch's posting date, debit account and business
    /// process, and posts an amount within roughly ±15% of the batch's base
    /// amount. If no batch is active the call logs a warning and falls back to
    /// `generate()`.
    ///
    /// Side effects: increments `count`, updates `current_je_source` and
    /// `last_tp_by_source`, advances the RNG, and clears `batch_state` once the
    /// batch's `remaining` counter (after this call's decrement) is `<= 1`.
    fn generate_batched_entry(&mut self) -> JournalEntry {
        use rust_decimal::Decimal;

        // Consume one slot of the batch before taking a snapshot of it.
        if let Some(ref mut state) = self.batch_state {
            state.remaining = state.remaining.saturating_sub(1);
        }

        // Work on a clone so `self` can be borrowed mutably below.
        let Some(batch) = self.batch_state.clone() else {
            tracing::warn!(
                "generate_batched_entry called without batch_state; generating standard entry"
            );
            self.batch_state = None;
            return self.generate();
        };

        let posting_date = batch.base_posting_date;

        self.count += 1;
        let document_id = self.generate_deterministic_uuid();

        let company_code = self.company_selector.select(&mut self.rng).to_string();

        // NOTE(review): this spec is built but never read in this function.
        let _line_spec = LineItemSpec {
            total_count: 2,
            debit_count: 1,
            credit_count: 1,
            split_type: DebitCreditSplit::Equal,
        };

        // Batched entries are always posted as manual transactions.
        let source = TransactionSource::Manual;

        let sap_source_code: Option<String> = self.sample_sap_source_code();
        // Remember the source so account selection can condition on it.
        self.current_je_source = sap_source_code.clone();

        // Fall back to R2R when the template entry carried no business process.
        let business_process = batch.base_business_process.unwrap_or(BusinessProcess::R2R);

        let time = self.temporal_sampler.sample_time(true);
        let created_at = posting_date.and_time(time).and_utc();

        // Default user; may be replaced below by a prior-driven user.
        let (created_by, user_persona) = self.select_user(false);

        let mut header =
            JournalEntryHeader::with_deterministic_id(company_code, posting_date, document_id);
        header.created_at = created_at;
        header.source = source;
        header.sap_source_code = sap_source_code;

        // Trading-partner selection; only runs when a source code was sampled.
        {
            let code_opt = header.sap_source_code.clone();
            if let Some(ref code) = code_opt {
                let rng_ref = &mut self.rng;
                // Neighbours of the last trading partner seen for this source
                // (empty when priors, motifs or history are missing).
                let tp_neighbors: Vec<String> = if let Some(ref priors) = self.loaded_priors {
                    if let Some(ref motifs) = priors.tp_motif_sampler {
                        if let Some(last_tp) = self.last_tp_by_source.get(code.as_str()) {
                            motifs.neighbors(last_tp).to_vec()
                        } else {
                            Vec::new()
                        }
                    } else {
                        Vec::new()
                    }
                } else {
                    Vec::new()
                };
                // Probability of staying within that neighbourhood (0.0 when
                // priors, motifs or history are missing).
                let tp_share_prob: f64 = if let Some(ref priors) = self.loaded_priors {
                    if let Some(ref motifs) = priors.tp_motif_sampler {
                        if let Some(last_tp) = self.last_tp_by_source.get(code.as_str()) {
                            motifs.should_share(last_tp)
                        } else {
                            0.0
                        }
                    } else {
                        0.0
                    }
                } else {
                    0.0
                };
                if let Some(ref mut priors) = self.loaded_priors {
                    use datasynth_core::distributions::behavioral_priors::CategoricalDistribution;
                    // The RNG draw only happens when there are neighbours and a
                    // positive share probability (short-circuit `&&`).
                    let tp = if !tp_neighbors.is_empty()
                        && tp_share_prob > 0.0
                        && rng_ref.random_range(0.0..1.0) < tp_share_prob
                    {
                        // Restrict the per-source trading-partner distribution
                        // to the neighbour set.
                        let filtered: std::collections::BTreeMap<String, f64> = priors
                            .per_source_attribute
                            .as_ref()
                            .and_then(|psa| psa.conditional(code, "trading_partner"))
                            .map(|dist| {
                                dist.probabilities
                                    .iter()
                                    .filter(|(v, _)| tp_neighbors.contains(v))
                                    .map(|(v, p)| (v.clone(), *p))
                                    .collect()
                            })
                            .unwrap_or_default();
                        if filtered.is_empty() {
                            // No overlap: sample from the unrestricted prior.
                            priors.sample_attribute_for_source(code, "trading_partner", rng_ref)
                        } else {
                            let neighbour_dist = CategoricalDistribution {
                                probabilities: filtered,
                                n: 0,
                            };
                            neighbour_dist.sample(rng_ref).or_else(|| {
                                priors.sample_attribute_for_source(code, "trading_partner", rng_ref)
                            })
                        }
                    } else {
                        priors.sample_attribute_for_source(code, "trading_partner", rng_ref)
                    };
                    header.trading_partner = tp;
                }
                // Record the chosen partner as the new "last seen" for this source.
                if let Some(ref tp) = header.trading_partner {
                    self.last_tp_by_source.insert(code.clone(), tp.clone());
                }
            }
        }

        // Prefer a user (and posting hour) sampled from the priors for this
        // source; otherwise keep the user chosen by `select_user`.
        let (created_by, created_at) = {
            let sap_code_for_user = header.sap_source_code.clone();
            if let (Some(ref code), Some(ref priors)) = (sap_code_for_user, &self.loaded_priors) {
                if let Some(uid) = priors.sample_user_for_source(code, &mut self.rng) {
                    let new_created_at = if let Some((hour, _)) =
                        priors.sample_timestamp_for_user(&uid, &mut self.rng)
                    {
                        // Keep the date, replace the time with `hour:00:00`;
                        // fall back to the existing timestamp if that time is
                        // not representable.
                        let base = header.created_at;
                        base.date_naive()
                            .and_hms_opt(hour, 0, 0)
                            .map(|naive| naive.and_utc())
                            .unwrap_or(base)
                    } else {
                        header.created_at
                    };
                    (uid, new_created_at)
                } else {
                    (created_by, header.created_at)
                }
            } else {
                (created_by, header.created_at)
            }
        };

        header.created_by = created_by;
        header.created_at = created_at;
        header.user_persona = user_persona;
        header.business_process = Some(business_process);
        header.document_type = Self::document_type_for_process(business_process).to_string();

        header.source_document = Some(DocumentRef::Manual);

        header.is_manual = true;
        // 70% "manual", 30% "spreadsheet".
        header.source_system = if self.rng.random::<f64>() < 0.70 {
            "manual".to_string()
        } else {
            "spreadsheet".to_string()
        };
        // Flagged when the posting date is after the 25th of the month in
        // which `end_date` falls.
        header.is_post_close = posting_date.month() == self.end_date.month()
            && posting_date.year() == self.end_date.year()
            && posting_date.day() > 25;
        header.created_date =
            posting_date.and_hms_opt(time.hour().min(23), time.minute(), time.second());

        // Vary the template amount by a factor drawn from [-0.15, 0.15),
        // round to two decimals and never go below 1.
        let variation = self.rng.random_range(-0.15..0.15);
        let varied_amount =
            batch.base_amount * (Decimal::ONE + Decimal::try_from(variation).unwrap_or_default());
        let total_amount = varied_amount.round_dp(2).max(Decimal::from(1));

        let mut entry = JournalEntry::new(header);

        // Line 1: debit on the batch's base account.
        let debit_line = JournalEntryLine::debit(
            entry.header.document_id,
            1,
            batch.base_account_number.clone(),
            total_amount,
        );
        entry.add_line(debit_line);

        // Line 2: credit account. Priority: source/role GL prior, then the
        // per-source "gl_account" attribute prior, then the generic fallback.
        // The fallback is drawn first, so it is sampled even when unused.
        let credit_fallback = self.select_credit_account().account_number.clone();
        let credit_account = {
            let priors_opt = &mut self.loaded_priors;
            let rng_ref = &mut self.rng;
            if let Some(priors) = priors_opt {
                let sp46_gl = entry
                    .header
                    .sap_source_code
                    .as_deref()
                    .and_then(|code| priors.sample_gl_for_source_role(code, "CR", rng_ref));
                if let Some(gl) = sp46_gl {
                    gl
                } else {
                    let sp37_gl = entry.header.sap_source_code.as_deref().and_then(|code| {
                        priors.sample_attribute_for_source(code, "gl_account", rng_ref)
                    });
                    sp37_gl.unwrap_or(credit_fallback)
                }
            } else {
                credit_fallback
            }
        };
        let credit_line =
            JournalEntryLine::credit(entry.header.document_id, 2, credit_account, total_amount);
        entry.add_line(credit_line);

        self.enrich_line_items(&mut entry);

        if self.persona_errors_enabled {
            self.maybe_inject_persona_error(&mut entry);
        }

        if self.approval_enabled {
            self.maybe_apply_approval_workflow(&mut entry, posting_date);
        }

        self.populate_approval_fields(&mut entry, posting_date);

        // `batch` is the snapshot taken after this call's decrement.
        if batch.remaining <= 1 {
            self.batch_state = None;
        }

        entry
    }
3304
3305 fn maybe_inject_persona_error(&mut self, entry: &mut JournalEntry) {
3307 let persona_str = &entry.header.user_persona;
3309 let persona = match persona_str.to_lowercase().as_str() {
3310 s if s.contains("junior") => UserPersona::JuniorAccountant,
3311 s if s.contains("senior") => UserPersona::SeniorAccountant,
3312 s if s.contains("controller") => UserPersona::Controller,
3313 s if s.contains("manager") => UserPersona::Manager,
3314 s if s.contains("executive") => UserPersona::Executive,
3315 _ => return, };
3317
3318 let base_error_rate = persona.error_rate();
3320
3321 let adjusted_rate = self.apply_stress_factors(base_error_rate, entry.header.posting_date);
3323
3324 if self.rng.random::<f64>() >= adjusted_rate {
3326 return; }
3328
3329 self.inject_human_error(entry, persona);
3331 }
3332
3333 fn apply_stress_factors(&self, base_rate: f64, posting_date: chrono::NaiveDate) -> f64 {
3342 use chrono::Datelike;
3343
3344 let mut rate = base_rate;
3345 let day = posting_date.day();
3346 let month = posting_date.month();
3347
3348 if month == 12 && day >= 28 {
3350 rate *= 2.0;
3351 return rate.min(0.5); }
3353
3354 if matches!(month, 3 | 6 | 9 | 12) && day >= 28 {
3356 rate *= 1.75; return rate.min(0.4);
3358 }
3359
3360 if day >= 28 {
3362 rate *= 1.5; }
3364
3365 let weekday = posting_date.weekday();
3367 match weekday {
3368 chrono::Weekday::Mon => {
3369 rate *= 1.2;
3371 }
3372 chrono::Weekday::Fri => {
3373 rate *= 1.3;
3375 }
3376 _ => {}
3377 }
3378
3379 rate.min(0.4)
3381 }
3382
3383 fn apply_human_variation(&mut self, amount: rust_decimal::Decimal) -> rust_decimal::Decimal {
3392 use rust_decimal::Decimal;
3393
3394 if amount < Decimal::from(10) {
3396 return amount;
3397 }
3398
3399 if self.rng.random::<f64>() > 0.70 {
3401 return amount;
3402 }
3403
3404 let variation_type: u8 = self.rng.random_range(0..4);
3406
3407 match variation_type {
3408 0 => {
3409 let variation_pct = self.rng.random_range(-0.02..0.02);
3411 let variation = amount * Decimal::try_from(variation_pct).unwrap_or_default();
3412 (amount + variation).round_dp(2)
3413 }
3414 1 => {
3415 let ten = Decimal::from(10);
3417 (amount / ten).round() * ten
3418 }
3419 2 => {
3420 if amount >= Decimal::from(500) {
3422 let hundred = Decimal::from(100);
3423 (amount / hundred).round() * hundred
3424 } else {
3425 amount
3426 }
3427 }
3428 3 => {
3429 let cents = Decimal::new(self.rng.random_range(-100..100), 2);
3431 (amount + cents).max(Decimal::ZERO).round_dp(2)
3432 }
3433 _ => amount,
3434 }
3435 }
3436
3437 fn rebalance_entry(entry: &mut JournalEntry, modified_was_debit: bool, impact: Decimal) {
3443 let balancing_idx = entry.lines.iter().position(|l| {
3445 if modified_was_debit {
3446 l.credit_amount > Decimal::ZERO
3447 } else {
3448 l.debit_amount > Decimal::ZERO
3449 }
3450 });
3451
3452 if let Some(idx) = balancing_idx {
3453 if modified_was_debit {
3454 entry.lines[idx].credit_amount += impact;
3455 } else {
3456 entry.lines[idx].debit_amount += impact;
3457 }
3458 }
3459 }
3460
3461 fn inject_human_error(&mut self, entry: &mut JournalEntry, persona: UserPersona) {
3466 use rust_decimal::Decimal;
3467
3468 let error_type: u8 = match persona {
3470 UserPersona::JuniorAccountant => {
3471 self.rng.random_range(0..5)
3473 }
3474 UserPersona::SeniorAccountant => {
3475 self.rng.random_range(0..3)
3477 }
3478 UserPersona::Controller | UserPersona::Manager => {
3479 self.rng.random_range(3..5)
3481 }
3482 _ => return,
3483 };
3484
3485 match error_type {
3486 0 => {
3487 if let Some(line) = entry.lines.get_mut(0) {
3489 let is_debit = line.debit_amount > Decimal::ZERO;
3490 let original_amount = if is_debit {
3491 line.debit_amount
3492 } else {
3493 line.credit_amount
3494 };
3495
3496 let s = original_amount.to_string();
3498 if s.len() >= 2 {
3499 let chars: Vec<char> = s.chars().collect();
3500 let pos = self.rng.random_range(0..chars.len().saturating_sub(1));
3501 if chars[pos].is_ascii_digit()
3502 && chars.get(pos + 1).is_some_and(char::is_ascii_digit)
3503 {
3504 let mut new_chars = chars;
3505 new_chars.swap(pos, pos + 1);
3506 if let Ok(new_amount) =
3507 new_chars.into_iter().collect::<String>().parse::<Decimal>()
3508 {
3509 let impact = new_amount - original_amount;
3510
3511 if is_debit {
3513 entry.lines[0].debit_amount = new_amount;
3514 } else {
3515 entry.lines[0].credit_amount = new_amount;
3516 }
3517
3518 Self::rebalance_entry(entry, is_debit, impact);
3520
3521 entry.header.header_text = Some(
3522 entry.header.header_text.clone().unwrap_or_default()
3523 + " [HUMAN_ERROR:TRANSPOSITION]",
3524 );
3525 }
3526 }
3527 }
3528 }
3529 }
3530 1 => {
3531 if let Some(line) = entry.lines.get_mut(0) {
3533 let is_debit = line.debit_amount > Decimal::ZERO;
3534 let original_amount = if is_debit {
3535 line.debit_amount
3536 } else {
3537 line.credit_amount
3538 };
3539
3540 let new_amount = original_amount * Decimal::new(10, 0);
3541 let impact = new_amount - original_amount;
3542
3543 if is_debit {
3545 entry.lines[0].debit_amount = new_amount;
3546 } else {
3547 entry.lines[0].credit_amount = new_amount;
3548 }
3549
3550 Self::rebalance_entry(entry, is_debit, impact);
3552
3553 entry.header.header_text = Some(
3554 entry.header.header_text.clone().unwrap_or_default()
3555 + " [HUMAN_ERROR:DECIMAL_SHIFT]",
3556 );
3557 }
3558 }
3559 2 => {
3560 if let Some(ref mut text) = entry.header.header_text {
3562 let typos = ["teh", "adn", "wiht", "taht", "recieve"];
3563 let correct = ["the", "and", "with", "that", "receive"];
3564 let idx = self.rng.random_range(0..typos.len());
3565 if text.to_lowercase().contains(correct[idx]) {
3566 *text = text.replace(correct[idx], typos[idx]);
3567 *text = format!("{text} [HUMAN_ERROR:TYPO]");
3568 }
3569 }
3570 }
3571 3 => {
3572 if let Some(line) = entry.lines.get_mut(0) {
3574 let is_debit = line.debit_amount > Decimal::ZERO;
3575 let original_amount = if is_debit {
3576 line.debit_amount
3577 } else {
3578 line.credit_amount
3579 };
3580
3581 let new_amount =
3582 (original_amount / Decimal::new(100, 0)).round() * Decimal::new(100, 0);
3583 let impact = new_amount - original_amount;
3584
3585 if is_debit {
3587 entry.lines[0].debit_amount = new_amount;
3588 } else {
3589 entry.lines[0].credit_amount = new_amount;
3590 }
3591
3592 Self::rebalance_entry(entry, is_debit, impact);
3594
3595 entry.header.header_text = Some(
3596 entry.header.header_text.clone().unwrap_or_default()
3597 + " [HUMAN_ERROR:ROUNDED]",
3598 );
3599 }
3600 }
3601 4 if entry.header.document_date == entry.header.posting_date => {
3604 let days_late = self.rng.random_range(5..15);
3605 entry.header.document_date =
3606 entry.header.posting_date - chrono::Duration::days(days_late);
3607 entry.header.header_text = Some(
3608 entry.header.header_text.clone().unwrap_or_default()
3609 + " [HUMAN_ERROR:LATE_POSTING]",
3610 );
3611 }
3612 _ => {}
3613 }
3614 }
3615
3616 fn maybe_apply_approval_workflow(
3621 &mut self,
3622 entry: &mut JournalEntry,
3623 _posting_date: NaiveDate,
3624 ) {
3625 use rust_decimal::Decimal;
3626
3627 let amount = entry.total_debit();
3628
3629 if amount <= self.approval_threshold {
3631 let workflow = ApprovalWorkflow::auto_approved(
3633 entry.header.created_by.clone(),
3634 entry.header.user_persona.clone(),
3635 amount,
3636 entry.header.created_at,
3637 );
3638 entry.header.approval_workflow = Some(workflow);
3639 return;
3640 }
3641
3642 entry.header.sox_relevant = true;
3644
3645 let required_levels = if amount > Decimal::new(100000, 0) {
3647 3 } else if amount > Decimal::new(50000, 0) {
3649 2 } else {
3651 1 };
3653
3654 let mut workflow = ApprovalWorkflow::new(
3656 entry.header.created_by.clone(),
3657 entry.header.user_persona.clone(),
3658 amount,
3659 );
3660 workflow.required_levels = required_levels;
3661
3662 let submit_time = entry.header.created_at;
3664 let submit_action = ApprovalAction::new(
3665 entry.header.created_by.clone(),
3666 entry.header.user_persona.clone(),
3667 self.parse_persona(&entry.header.user_persona),
3668 ApprovalActionType::Submit,
3669 0,
3670 )
3671 .with_timestamp(submit_time);
3672
3673 workflow.actions.push(submit_action);
3674 workflow.status = ApprovalStatus::Pending;
3675 workflow.submitted_at = Some(submit_time);
3676
3677 let mut current_time = submit_time;
3679 for level in 1..=required_levels {
3680 let delay_hours = self.rng.random_range(1..4);
3682 current_time += chrono::Duration::hours(delay_hours);
3683
3684 while current_time.weekday() == chrono::Weekday::Sat
3686 || current_time.weekday() == chrono::Weekday::Sun
3687 {
3688 current_time += chrono::Duration::days(1);
3689 }
3690
3691 let (approver_id, approver_role) = self.select_approver(level);
3693
3694 let approve_action = ApprovalAction::new(
3695 approver_id.clone(),
3696 approver_role.to_string(),
3697 approver_role,
3698 ApprovalActionType::Approve,
3699 level,
3700 )
3701 .with_timestamp(current_time);
3702
3703 workflow.actions.push(approve_action);
3704 workflow.current_level = level;
3705 }
3706
3707 workflow.status = ApprovalStatus::Approved;
3709 workflow.approved_at = Some(current_time);
3710
3711 entry.header.approval_workflow = Some(workflow);
3712 }
3713
3714 fn select_approver(&mut self, level: u8) -> (String, UserPersona) {
3716 let persona = match level {
3717 1 => UserPersona::Manager,
3718 2 => UserPersona::Controller,
3719 _ => UserPersona::Executive,
3720 };
3721
3722 if let Some(ref pool) = self.user_pool {
3724 if let Some(user) = pool.get_random_user(persona, &mut self.rng) {
3725 return (user.user_id.clone(), persona);
3726 }
3727 }
3728
3729 let approver_id = match persona {
3731 UserPersona::Manager => format!("MGR{:04}", self.rng.random_range(1..100)),
3732 UserPersona::Controller => format!("CTRL{:04}", self.rng.random_range(1..20)),
3733 UserPersona::Executive => format!("EXEC{:04}", self.rng.random_range(1..10)),
3734 _ => format!("USR{:04}", self.rng.random_range(1..1000)),
3735 };
3736
3737 (approver_id, persona)
3738 }
3739
3740 fn parse_persona(&self, persona_str: &str) -> UserPersona {
3742 match persona_str.to_lowercase().as_str() {
3743 s if s.contains("junior") => UserPersona::JuniorAccountant,
3744 s if s.contains("senior") => UserPersona::SeniorAccountant,
3745 s if s.contains("controller") => UserPersona::Controller,
3746 s if s.contains("manager") => UserPersona::Manager,
3747 s if s.contains("executive") => UserPersona::Executive,
3748 s if s.contains("automated") || s.contains("system") => UserPersona::AutomatedSystem,
3749 _ => UserPersona::JuniorAccountant, }
3751 }
3752
    /// Builder: enables or disables approval-workflow generation.
    ///
    /// Stores `enabled` in `approval_enabled` and returns the generator so
    /// calls can be chained.
    pub fn with_approval(mut self, enabled: bool) -> Self {
        self.approval_enabled = enabled;
        self
    }
3758
    /// Builder: sets the amount stored in `approval_threshold`.
    ///
    /// Returns the generator so calls can be chained.
    pub fn with_approval_threshold(mut self, threshold: rust_decimal::Decimal) -> Self {
        self.approval_threshold = threshold;
        self
    }
3764
    /// Builder: sets the rate stored in `sod_violation_rate`.
    ///
    /// NOTE(review): the value is compared against a uniform `f64` draw, so it
    /// is presumably expected to lie in `[0, 1]`; no validation happens here.
    pub fn with_sod_violation_rate(mut self, rate: f64) -> Self {
        self.sod_violation_rate = rate;
        self
    }
3774
3775 fn populate_approval_fields(&mut self, entry: &mut JournalEntry, posting_date: NaiveDate) {
3778 if let Some(ref workflow) = entry.header.approval_workflow {
3779 let last_approver = workflow
3781 .actions
3782 .iter()
3783 .rev()
3784 .find(|a| matches!(a.action, ApprovalActionType::Approve));
3785
3786 if let Some(approver_action) = last_approver {
3787 entry.header.approved_by = Some(approver_action.actor_id.clone());
3788 entry.header.approval_date = Some(approver_action.action_timestamp.date_naive());
3789 } else {
3790 entry.header.approved_by = Some(workflow.preparer_id.clone());
3792 entry.header.approval_date = Some(posting_date);
3793 }
3794
3795 if self.rng.random::<f64>() < self.sod_violation_rate {
3797 let creator = entry.header.created_by.clone();
3798 entry.header.approved_by = Some(creator);
3799 entry.header.sod_violation = true;
3800 entry.header.sod_conflict_type = Some(SodConflictType::PreparerApprover);
3801 }
3802 }
3803 }
3804
    /// Builder: installs a ready-made drift controller.
    ///
    /// Returns the generator so calls can be chained.
    pub fn with_drift_controller(mut self, controller: DriftController) -> Self {
        self.drift_controller = Some(controller);
        self
    }
3814
3815 pub fn with_drift_config(mut self, config: DriftConfig, seed: u64) -> Self {
3820 if config.enabled {
3821 let total_periods = self.calculate_total_periods();
3822 self.drift_controller = Some(DriftController::new(config, seed, total_periods));
3823 }
3824 self
3825 }
3826
3827 fn calculate_total_periods(&self) -> u32 {
3829 let start_year = self.start_date.year();
3830 let start_month = self.start_date.month();
3831 let end_year = self.end_date.year();
3832 let end_month = self.end_date.month();
3833
3834 ((end_year - start_year) * 12 + (end_month as i32 - start_month as i32) + 1).max(1) as u32
3835 }
3836
3837 fn date_to_period(&self, date: NaiveDate) -> u32 {
3839 let start_year = self.start_date.year();
3840 let start_month = self.start_date.month() as i32;
3841 let date_year = date.year();
3842 let date_month = date.month() as i32;
3843
3844 ((date_year - start_year) * 12 + (date_month - start_month)).max(0) as u32
3845 }
3846
3847 fn get_drift_adjustments(&self, date: NaiveDate) -> DriftAdjustments {
3849 if let Some(ref controller) = self.drift_controller {
3850 let period = self.date_to_period(date);
3851 controller.compute_adjustments(period)
3852 } else {
3853 DriftAdjustments::none()
3854 }
3855 }
3856
3857 #[inline]
3859 fn select_user(&mut self, is_automated: bool) -> (String, String) {
3860 if let Some(ref pool) = self.user_pool {
3861 let persona = if is_automated {
3862 UserPersona::AutomatedSystem
3863 } else {
3864 let roll: f64 = self.rng.random();
3866 if roll < 0.4 {
3867 UserPersona::JuniorAccountant
3868 } else if roll < 0.7 {
3869 UserPersona::SeniorAccountant
3870 } else if roll < 0.85 {
3871 UserPersona::Controller
3872 } else {
3873 UserPersona::Manager
3874 }
3875 };
3876
3877 if let Some(user) = pool.get_random_user(persona, &mut self.rng) {
3878 return (user.user_id.clone(), user.persona.to_string());
3879 }
3880 }
3881
3882 if is_automated {
3884 (
3885 format!("BATCH{:04}", self.rng.random_range(1..=20)),
3886 "automated_system".to_string(),
3887 )
3888 } else {
3889 (
3890 format!("USER{:04}", self.rng.random_range(1..=40)),
3891 "senior_accountant".to_string(),
3892 )
3893 }
3894 }
3895
3896 #[inline]
3898 fn select_source(&mut self) -> TransactionSource {
3899 let roll: f64 = self.rng.random();
3900 let dist = &self.config.source_distribution;
3901
3902 if roll < dist.manual {
3903 TransactionSource::Manual
3904 } else if roll < dist.manual + dist.automated {
3905 TransactionSource::Automated
3906 } else if roll < dist.manual + dist.automated + dist.recurring {
3907 TransactionSource::Recurring
3908 } else {
3909 TransactionSource::Adjustment
3910 }
3911 }
3912
3913 #[inline]
3915 fn document_type_for_process(process: BusinessProcess) -> &'static str {
3924 match process {
3925 BusinessProcess::P2P => "KR",
3926 BusinessProcess::O2C => "DR",
3927 BusinessProcess::R2R => "SA",
3928 BusinessProcess::H2R => "HR",
3929 BusinessProcess::A2R => "AA",
3930 _ => "SA",
3931 }
3932 }
3933
    /// Draws a business process from `business_process_weights` via
    /// `weighted_select`, using the generator's main RNG.
    fn select_business_process(&mut self) -> BusinessProcess {
        *datasynth_core::utils::weighted_select(&mut self.rng, &self.business_process_weights)
    }
3937
3938 #[inline]
3943 fn power_law_index(n: usize, rng: &mut ChaCha8Rng) -> Option<usize> {
3944 if n == 0 || n > ZIPF_CAP {
3945 return None;
3946 }
3947 let total = ZIPF_CUM[n];
3948 let r = rng.random::<f64>() * total;
3949 let k = ZIPF_CUM[..=n]
3951 .binary_search_by(|v| v.partial_cmp(&r).unwrap_or(std::cmp::Ordering::Less))
3952 .unwrap_or_else(|e| e);
3953 Some(k.saturating_sub(1).min(n - 1))
3954 }
3955
3956 #[inline]
3963 fn concentrate<'a>(
3964 enabled: bool,
3965 rng: &mut ChaCha8Rng,
3966 all: &[&'a GLAccount],
3967 uniform: Option<&'a GLAccount>,
3968 ) -> Option<&'a GLAccount> {
3969 if enabled {
3970 Self::power_law_index(all.len(), rng)
3971 .map(|i| all[i])
3972 .or(uniform)
3973 } else {
3974 uniform
3975 }
3976 }
3977
3978 fn ensure_cond_pair_pool(&mut self, source: &str) {
3981 let cfg = &self.config.source_conditional_account_pair;
3982 if !cfg.enabled {
3983 return;
3984 }
3985 if self.cond_pair_sampler.is_none() {
3986 self.cond_pair_sampler = Some(Default::default());
3987 }
3988 let sampler = self
3989 .cond_pair_sampler
3990 .as_mut()
3991 .expect("just-initialised above");
3992 if sampler.pool(source).is_some() {
3993 return;
3994 }
3995 let all_accounts: Vec<String> = self
3996 .coa
3997 .accounts
3998 .iter()
3999 .map(|a| a.account_number.clone())
4000 .collect();
4001 if all_accounts.is_empty() {
4002 return;
4003 }
4004 let weights: Vec<f64> = vec![1.0; all_accounts.len()];
4007 sampler.ensure_pool(
4008 source,
4009 &all_accounts,
4010 &weights,
4011 cfg.accts_per_source_target,
4012 cfg.concentration,
4013 &mut self.cond_pair_rng,
4014 );
4015 }
4016
4017 #[inline]
4022 fn try_cond_pick_account_number(&mut self) -> Option<String> {
4023 let cfg = &self.config.source_conditional_account_pair;
4024 if !cfg.enabled {
4025 return None;
4026 }
4027 let src = self.current_je_source.clone()?;
4028 self.ensure_cond_pair_pool(&src);
4029 let sampler = self.cond_pair_sampler.as_ref()?;
4030 let pool = sampler.pool(&src)?;
4031 Some(pool.sample_one(&mut self.cond_pair_rng).to_string())
4032 }
4033
4034 #[inline]
4035 fn select_debit_account(&mut self) -> &GLAccount {
4036 if let Some(acct_num) = self.try_cond_pick_account_number() {
4038 if let Some(a) = self
4039 .coa
4040 .accounts
4041 .iter()
4042 .find(|a| a.account_number == acct_num)
4043 {
4044 return a;
4045 }
4046 }
4048 let accounts = self.coa.get_accounts_by_type(AccountType::Asset);
4049 let expense_accounts = self.coa.get_accounts_by_type(AccountType::Expense);
4050
4051 let all: Vec<_> = if self.rng.random::<f64>() < 0.6 {
4053 accounts
4054 } else {
4055 expense_accounts
4056 };
4057
4058 let uniform = all.choose(&mut self.rng).copied();
4059 let enabled = self.config.account_concentration.unwrap_or(true);
4060 Self::concentrate(enabled, &mut self.account_rng, &all, uniform).unwrap_or_else(|| {
4061 tracing::warn!(
4062 "Account selection returned empty list, falling back to first COA account"
4063 );
4064 &self.coa.accounts[0]
4065 })
4066 }
4067
4068 #[inline]
4069 fn select_credit_account(&mut self) -> &GLAccount {
4070 if let Some(acct_num) = self.try_cond_pick_account_number() {
4072 if let Some(a) = self
4073 .coa
4074 .accounts
4075 .iter()
4076 .find(|a| a.account_number == acct_num)
4077 {
4078 return a;
4079 }
4080 }
4081 let liability_accounts = self.coa.get_accounts_by_type(AccountType::Liability);
4082 let revenue_accounts = self.coa.get_accounts_by_type(AccountType::Revenue);
4083
4084 let all: Vec<_> = if self.rng.random::<f64>() < 0.6 {
4086 liability_accounts
4087 } else {
4088 revenue_accounts
4089 };
4090
4091 let uniform = all.choose(&mut self.rng).copied();
4092 let enabled = self.config.account_concentration.unwrap_or(true);
4093 Self::concentrate(enabled, &mut self.account_rng, &all, uniform).unwrap_or_else(|| {
4094 tracing::warn!(
4095 "Account selection returned empty list, falling back to first COA account"
4096 );
4097 &self.coa.accounts[0]
4098 })
4099 }
4100}
4101
4102impl Generator for JournalEntryGenerator {
4103 type Item = JournalEntry;
4104 type Config = (
4105 TransactionConfig,
4106 Arc<ChartOfAccounts>,
4107 Vec<String>,
4108 NaiveDate,
4109 NaiveDate,
4110 );
4111
4112 fn new(config: Self::Config, seed: u64) -> Self {
4113 Self::new_with_params(config.0, config.1, config.2, config.3, config.4, seed)
4114 }
4115
4116 fn generate_one(&mut self) -> Self::Item {
4117 self.generate()
4118 }
4119
4120 fn reset(&mut self) {
4121 self.rng = seeded_rng(self.seed, 0);
4122 self.source_mix_rng = seeded_rng(self.seed, 50_063);
4123 self.template_rng = seeded_rng(self.seed, 70_081);
4124 self.recurring_archetypes.clear();
4125 self.reversal_rng = seeded_rng(self.seed, 90_017);
4126 self.reversal_buffer.clear();
4127 self.account_rng = seeded_rng(self.seed, 60_071);
4128 self.allocation_rng = seeded_rng(self.seed, 80_023);
4129 self.fx_rng = seeded_rng(self.seed, 70_093);
4130 self.line_sampler.reset(self.seed + 1);
4131 self.amount_sampler.reset(self.seed + 2);
4132 self.temporal_sampler.reset(self.seed + 3);
4133 if let Some(ref mut adv) = self.advanced_amount_sampler {
4134 adv.reset(self.seed + 2);
4135 }
4136 self.count = 0;
4137 self.uuid_factory.reset();
4138
4139 let mut ref_gen = ReferenceGenerator::new(
4141 self.start_date.year(),
4142 self.companies
4143 .first()
4144 .map(std::string::String::as_str)
4145 .unwrap_or("1000"),
4146 );
4147 ref_gen.set_prefix(
4148 ReferenceType::Invoice,
4149 &self.template_config.references.invoice_prefix,
4150 );
4151 ref_gen.set_prefix(
4152 ReferenceType::PurchaseOrder,
4153 &self.template_config.references.po_prefix,
4154 );
4155 ref_gen.set_prefix(
4156 ReferenceType::SalesOrder,
4157 &self.template_config.references.so_prefix,
4158 );
4159 self.reference_generator = ref_gen;
4160 }
4161
4162 fn count(&self) -> u64 {
4163 self.count
4164 }
4165
4166 fn seed(&self) -> u64 {
4167 self.seed
4168 }
4169}
4170
4171use datasynth_core::traits::ParallelGenerator;
4172
4173impl ParallelGenerator for JournalEntryGenerator {
4174 fn split(self, parts: usize) -> Vec<Self> {
4180 let parts = parts.max(1);
4181 (0..parts)
4182 .map(|i| {
4183 let sub_seed = self
4185 .seed
4186 .wrapping_add((i as u64).wrapping_mul(0x9E3779B97F4A7C15));
4187
4188 let mut gen = JournalEntryGenerator::new_with_full_config(
4189 self.config.clone(),
4190 Arc::clone(&self.coa),
4191 self.companies.clone(),
4192 self.start_date,
4193 self.end_date,
4194 sub_seed,
4195 self.template_config.clone(),
4196 self.user_pool.clone(),
4197 );
4198
4199 gen.company_selector = self.company_selector.clone();
4201 gen.vendor_pool = self.vendor_pool.clone();
4202 gen.customer_pool = self.customer_pool.clone();
4203 gen.material_pool = self.material_pool.clone();
4204 gen.cost_center_pool = self.cost_center_pool.clone();
4210 gen.profit_center_pool = self.profit_center_pool.clone();
4211 gen.using_real_master_data = self.using_real_master_data;
4212 gen.fraud_config = self.fraud_config.clone();
4213 gen.persona_errors_enabled = self.persona_errors_enabled;
4214 gen.approval_enabled = self.approval_enabled;
4215 gen.approval_threshold = self.approval_threshold;
4216 gen.sod_violation_rate = self.sod_violation_rate;
4217 if let Some(mut adv) = self.advanced_amount_sampler.clone() {
4222 adv.reset(sub_seed.wrapping_add(2));
4223 gen.advanced_amount_sampler = Some(adv);
4224 }
4225 if let Some(mut cond) = self.conditional_amount_override.clone() {
4228 cond.reset(sub_seed.wrapping_add(17));
4229 gen.conditional_amount_override = Some(cond);
4230 }
4231 if let Some(mut cop) = self.correlation_copula.clone() {
4233 cop.reset(sub_seed.wrapping_add(31));
4234 gen.correlation_copula = Some(cop);
4235 }
4236
4237 gen.uuid_factory = DeterministicUuidFactory::for_partition(
4239 sub_seed,
4240 GeneratorType::JournalEntry,
4241 i as u8,
4242 );
4243
4244 if let Some(ref config) = self.temporal_patterns_config {
4246 gen.temporal_patterns_config = Some(config.clone());
4247 if config.business_days.enabled {
4249 if let Some(ref bdc) = self.business_day_calculator {
4250 gen.business_day_calculator = Some(bdc.clone());
4251 }
4252 }
4253 if config.processing_lags.enabled {
4255 let lag_config =
4256 Self::convert_processing_lag_config(&config.processing_lags);
4257 gen.processing_lag_calculator =
4258 Some(ProcessingLagCalculator::with_config(sub_seed, lag_config));
4259 }
4260 }
4261
4262 if let Some(ref dc) = self.drift_controller {
4264 gen.drift_controller = Some(dc.clone());
4265 }
4266
4267 gen.loaded_priors = self.loaded_priors.clone();
4270
4271 if let Some(ref cal) = self.velocity_calibrator {
4276 let mut fresh = crate::velocity_calibrator::VelocityCalibrator::new(
4277 cal.target_trigger_rates.clone(),
4278 cal.n_lines_between_calibrations,
4279 );
4280 fresh.current_values = cal.current_values.clone();
4281 gen.velocity_calibrator = Some(fresh);
4282 }
4283
4284 gen
4285 })
4286 .collect()
4287 }
4288}
4289
4290#[cfg(test)]
4291mod tests {
4292 use super::*;
4293 use crate::ChartOfAccountsGenerator;
4294
4295 #[test]
4296 fn test_generate_balanced_entries() {
4297 let mut coa_gen =
4298 ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
4299 let coa = Arc::new(coa_gen.generate());
4300
4301 let mut je_gen = JournalEntryGenerator::new_with_params(
4302 TransactionConfig::default(),
4303 coa,
4304 vec!["1000".to_string()],
4305 NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
4306 NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
4307 42,
4308 );
4309
4310 let mut balanced_count = 0;
4311 for _ in 0..100 {
4312 let entry = je_gen.generate();
4313
4314 let has_human_error = entry
4316 .header
4317 .header_text
4318 .as_ref()
4319 .map(|t| t.contains("[HUMAN_ERROR:"))
4320 .unwrap_or(false);
4321
4322 if !has_human_error {
4323 assert!(
4324 entry.is_balanced(),
4325 "Entry {:?} is not balanced",
4326 entry.header.document_id
4327 );
4328 balanced_count += 1;
4329 }
4330 assert!(entry.line_count() >= 2, "Entry has fewer than 2 lines");
4331 }
4332
4333 assert!(
4335 balanced_count >= 80,
4336 "Expected at least 80 balanced entries, got {}",
4337 balanced_count
4338 );
4339 }
4340
4341 #[test]
4342 fn test_deterministic_generation() {
4343 let mut coa_gen =
4344 ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
4345 let coa = Arc::new(coa_gen.generate());
4346
4347 let mut gen1 = JournalEntryGenerator::new_with_params(
4348 TransactionConfig::default(),
4349 Arc::clone(&coa),
4350 vec!["1000".to_string()],
4351 NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
4352 NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
4353 42,
4354 );
4355
4356 let mut gen2 = JournalEntryGenerator::new_with_params(
4357 TransactionConfig::default(),
4358 coa,
4359 vec!["1000".to_string()],
4360 NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
4361 NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
4362 42,
4363 );
4364
4365 for _ in 0..50 {
4366 let e1 = gen1.generate();
4367 let e2 = gen2.generate();
4368 assert_eq!(e1.header.document_id, e2.header.document_id);
4369 assert_eq!(e1.total_debit(), e2.total_debit());
4370 }
4371 }
4372
4373 #[test]
4374 fn test_templates_generate_descriptions() {
4375 let mut coa_gen =
4376 ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
4377 let coa = Arc::new(coa_gen.generate());
4378
4379 let template_config = TemplateConfig {
4381 names: datasynth_config::schema::NameTemplateConfig {
4382 generate_realistic_names: true,
4383 email_domain: "test.com".to_string(),
4384 culture_distribution: datasynth_config::schema::CultureDistribution::default(),
4385 },
4386 descriptions: datasynth_config::schema::DescriptionTemplateConfig {
4387 generate_header_text: true,
4388 generate_line_text: true,
4389 },
4390 references: datasynth_config::schema::ReferenceTemplateConfig {
4391 generate_references: true,
4392 invoice_prefix: "TEST-INV".to_string(),
4393 po_prefix: "TEST-PO".to_string(),
4394 so_prefix: "TEST-SO".to_string(),
4395 },
4396 path: None,
4397 merge_strategy: datasynth_config::TemplateMergeStrategy::default(),
4398 };
4399
4400 let mut je_gen = JournalEntryGenerator::new_with_full_config(
4401 TransactionConfig::default(),
4402 coa,
4403 vec!["1000".to_string()],
4404 NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
4405 NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
4406 42,
4407 template_config,
4408 None,
4409 )
4410 .with_persona_errors(false); for _ in 0..10 {
4413 let entry = je_gen.generate();
4414
4415 assert!(
4417 entry.header.header_text.is_some(),
4418 "Header text should be populated"
4419 );
4420
4421 assert!(
4423 entry.header.reference.is_some(),
4424 "Reference should be populated"
4425 );
4426
4427 assert!(
4429 entry.header.business_process.is_some(),
4430 "Business process should be set"
4431 );
4432
4433 for line in &entry.lines {
4435 assert!(line.line_text.is_some(), "Line text should be populated");
4436 }
4437
4438 assert!(entry.is_balanced());
4440 }
4441 }
4442
4443 #[test]
4444 fn test_user_pool_integration() {
4445 let mut coa_gen =
4446 ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
4447 let coa = Arc::new(coa_gen.generate());
4448
4449 let companies = vec!["1000".to_string()];
4450
4451 let mut user_gen = crate::UserGenerator::new(42);
4453 let user_pool = user_gen.generate_standard(&companies);
4454
4455 let mut je_gen = JournalEntryGenerator::new_with_full_config(
4456 TransactionConfig::default(),
4457 coa,
4458 companies,
4459 NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
4460 NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
4461 42,
4462 TemplateConfig::default(),
4463 Some(user_pool),
4464 );
4465
4466 for _ in 0..20 {
4468 let entry = je_gen.generate();
4469
4470 assert!(!entry.header.created_by.is_empty());
4473 }
4474 }
4475
4476 #[test]
4477 fn test_master_data_connection() {
4478 let mut coa_gen =
4479 ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
4480 let coa = Arc::new(coa_gen.generate());
4481
4482 let vendors = vec![
4484 Vendor::new("V-TEST-001", "Test Vendor Alpha", VendorType::Supplier),
4485 Vendor::new("V-TEST-002", "Test Vendor Beta", VendorType::Technology),
4486 ];
4487
4488 let customers = vec![
4490 Customer::new("C-TEST-001", "Test Customer One", CustomerType::Corporate),
4491 Customer::new(
4492 "C-TEST-002",
4493 "Test Customer Two",
4494 CustomerType::SmallBusiness,
4495 ),
4496 ];
4497
4498 let materials = vec![Material::new(
4500 "MAT-TEST-001",
4501 "Test Material A",
4502 MaterialType::RawMaterial,
4503 )];
4504
4505 let generator = JournalEntryGenerator::new_with_params(
4507 TransactionConfig::default(),
4508 coa,
4509 vec!["1000".to_string()],
4510 NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
4511 NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
4512 42,
4513 );
4514
4515 assert!(!generator.is_using_real_master_data());
4517
4518 let generator_with_data = generator
4520 .with_vendors(&vendors)
4521 .with_customers(&customers)
4522 .with_materials(&materials);
4523
4524 assert!(generator_with_data.is_using_real_master_data());
4526 }
4527
4528 #[test]
4529 fn test_with_master_data_convenience_method() {
4530 let mut coa_gen =
4531 ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
4532 let coa = Arc::new(coa_gen.generate());
4533
4534 let vendors = vec![Vendor::new("V-001", "Vendor One", VendorType::Supplier)];
4535 let customers = vec![Customer::new(
4536 "C-001",
4537 "Customer One",
4538 CustomerType::Corporate,
4539 )];
4540 let materials = vec![Material::new(
4541 "MAT-001",
4542 "Material One",
4543 MaterialType::RawMaterial,
4544 )];
4545
4546 let generator = JournalEntryGenerator::new_with_params(
4547 TransactionConfig::default(),
4548 coa,
4549 vec!["1000".to_string()],
4550 NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
4551 NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
4552 42,
4553 )
4554 .with_master_data(&vendors, &customers, &materials);
4555
4556 assert!(generator.is_using_real_master_data());
4557 }
4558
4559 #[test]
4560 fn test_stress_factors_increase_error_rate() {
4561 let mut coa_gen =
4562 ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
4563 let coa = Arc::new(coa_gen.generate());
4564
4565 let generator = JournalEntryGenerator::new_with_params(
4566 TransactionConfig::default(),
4567 coa,
4568 vec!["1000".to_string()],
4569 NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
4570 NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
4571 42,
4572 );
4573
4574 let base_rate = 0.1;
4575
4576 let regular_day = NaiveDate::from_ymd_opt(2024, 6, 15).unwrap(); let regular_rate = generator.apply_stress_factors(base_rate, regular_day);
4579 assert!(
4580 (regular_rate - base_rate).abs() < 0.01,
4581 "Regular day should have minimal stress factor adjustment"
4582 );
4583
4584 let month_end = NaiveDate::from_ymd_opt(2024, 6, 29).unwrap(); let month_end_rate = generator.apply_stress_factors(base_rate, month_end);
4587 assert!(
4588 month_end_rate > regular_rate,
4589 "Month end should have higher error rate than regular day"
4590 );
4591
4592 let year_end = NaiveDate::from_ymd_opt(2024, 12, 30).unwrap(); let year_end_rate = generator.apply_stress_factors(base_rate, year_end);
4595 assert!(
4596 year_end_rate > month_end_rate,
4597 "Year end should have highest error rate"
4598 );
4599
4600 let friday = NaiveDate::from_ymd_opt(2024, 6, 14).unwrap(); let friday_rate = generator.apply_stress_factors(base_rate, friday);
4603 assert!(
4604 friday_rate > regular_rate,
4605 "Friday should have higher error rate than mid-week"
4606 );
4607
4608 let monday = NaiveDate::from_ymd_opt(2024, 6, 17).unwrap(); let monday_rate = generator.apply_stress_factors(base_rate, monday);
4611 assert!(
4612 monday_rate > regular_rate,
4613 "Monday should have higher error rate than mid-week"
4614 );
4615 }
4616
4617 #[test]
4618 fn test_batching_produces_similar_entries() {
4619 let mut coa_gen =
4620 ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
4621 let coa = Arc::new(coa_gen.generate());
4622
4623 let mut je_gen = JournalEntryGenerator::new_with_params(
4625 TransactionConfig::default(),
4626 coa,
4627 vec!["1000".to_string()],
4628 NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
4629 NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
4630 123,
4631 )
4632 .with_persona_errors(false); let entries: Vec<JournalEntry> = (0..200).map(|_| je_gen.generate()).collect();
4636
4637 for entry in &entries {
4639 assert!(
4640 entry.is_balanced(),
4641 "All entries including batched should be balanced"
4642 );
4643 }
4644
4645 let mut date_counts: std::collections::HashMap<NaiveDate, usize> =
4647 std::collections::HashMap::new();
4648 for entry in &entries {
4649 *date_counts.entry(entry.header.posting_date).or_insert(0) += 1;
4650 }
4651
4652 let dates_with_multiple = date_counts.values().filter(|&&c| c > 1).count();
4654 assert!(
4655 dates_with_multiple > 0,
4656 "With batching, should see some dates with multiple entries"
4657 );
4658 }
4659
4660 #[test]
4661 fn test_temporal_patterns_business_days() {
4662 use datasynth_config::schema::{
4663 BusinessDaySchemaConfig, CalendarSchemaConfig, TemporalPatternsConfig,
4664 };
4665
4666 let mut coa_gen =
4667 ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
4668 let coa = Arc::new(coa_gen.generate());
4669
4670 let temporal_config = TemporalPatternsConfig {
4672 enabled: true,
4673 business_days: BusinessDaySchemaConfig {
4674 enabled: true,
4675 ..Default::default()
4676 },
4677 calendars: CalendarSchemaConfig {
4678 regions: vec!["US".to_string()],
4679 custom_holidays: vec![],
4680 },
4681 ..Default::default()
4682 };
4683
4684 let mut je_gen = JournalEntryGenerator::new_with_params(
4685 TransactionConfig::default(),
4686 coa,
4687 vec!["1000".to_string()],
4688 NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
4689 NaiveDate::from_ymd_opt(2024, 3, 31).unwrap(), 42,
4691 )
4692 .with_temporal_patterns(temporal_config, 42)
4693 .with_persona_errors(false);
4694
4695 let entries: Vec<JournalEntry> = (0..100).map(|_| je_gen.generate()).collect();
4697
4698 for entry in &entries {
4699 let weekday = entry.header.posting_date.weekday();
4700 assert!(
4701 weekday != chrono::Weekday::Sat && weekday != chrono::Weekday::Sun,
4702 "Posting date {:?} should not be a weekend",
4703 entry.header.posting_date
4704 );
4705 }
4706 }
4707
4708 #[test]
4709 fn test_default_generation_filters_weekends() {
4710 let mut coa_gen =
4714 ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
4715 let coa = Arc::new(coa_gen.generate());
4716
4717 let mut je_gen = JournalEntryGenerator::new_with_params(
4718 TransactionConfig::default(),
4719 coa,
4720 vec!["1000".to_string()],
4721 NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
4722 NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
4723 42,
4724 )
4725 .with_persona_errors(false);
4726
4727 let total = 500;
4728 let entries: Vec<JournalEntry> = (0..total).map(|_| je_gen.generate()).collect();
4729
4730 let weekend_count = entries
4731 .iter()
4732 .filter(|e| {
4733 let wd = e.header.posting_date.weekday();
4734 wd == chrono::Weekday::Sat || wd == chrono::Weekday::Sun
4735 })
4736 .count();
4737
4738 let weekend_pct = weekend_count as f64 / total as f64;
4739 assert!(
4740 weekend_pct < 0.05,
4741 "Expected weekend entries <5% of total without temporal_patterns enabled, \
4742 but got {:.1}% ({}/{})",
4743 weekend_pct * 100.0,
4744 weekend_count,
4745 total
4746 );
4747 }
4748
4749 #[test]
4750 fn test_document_type_derived_from_business_process() {
4751 let mut coa_gen =
4752 ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
4753 let coa = Arc::new(coa_gen.generate());
4754
4755 let mut je_gen = JournalEntryGenerator::new_with_params(
4756 TransactionConfig::default(),
4757 coa,
4758 vec!["1000".to_string()],
4759 NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
4760 NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
4761 99,
4762 )
4763 .with_persona_errors(false)
4764 .with_batching(false);
4765
4766 let total = 200;
4767 let mut doc_types = std::collections::HashSet::new();
4768 let mut sa_count = 0_usize;
4769
4770 for _ in 0..total {
4771 let entry = je_gen.generate();
4772 let dt = &entry.header.document_type;
4773 doc_types.insert(dt.clone());
4774 if dt == "SA" {
4775 sa_count += 1;
4776 }
4777 }
4778
4779 assert!(
4781 doc_types.len() > 3,
4782 "Expected >3 distinct document types, got {} ({:?})",
4783 doc_types.len(),
4784 doc_types,
4785 );
4786
4787 let sa_pct = sa_count as f64 / total as f64;
4789 assert!(
4790 sa_pct < 0.50,
4791 "Expected SA <50%, got {:.1}% ({}/{})",
4792 sa_pct * 100.0,
4793 sa_count,
4794 total,
4795 );
4796 }
4797
4798 #[test]
4799 fn test_enrich_line_items_account_description() {
4800 let mut coa_gen =
4801 ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
4802 let coa = Arc::new(coa_gen.generate());
4803
4804 let mut je_gen = JournalEntryGenerator::new_with_params(
4805 TransactionConfig::default(),
4806 coa,
4807 vec!["1000".to_string()],
4808 NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
4809 NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
4810 42,
4811 )
4812 .with_persona_errors(false);
4813
4814 let total = 200;
4815 let entries: Vec<JournalEntry> = (0..total).map(|_| je_gen.generate()).collect();
4816
4817 let total_lines: usize = entries.iter().map(|e| e.lines.len()).sum();
4819 let lines_with_desc: usize = entries
4820 .iter()
4821 .flat_map(|e| &e.lines)
4822 .filter(|l| l.account_description.is_some())
4823 .count();
4824
4825 let desc_pct = lines_with_desc as f64 / total_lines as f64;
4826 assert!(
4827 desc_pct > 0.95,
4828 "Expected >95% of lines to have account_description, got {:.1}% ({}/{})",
4829 desc_pct * 100.0,
4830 lines_with_desc,
4831 total_lines,
4832 );
4833 }
4834
4835 #[test]
4836 fn test_enrich_line_items_cost_center_for_expense_accounts() {
4837 let mut coa_gen =
4838 ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
4839 let coa = Arc::new(coa_gen.generate());
4840
4841 let mut je_gen = JournalEntryGenerator::new_with_params(
4842 TransactionConfig::default(),
4843 coa,
4844 vec!["1000".to_string()],
4845 NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
4846 NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
4847 42,
4848 )
4849 .with_persona_errors(false);
4850
4851 let total = 300;
4852 let entries: Vec<JournalEntry> = (0..total).map(|_| je_gen.generate()).collect();
4853
4854 let expense_lines: Vec<&JournalEntryLine> = entries
4856 .iter()
4857 .flat_map(|e| &e.lines)
4858 .filter(|l| {
4859 let first = l.gl_account.chars().next().unwrap_or('0');
4860 first == '5' || first == '6'
4861 })
4862 .collect();
4863
4864 if !expense_lines.is_empty() {
4865 let with_cc = expense_lines
4866 .iter()
4867 .filter(|l| l.cost_center.is_some())
4868 .count();
4869 let cc_pct = with_cc as f64 / expense_lines.len() as f64;
4870 assert!(
4871 cc_pct > 0.80,
4872 "Expected >80% of expense lines to have cost_center, got {:.1}% ({}/{})",
4873 cc_pct * 100.0,
4874 with_cc,
4875 expense_lines.len(),
4876 );
4877 }
4878 }
4879
4880 #[test]
4881 fn test_enrich_line_items_profit_center_and_line_text() {
4882 let mut coa_gen =
4883 ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
4884 let coa = Arc::new(coa_gen.generate());
4885
4886 let mut je_gen = JournalEntryGenerator::new_with_params(
4887 TransactionConfig::default(),
4888 coa,
4889 vec!["1000".to_string()],
4890 NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
4891 NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
4892 42,
4893 )
4894 .with_persona_errors(false);
4895
4896 let total = 100;
4897 let entries: Vec<JournalEntry> = (0..total).map(|_| je_gen.generate()).collect();
4898
4899 let total_lines: usize = entries.iter().map(|e| e.lines.len()).sum();
4900
4901 let with_pc = entries
4903 .iter()
4904 .flat_map(|e| &e.lines)
4905 .filter(|l| l.profit_center.is_some())
4906 .count();
4907 let pc_pct = with_pc as f64 / total_lines as f64;
4908 assert!(
4909 pc_pct > 0.95,
4910 "Expected >95% of lines to have profit_center, got {:.1}% ({}/{})",
4911 pc_pct * 100.0,
4912 with_pc,
4913 total_lines,
4914 );
4915
4916 let with_text = entries
4918 .iter()
4919 .flat_map(|e| &e.lines)
4920 .filter(|l| l.line_text.is_some())
4921 .count();
4922 let text_pct = with_text as f64 / total_lines as f64;
4923 assert!(
4924 text_pct > 0.95,
4925 "Expected >95% of lines to have line_text, got {:.1}% ({}/{})",
4926 text_pct * 100.0,
4927 with_text,
4928 total_lines,
4929 );
4930 }
4931
4932 #[test]
4935 fn test_je_has_audit_flags() {
4936 let mut coa_gen =
4937 ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
4938 let coa = Arc::new(coa_gen.generate());
4939
4940 let mut je_gen = JournalEntryGenerator::new_with_params(
4941 TransactionConfig::default(),
4942 coa,
4943 vec!["1000".to_string()],
4944 NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
4945 NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
4946 42,
4947 )
4948 .with_persona_errors(false);
4949
4950 for _ in 0..100 {
4951 let entry = je_gen.generate();
4952
4953 assert!(
4955 !entry.header.source_system.is_empty(),
4956 "source_system should be populated, got empty string"
4957 );
4958
4959 assert!(
4961 !entry.header.created_by.is_empty(),
4962 "created_by should be populated"
4963 );
4964
4965 assert!(
4967 entry.header.created_date.is_some(),
4968 "created_date should be populated"
4969 );
4970 }
4971 }
4972
4973 #[test]
4974 fn test_manual_entry_rate() {
4975 let mut coa_gen =
4976 ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
4977 let coa = Arc::new(coa_gen.generate());
4978
4979 let mut je_gen = JournalEntryGenerator::new_with_params(
4980 TransactionConfig::default(),
4981 coa,
4982 vec!["1000".to_string()],
4983 NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
4984 NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
4985 42,
4986 )
4987 .with_persona_errors(false)
4988 .with_batching(false);
4989
4990 let total = 1000;
4991 let entries: Vec<JournalEntry> = (0..total).map(|_| je_gen.generate()).collect();
4992
4993 let manual_count = entries.iter().filter(|e| e.header.is_manual).count();
4994 let manual_rate = manual_count as f64 / total as f64;
4995
4996 assert!(
4999 manual_rate > 0.01 && manual_rate < 0.50,
5000 "Manual entry rate should be reasonable (1%-50%), got {:.1}% ({}/{})",
5001 manual_rate * 100.0,
5002 manual_count,
5003 total,
5004 );
5005
5006 for entry in &entries {
5008 let source_is_manual = entry.header.source == TransactionSource::Manual;
5009 assert_eq!(
5010 entry.header.is_manual, source_is_manual,
5011 "is_manual should match source == Manual"
5012 );
5013 }
5014 }
5015
5016 #[test]
5017 fn test_manual_source_consistency() {
5018 let mut coa_gen =
5019 ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
5020 let coa = Arc::new(coa_gen.generate());
5021
5022 let mut je_gen = JournalEntryGenerator::new_with_params(
5023 TransactionConfig::default(),
5024 coa,
5025 vec!["1000".to_string()],
5026 NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
5027 NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
5028 42,
5029 )
5030 .with_persona_errors(false)
5031 .with_batching(false);
5032
5033 for _ in 0..500 {
5034 let entry = je_gen.generate();
5035
5036 if entry.header.is_manual {
5037 let s = entry.header.source_system.as_str();
5042 assert!(
5043 s == "manual"
5044 || s == "spreadsheet"
5045 || s.starts_with("manual/")
5046 || s.starts_with("spreadsheet/"),
5047 "Manual entry should have source_system in `manual` / `spreadsheet` family, got '{s}'",
5048 );
5049 } else {
5050 let s = entry.header.source_system.as_str();
5052 assert!(
5053 !(s == "manual"
5054 || s == "spreadsheet"
5055 || s.starts_with("manual/")
5056 || s.starts_with("spreadsheet/")),
5057 "Non-manual entry should not be in `manual` / `spreadsheet` family, got '{s}'",
5058 );
5059 }
5060 }
5061 }
5062
5063 #[test]
5064 fn test_default_source_codes_breadth() {
5065 let mut coa_gen =
5070 ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 7);
5071 let coa = Arc::new(coa_gen.generate());
5072 let mut je_gen = JournalEntryGenerator::new_with_params(
5073 TransactionConfig::default(),
5074 coa,
5075 vec!["1000".to_string()],
5076 NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
5077 NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
5078 7,
5079 )
5080 .with_persona_errors(false)
5081 .with_batching(false);
5082
5083 let mut codes = std::collections::HashSet::new();
5084 for _ in 0..500 {
5085 let e = je_gen.generate();
5086 let code = e
5087 .header
5088 .sap_source_code
5089 .expect("default config should populate sap_source_code");
5090 codes.insert(code);
5091 }
5092 assert!(
5093 codes.len() >= 10,
5094 "default source-mix should be broad (>=10 distinct codes), got {}",
5095 codes.len()
5096 );
5097 }
5098
5099 #[test]
5100 fn test_source_codes_opt_out() {
5101 let mut coa_gen =
5104 ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 9);
5105 let coa = Arc::new(coa_gen.generate());
5106 let cfg = TransactionConfig {
5107 synthetic_source_codes: Some(false),
5108 ..TransactionConfig::default()
5109 };
5110 let mut je_gen = JournalEntryGenerator::new_with_params(
5111 cfg,
5112 coa,
5113 vec!["1000".to_string()],
5114 NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
5115 NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
5116 9,
5117 )
5118 .with_persona_errors(false)
5119 .with_batching(false);
5120 for _ in 0..50 {
5121 let e = je_gen.generate();
5122 assert!(
5123 e.header.sap_source_code.is_none(),
5124 "opt-out should leave sap_source_code None (legacy enum source)"
5125 );
5126 }
5127 }
5128
5129 #[test]
5130 fn test_recurring_templates_reuse_archetypes() {
5131 fn run(recurring: Option<bool>) -> (usize, usize, bool) {
5135 let mut coa_gen = ChartOfAccountsGenerator::new(
5136 CoAComplexity::Medium,
5137 IndustrySector::Manufacturing,
5138 11,
5139 );
5140 let coa = Arc::new(coa_gen.generate());
5141 let cfg = TransactionConfig {
5142 recurring_templates: recurring,
5143 ..TransactionConfig::default()
5144 };
5145 let mut g = JournalEntryGenerator::new_with_params(
5146 cfg,
5147 coa,
5148 vec!["1000".to_string()],
5149 NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
5150 NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
5151 11,
5152 )
5153 .with_persona_errors(false)
5154 .with_batching(false);
5155 let n = 800;
5156 let mut arche = std::collections::HashSet::new();
5157 let mut balanced = true;
5158 for _ in 0..n {
5159 let e = g.generate();
5160 if !e.is_balanced() {
5161 balanced = false;
5162 }
5163 let mut sig: Vec<(String, bool)> = e
5164 .lines
5165 .iter()
5166 .map(|l| (l.gl_account.clone(), l.debit_amount > Decimal::ZERO))
5167 .collect();
5168 sig.sort();
5169 arche.insert(sig);
5170 }
5171 (n, arche.len(), balanced)
5172 }
5173 let (n, distinct_on, bal_on) = run(Some(true));
5174 let (_, distinct_off, bal_off) = run(Some(false));
5175 assert!(bal_on && bal_off, "balance preserved in both modes");
5176 assert!(
5177 distinct_on < distinct_off,
5178 "templating should reduce distinct archetypes ({distinct_on} on vs {distinct_off} off)"
5179 );
5180 assert!(
5181 distinct_on * 2 < n,
5182 "templating should reuse heavily: {distinct_on} distinct archetypes over {n} JEs"
5183 );
5184 }
5185
5186 #[test]
5187 fn test_reversal_process_emits_balanced_reversals() {
5188 fn run(rate: Option<f64>) -> (usize, bool) {
5191 let mut coa_gen = ChartOfAccountsGenerator::new(
5192 CoAComplexity::Small,
5193 IndustrySector::Manufacturing,
5194 13,
5195 );
5196 let coa = Arc::new(coa_gen.generate());
5197 let cfg = TransactionConfig {
5198 reversal_rate: rate,
5199 ..TransactionConfig::default()
5200 };
5201 let mut g = JournalEntryGenerator::new_with_params(
5202 cfg,
5203 coa,
5204 vec!["1000".to_string()],
5205 NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
5206 NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
5207 13,
5208 )
5209 .with_persona_errors(false)
5210 .with_batching(false);
5211 let mut reversals = 0;
5212 let mut balanced = true;
5213 for _ in 0..1000 {
5214 let e = g.generate();
5215 if !e.is_balanced() {
5216 balanced = false;
5217 }
5218 if e.header
5219 .header_text
5220 .as_deref()
5221 .is_some_and(|t| t.starts_with("Reversal of"))
5222 {
5223 reversals += 1;
5224 }
5225 }
5226 (reversals, balanced)
5227 }
5228 let (rev_on, bal_on) = run(Some(0.05));
5229 let (rev_off, bal_off) = run(Some(0.0));
5230 assert!(bal_on && bal_off, "all entries balanced incl. reversals");
5231 assert_eq!(rev_off, 0, "rate 0.0 emits no reversals, got {rev_off}");
5232 assert!(rev_on > 0, "rate 0.05 should emit reversals, got {rev_on}");
5233 }
5234
#[test]
fn test_account_concentration_creates_pareto() {
    // Drives one generator (seed 17, Medium chart of accounts) for 1000 entries
    // with the given `account_concentration` setting. Returns the fraction of
    // all posted lines that land on the busiest 10% of GL accounts, together
    // with a flag telling whether every generated entry was balanced.
    fn run(concentration: Option<bool>) -> (f64, bool) {
        use std::collections::HashMap;

        let mut chart_builder = ChartOfAccountsGenerator::new(
            CoAComplexity::Medium,
            IndustrySector::Manufacturing,
            17,
        );
        let chart = Arc::new(chart_builder.generate());
        // Recurring templates and reversals are disabled in both runs; only
        // `account_concentration` varies between them.
        let settings = TransactionConfig {
            account_concentration: concentration,
            recurring_templates: Some(false),
            reversal_rate: Some(0.0),
            ..TransactionConfig::default()
        };
        let period_start = NaiveDate::from_ymd_opt(2024, 1, 1).unwrap();
        let period_end = NaiveDate::from_ymd_opt(2024, 12, 31).unwrap();
        let mut generator = JournalEntryGenerator::new_with_params(
            settings,
            chart,
            vec!["1000".to_string()],
            period_start,
            period_end,
            17,
        )
        .with_persona_errors(false)
        .with_batching(false);

        // Tally how many lines each GL account receives.
        let mut lines_per_account: HashMap<String, usize> = HashMap::new();
        let mut line_total = 0usize;
        let mut all_balanced = true;
        for _ in 0..1000 {
            let entry = generator.generate();
            all_balanced &= entry.is_balanced();
            line_total += entry.lines.len();
            for line in &entry.lines {
                *lines_per_account
                    .entry(line.gl_account.clone())
                    .or_insert(0) += 1;
            }
        }

        // Rank accounts by line count, busiest first.
        let mut ranked: Vec<usize> = lines_per_account.values().copied().collect();
        ranked.sort_unstable();
        ranked.reverse();
        // Size of the top decile: rounded up and never fewer than one account.
        let head_len = ((ranked.len() as f64 * 0.10).ceil() as usize).max(1);
        let head_lines: usize = ranked.iter().take(head_len).sum();
        let head_share = head_lines as f64 / line_total as f64;
        (head_share, all_balanced)
    }

    let (share_on, bal_on) = run(Some(true));
    let (share_off, bal_off) = run(Some(false));
    assert!(bal_on && bal_off, "balance preserved in both modes");
    // Concentration must lift the top-decile share by more than 20 points...
    assert!(
        share_on > share_off + 0.20,
        "concentration should raise the top-10% line share ({share_on:.3} on vs {share_off:.3} off)"
    );
    // ...and the top decile must carry more than half of all lines.
    assert!(
        share_on > 0.50,
        "hot accounts should dominate: top-10% line share {share_on:.3}"
    );
}
5298
#[test]
fn test_allocation_batch_emits_large_balanced_postings() {
    // Generates 2000 entries (seed 23, Small chart of accounts) at the given
    // `allocation_batch_rate`, with reversals disabled. Returns, in order:
    //   - the number of entries whose SAP source code is "AB",
    //   - whether every generated entry was balanced,
    //   - the largest count of distinct cost centers seen in one "AB" entry.
    // Panics as soon as an "AB" entry has fewer than ALLOCATION_MIN_TARGETS lines.
    fn run(rate: Option<f64>) -> (usize, bool, usize) {
        use std::collections::HashSet;

        let mut chart_builder = ChartOfAccountsGenerator::new(
            CoAComplexity::Small,
            IndustrySector::Manufacturing,
            23,
        );
        let chart = Arc::new(chart_builder.generate());
        let settings = TransactionConfig {
            allocation_batch_rate: rate,
            reversal_rate: Some(0.0),
            ..TransactionConfig::default()
        };
        let period_start = NaiveDate::from_ymd_opt(2024, 1, 1).unwrap();
        let period_end = NaiveDate::from_ymd_opt(2024, 12, 31).unwrap();
        let mut generator = JournalEntryGenerator::new_with_params(
            settings,
            chart,
            vec!["1000".to_string()],
            period_start,
            period_end,
            23,
        )
        .with_persona_errors(false)
        .with_batching(false);

        let mut batch_count = 0usize;
        let mut all_balanced = true;
        let mut widest_spread = 0usize;
        for _ in 0..2000 {
            let entry = generator.generate();
            all_balanced &= entry.is_balanced();
            // Only entries tagged with source code "AB" are inspected further.
            if entry.header.sap_source_code.as_deref() != Some("AB") {
                continue;
            }
            batch_count += 1;
            let line_count = entry.lines.len();
            assert!(
                line_count >= ALLOCATION_MIN_TARGETS as usize,
                "allocation batch should be large, got {} lines",
                line_count
            );
            // Lines without a cost center do not contribute to the spread.
            let cost_centers: HashSet<&str> = entry
                .lines
                .iter()
                .filter_map(|line| line.cost_center.as_deref())
                .collect();
            widest_spread = widest_spread.max(cost_centers.len());
        }
        (batch_count, all_balanced, widest_spread)
    }

    let (on, bal_on, cc) = run(Some(0.10));
    let (off, bal_off, _) = run(Some(0.0));
    assert!(
        bal_on && bal_off,
        "all entries balanced incl. allocation batches"
    );
    assert_eq!(off, 0, "rate 0.0 emits no allocation batches, got {off}");
    assert!(on > 0, "rate 0.10 should emit allocation batches, got {on}");
    assert!(
        cc > 1,
        "allocation should spread across multiple cost centers, got {cc}"
    );
}
5365
#[test]
fn test_derived_id_processes_keep_document_ids_unique() {
    use std::collections::HashSet;

    let mut chart_builder =
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 31);
    let chart = Arc::new(chart_builder.generate());
    // Reversals (15%) and allocation batches (10%) are both enabled for this run.
    let settings = TransactionConfig {
        reversal_rate: Some(0.15),
        allocation_batch_rate: Some(0.10),
        ..TransactionConfig::default()
    };
    let period_start = NaiveDate::from_ymd_opt(2024, 1, 1).unwrap();
    let period_end = NaiveDate::from_ymd_opt(2024, 12, 31).unwrap();
    let mut generator = JournalEntryGenerator::new_with_params(
        settings,
        chart,
        vec!["1000".to_string()],
        period_start,
        period_end,
        31,
    )
    .with_persona_errors(false)
    .with_batching(false);

    let n: usize = 3000;
    let mut seen_ids = HashSet::new();
    for _ in 0..n {
        let entry = generator.generate();
        let doc_id = entry.header.document_id;
        // `insert` returns false when the id was already present.
        let is_new = seen_ids.insert(doc_id);
        assert!(
            is_new,
            "duplicate document id {} (derived-id collision)",
            doc_id
        );
    }
    assert_eq!(seen_ids.len(), n, "all {n} document ids unique");
}
5402
#[test]
fn test_business_unit_rolls_up_from_cost_center() {
    // Generates 600 entries (seed 19, Medium chart of accounts) with the given
    // `business_unit_dimension` setting and returns, in order:
    //   - the number of lines carrying a cost center or a profit center,
    //   - the number of lines carrying a business unit,
    //   - whether every business unit equals `business_unit_for_dimension` of
    //     its dimension AND each dimension always mapped to the same unit,
    //   - whether every business unit is "BU" followed by an integer in 1..=11.
    fn run(enabled: Option<bool>) -> (usize, usize, bool, bool) {
        use std::collections::HashMap;

        let mut chart_builder = ChartOfAccountsGenerator::new(
            CoAComplexity::Medium,
            IndustrySector::Manufacturing,
            19,
        );
        let chart = Arc::new(chart_builder.generate());
        let settings = TransactionConfig {
            business_unit_dimension: enabled,
            ..TransactionConfig::default()
        };
        let period_start = NaiveDate::from_ymd_opt(2024, 1, 1).unwrap();
        let period_end = NaiveDate::from_ymd_opt(2024, 12, 31).unwrap();
        let mut generator = JournalEntryGenerator::new_with_params(
            settings,
            chart,
            vec!["1000".to_string()],
            period_start,
            period_end,
            19,
        )
        .with_persona_errors(false)
        .with_batching(false);

        let mut dim_count = 0usize;
        let mut bu_count = 0usize;
        let mut rollup_consistent = true;
        let mut codes_well_formed = true;
        // Remembers the business unit first observed for each dimension value.
        let mut seen_rollups: HashMap<String, String> = HashMap::new();
        for _ in 0..600 {
            let entry = generator.generate();
            for line in &entry.lines {
                // The cost center wins; the profit center is only a fallback.
                let dimension = line
                    .cost_center
                    .as_deref()
                    .or(line.profit_center.as_deref());
                dim_count += usize::from(dimension.is_some());

                let Some(unit) = &line.business_unit else {
                    continue;
                };
                bu_count += 1;

                // A unit on a line without any dimension is checked against "".
                let key = dimension.unwrap_or_default().to_string();
                let expected = JournalEntryGenerator::business_unit_for_dimension(&key);
                if unit != &expected {
                    rollup_consistent = false;
                }
                // The same dimension must never map to two different units.
                if let Some(previous) = seen_rollups.insert(key, unit.clone()) {
                    if &previous != unit {
                        rollup_consistent = false;
                    }
                }

                let in_range = unit
                    .strip_prefix("BU")
                    .and_then(|digits| digits.parse::<u32>().ok())
                    .is_some_and(|number| (1..=11).contains(&number));
                codes_well_formed &= in_range;
            }
        }
        (dim_count, bu_count, rollup_consistent, codes_well_formed)
    }

    let (dim_on, bu_on, consistent, well_formed) = run(Some(true));
    let (_, bu_off, _, _) = run(Some(false));
    assert!(
        dim_on > 0 && bu_on > 0,
        "BU should be populated where CC/PC is"
    );
    assert_eq!(
        dim_on, bu_on,
        "every CC/PC-bearing line gets a BU ({dim_on} dim vs {bu_on} BU)"
    );
    assert!(
        consistent,
        "BU must be the deterministic roll-up of its CC/PC"
    );
    assert!(well_formed, "BU codes must be BU01..BU11");
    assert_eq!(bu_off, 0, "dimension off ⇒ no business_unit, got {bu_off}");
}
5483
#[test]
fn test_foreign_currency_sap_style() {
    // Generates 1500 entries (seed 29, Small chart of accounts) at the given
    // `foreign_currency_rate`, with reversals and allocation batches disabled.
    // Returns, in order:
    //   - the number of entries whose header currency is not "USD",
    //   - whether every entry passed `is_balanced()`,
    //   - whether every non-USD entry has a `transaction_amount` on each line
    //     and its debit-side and credit-side transaction amounts agree within
    //     0.01 per line.
    fn run(rate: Option<f64>) -> (usize, bool, bool) {
        let mut chart_builder = ChartOfAccountsGenerator::new(
            CoAComplexity::Small,
            IndustrySector::Manufacturing,
            29,
        );
        let chart = Arc::new(chart_builder.generate());
        let settings = TransactionConfig {
            foreign_currency_rate: rate,
            reversal_rate: Some(0.0),
            allocation_batch_rate: Some(0.0),
            ..TransactionConfig::default()
        };
        let period_start = NaiveDate::from_ymd_opt(2024, 1, 1).unwrap();
        let period_end = NaiveDate::from_ymd_opt(2024, 12, 31).unwrap();
        let mut generator = JournalEntryGenerator::new_with_params(
            settings,
            chart,
            vec!["1000".to_string()],
            period_start,
            period_end,
            29,
        )
        .with_persona_errors(false)
        .with_batching(false);

        let mut foreign_count = 0usize;
        let mut ledger_balanced = true;
        let mut txn_balanced = true;
        for _ in 0..1500 {
            let entry = generator.generate();
            ledger_balanced &= entry.is_balanced();
            if entry.header.currency == "USD" {
                continue;
            }
            foreign_count += 1;

            // Every line of a foreign entry must carry a transaction amount.
            if entry
                .lines
                .iter()
                .any(|line| line.transaction_amount.is_none())
            {
                txn_balanced = false;
            }

            // Sum transaction amounts separately for debit and credit lines.
            let mut txn_debits = Decimal::ZERO;
            let mut txn_credits = Decimal::ZERO;
            for line in &entry.lines {
                let Some(amount) = line.transaction_amount else {
                    continue;
                };
                if line.debit_amount > Decimal::ZERO {
                    txn_debits += amount;
                }
                if line.credit_amount > Decimal::ZERO {
                    txn_credits += amount;
                }
            }

            // Tolerance is 0.01 multiplied by the number of lines.
            let tolerance = Decimal::new(entry.lines.len() as i64, 2);
            if (txn_debits - txn_credits).abs() > tolerance {
                txn_balanced = false;
            }
        }
        (foreign_count, ledger_balanced, txn_balanced)
    }

    let (fon, lbal_on, tbal_on) = run(Some(0.20));
    let (foff, lbal_off, _) = run(Some(0.0));
    assert!(
        lbal_on && lbal_off,
        "ledger balance (debit==credit) preserved in both modes"
    );
    assert!(
        fon > 0,
        "rate 0.20 should produce foreign-currency JEs, got {fon}"
    );
    assert_eq!(foff, 0, "rate 0.0 ⇒ no foreign JEs, got {foff}");
    assert!(
        tbal_on,
        "foreign JEs carry transaction_amount + balance in the transaction currency"
    );
}
5564
#[test]
fn test_created_date_before_posting() {
    let mut chart_builder =
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
    let chart = Arc::new(chart_builder.generate());

    let period_start = NaiveDate::from_ymd_opt(2024, 1, 1).unwrap();
    let period_end = NaiveDate::from_ymd_opt(2024, 12, 31).unwrap();
    let mut generator = JournalEntryGenerator::new_with_params(
        TransactionConfig::default(),
        chart,
        vec!["1000".to_string()],
        period_start,
        period_end,
        42,
    )
    .with_persona_errors(false);

    // For 500 generated entries, the calendar date of `created_date` (when
    // present) must not fall after the posting date.
    for _ in 0..500 {
        let entry = generator.generate();

        // Entries without a creation timestamp are not checked.
        let Some(created_at) = entry.header.created_date else {
            continue;
        };
        let created_on = created_at.date();
        let posted_on = entry.header.posting_date;
        assert!(
            created_on <= posted_on,
            "created_date ({}) should be <= posting_date ({})",
            created_on,
            posted_on,
        );
    }
}
5595
#[test]
fn apply_calibration_step_updates_lognormal_sigma() {
    // Verifies that `apply_calibration_step` writes through to the amount
    // sampler for two parameters: "amounts.lognormal_sigma" and
    // "amounts.round_dollar_share".
    let mut coa_gen =
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
    let coa = Arc::new(coa_gen.generate());

    // Named `je_gen` rather than `gen`: `gen` is a reserved keyword from the
    // Rust 2024 edition onwards, so the old binding would stop compiling on an
    // edition bump.
    let mut je_gen = JournalEntryGenerator::new_with_params(
        TransactionConfig::default(),
        coa,
        vec!["1000".to_string()],
        NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
        NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
        42,
    );

    // Step 1: raise the lognormal sigma by 0.01 and read it back.
    let baseline_sigma = je_gen.amount_sampler.lognormal_sigma();
    let target_sigma = baseline_sigma + 0.01;

    let step_sigma = crate::velocity_calibrator::CalibrationStep {
        rule_id: "R6".to_string(),
        parameter: "amounts.lognormal_sigma".to_string(),
        delta: 0.01,
        new_value: target_sigma,
    };
    je_gen.apply_calibration_step(&step_sigma);
    assert!(
        (je_gen.amount_sampler.lognormal_sigma() - target_sigma).abs() < 1e-9,
        "lognormal_sigma should be updated to {}",
        target_sigma
    );

    // Step 2: lower the round-number probability by 0.005, floored at 0.0.
    let baseline_round = je_gen.amount_sampler.round_number_probability();
    let target_round = (baseline_round - 0.005).max(0.0);
    let step_round = crate::velocity_calibrator::CalibrationStep {
        rule_id: "R9".to_string(),
        parameter: "amounts.round_dollar_share".to_string(),
        delta: -0.005,
        new_value: target_round,
    };
    je_gen.apply_calibration_step(&step_round);
    // The expectation is clamped to [0, 1]; presumably this mirrors a clamp in
    // the generator's setter (TODO confirm against `apply_calibration_step`).
    let expected = target_round.clamp(0.0, 1.0);
    assert!(
        (je_gen.amount_sampler.round_number_probability() - expected).abs() < 1e-9,
        "round_number_probability should be updated to {}",
        expected
    );
}
5644
#[test]
fn master_data_resolver_fills_every_pii_kind() {
    use datasynth_core::distributions::text_taxonomy::{
        PiiPlaceholderKind, PlaceholderResolver,
    };

    // Each pool holds a single value; `resolve` is expected to return exactly
    // that value for the matching placeholder kind.
    let mut resolver = MasterDataResolver {
        companies: vec!["Acme AG".to_string()],
        persons: vec!["Hans Muster".to_string()],
        streets: vec!["Hauptstrasse 1".to_string()],
        patients: vec!["Patient X".to_string()],
    };
    let mut rng = rand::rng();

    let expectations = [
        (PiiPlaceholderKind::Company, "Acme AG"),
        (PiiPlaceholderKind::Person, "Hans Muster"),
        (PiiPlaceholderKind::Street, "Hauptstrasse 1"),
        (PiiPlaceholderKind::Patient, "Patient X"),
    ];
    for (kind, expected) in expectations {
        assert_eq!(resolver.resolve(kind, &mut rng), expected);
    }
}
5671
#[test]
fn master_data_resolver_empty_pool_falls_back() {
    use datasynth_core::distributions::text_taxonomy::{
        PiiPlaceholderKind, PlaceholderResolver,
    };

    // A default-constructed resolver (empty pools, going by this test's name)
    // must still resolve a company placeholder to a non-empty string.
    let mut resolver = MasterDataResolver::default();
    let mut rng = rand::rng();
    let company = resolver.resolve(PiiPlaceholderKind::Company, &mut rng);
    assert!(!company.is_empty());
}
5682
#[test]
fn synthetic_patient_pool_entries_pass_residual_scan() {
    use datasynth_core::distributions::text_taxonomy::PlaceholderGrammar;

    // Each name from the "de_CH" synthetic patient pool is dropped into a fixed
    // "*<name> G:… E:… A:…" line and run through the residual PII scan. Hits
    // whose pattern is "given_name" are ignored; any other hit fails the test.
    for name in synthetic_patient_pool("de_CH") {
        let filled = format!("*{name} G:2024-01-15 E:2024-01-20 A:2024-02-01");
        let hits = PlaceholderGrammar::residual_pii_scan(&filled);
        let structural: Vec<_> = hits
            .into_iter()
            .filter(|hit| hit.pattern != "given_name")
            .collect();
        assert!(
            structural.is_empty(),
            "synthetic patient name {name:?} fills to PII-shaped {filled:?}: {structural:?}"
        );
    }
}
5709
#[test]
fn master_data_resolver_fallbacks_are_non_empty_and_placeholder_free() {
    use datasynth_core::distributions::text_taxonomy::{
        PiiPlaceholderKind, PlaceholderResolver,
    };

    // With a default-constructed resolver, every placeholder kind must resolve
    // to a non-empty string that contains no '{' character.
    let mut resolver = MasterDataResolver::default();
    let mut rng = rand::rng();
    let kinds = [
        PiiPlaceholderKind::Company,
        PiiPlaceholderKind::Person,
        PiiPlaceholderKind::Street,
        PiiPlaceholderKind::Patient,
    ];
    for kind in kinds {
        let resolved = resolver.resolve(kind, &mut rng);
        assert!(
            !resolved.is_empty(),
            "fallback for {kind:?} must be non-empty"
        );
        assert!(
            !resolved.contains('{'),
            "fallback for {kind:?} must not contain a placeholder token"
        );
    }
}
5734}