1use chrono::{Datelike, NaiveDate, Timelike};
4use datasynth_core::utils::seeded_rng;
5use rand::prelude::*;
6use rand_chacha::ChaCha8Rng;
7use rust_decimal::prelude::*;
8use rust_decimal::Decimal;
9use std::sync::Arc;
10
11use tracing::debug;
12
13use datasynth_config::schema::{
14 AdvancedDistributionConfig, FraudConfig, GeneratorConfig, MixtureDistributionType,
15 TemplateConfig, TemporalPatternsConfig, TransactionConfig,
16};
17use datasynth_core::distributions::{
18 AdvancedAmountSampler, BusinessDayCalculator, CrossDayConfig, DriftAdjustments, DriftConfig,
19 DriftController, EventType, IndustryAmountProfile, IndustryType, LagDistribution,
20 PeriodEndConfig, PeriodEndDynamics, PeriodEndModel, ProcessingLagCalculator,
21 ProcessingLagConfig, *,
22};
23use datasynth_core::models::*;
24use datasynth_core::templates::{
25 descriptions::DescriptionContext, DescriptionGenerator, ReferenceGenerator, ReferenceType,
26};
27use datasynth_core::traits::Generator;
28use datasynth_core::uuid_factory::{DeterministicUuidFactory, GeneratorType};
29use datasynth_core::CountryPack;
30
31use crate::company_selector::WeightedCompanySelector;
32use crate::user_generator::{UserGenerator, UserGeneratorConfig};
33
/// Generator that produces synthetic [`JournalEntry`] values.
///
/// Bundles the random-number state, the samplers, the master-data pools and the
/// optional fraud / temporal / distribution settings read by the generation methods.
pub struct JournalEntryGenerator {
    /// Primary random number generator; the constructor seeds it with `seeded_rng(seed, 0)`.
    rng: ChaCha8Rng,
    /// Seed passed to the constructor; also used to derive the user-generator seed.
    seed: u64,
    /// Transaction configuration supplied at construction.
    config: TransactionConfig,
    /// Shared chart of accounts; consulted for account descriptions when enriching lines.
    coa: Arc<ChartOfAccounts>,
    /// Company codes this generator was built for.
    companies: Vec<String>,
    /// Chooses the company code of each generated entry.
    company_selector: WeightedCompanySelector,
    /// Samples the line-item layout (total / debit / credit counts) of an entry.
    line_sampler: LineItemSampler,
    /// Samples base amounts, including amounts that follow a fraud pattern.
    amount_sampler: AmountSampler,
    /// Samples posting dates and times.
    temporal_sampler: TemporalSampler,
    /// Start of the date range handed to the temporal sampler.
    start_date: NaiveDate,
    /// End of the date range handed to the temporal sampler.
    end_date: NaiveDate,
    /// Counter starting at 0 and incremented by `generate`.
    count: u64,
    /// Source of deterministic document ids.
    uuid_factory: DeterministicUuidFactory,
    /// Optional user pool; built by the constructor when realistic names are enabled
    /// and the caller did not supply one.
    user_pool: Option<UserPool>,
    /// Produces header texts for entries.
    description_generator: DescriptionGenerator,
    /// Produces document references for entries.
    reference_generator: ReferenceGenerator,
    /// Template settings (names, descriptions, references).
    template_config: TemplateConfig,
    /// Vendor pool; `VendorPool::standard()` unless replaced through `with_vendors`.
    vendor_pool: VendorPool,
    /// Customer pool; `CustomerPool::standard()` unless replaced through `with_customers`.
    customer_pool: CustomerPool,
    /// Material pool; `None` unless set through `with_materials`.
    material_pool: Option<MaterialPool>,
    /// Set to `true` once any non-empty vendor, customer or material list is installed.
    using_real_master_data: bool,
    /// Fraud-injection settings (enable flag, rate, type distribution).
    fraud_config: FraudConfig,
    /// Defaults to `true`. NOTE(review): where this flag is read is not visible here.
    persona_errors_enabled: bool,
    /// Defaults to `true`. NOTE(review): where this flag is read is not visible here.
    approval_enabled: bool,
    /// Defaults to 10000. NOTE(review): presumably the amount above which approval
    /// is required; confirm against the approval logic.
    approval_threshold: rust_decimal::Decimal,
    /// Defaults to 0.10. NOTE(review): presumably the share of entries that violate
    /// segregation of duties; confirm against the code that reads it.
    sod_violation_rate: f64,
    /// State of an in-progress batch; while its `remaining` counter is positive
    /// `generate` delegates to `generate_batched_entry`.
    batch_state: Option<BatchState>,
    /// Optional drift controller; `None` after construction.
    drift_controller: Option<DriftController>,
    /// When present, `generate` moves posting dates that are not business days.
    business_day_calculator: Option<BusinessDayCalculator>,
    /// Optional processing-lag calculator; installed when processing lags are enabled.
    processing_lag_calculator: Option<ProcessingLagCalculator>,
    /// Temporal-pattern settings stored by the `with_*_temporal*` builders.
    temporal_patterns_config: Option<TemporalPatternsConfig>,
    /// Weight attached to each business process.
    business_process_weights: [(BusinessProcess, f64); 5],
    /// Optional sampler used for non-fraud base amounts instead of `amount_sampler`.
    advanced_amount_sampler: Option<AdvancedAmountSampler>,
    /// Optional conditional sampler; for non-fraud entries it replaces the base amount
    /// with a value sampled from an input derived from the posting date.
    conditional_amount_override: Option<datasynth_core::distributions::ConditionalSampler>,
    /// Optional copula yielding a `(u, v)` pair per entry: `u` adjusts the amount and
    /// `v` drives the line count.
    correlation_copula: Option<datasynth_core::distributions::BivariateCopulaSampler>,
}
100
/// Business-process weights installed by the constructor.
///
/// The five weights sum to 1.0; they stay in effect until
/// `set_business_process_weights` replaces them.
const DEFAULT_BUSINESS_PROCESS_WEIGHTS: [(BusinessProcess, f64); 5] = [
    (BusinessProcess::O2C, 0.35),
    (BusinessProcess::P2P, 0.30),
    (BusinessProcess::R2R, 0.20),
    (BusinessProcess::H2R, 0.10),
    (BusinessProcess::A2R, 0.05),
];
108
109impl JournalEntryGenerator {
135 fn supported_conditional_input(field: &str) -> bool {
136 matches!(
137 field,
138 "month"
139 | "quarter"
140 | "year"
141 | "day_of_week"
142 | "day_of_month"
143 | "day_of_year"
144 | "week_of_year"
145 | "is_period_end"
146 | "is_quarter_end"
147 | "is_year_end"
148 | "constant"
149 | ""
150 )
151 }
152
153 fn conditional_input_value(&self, posting_date: chrono::NaiveDate) -> f64 {
154 let input_field = match self
155 .conditional_amount_override
156 .as_ref()
157 .map(|s| s.config().input_field.as_str())
158 {
159 Some(f) => f,
160 None => return 0.0,
161 };
162
163 let is_last_business_day = |d: chrono::NaiveDate| -> bool {
164 let next = d.succ_opt();
168 match next {
169 Some(n) => n.month() != d.month(),
170 None => true,
171 }
172 };
173
174 match input_field {
175 "month" => posting_date.month() as f64,
176 "quarter" => ((posting_date.month() - 1) / 3 + 1) as f64,
177 "year" => posting_date.year() as f64,
178 "day_of_week" => posting_date.weekday().number_from_monday() as f64,
179 "day_of_month" => posting_date.day() as f64,
180 "day_of_year" => posting_date.ordinal() as f64,
181 "week_of_year" => posting_date.iso_week().week() as f64,
182 "is_period_end" => f64::from(u8::from(is_last_business_day(posting_date))),
183 "is_quarter_end" => {
184 let m = posting_date.month();
185 let is_q_month = matches!(m, 3 | 6 | 9 | 12);
186 f64::from(u8::from(is_q_month && is_last_business_day(posting_date)))
187 }
188 "is_year_end" => f64::from(u8::from(
189 posting_date.month() == 12 && is_last_business_day(posting_date),
190 )),
191 _ => 0.0,
192 }
193 }
194}
195
196fn industry_profile_to_log_normal(
197 p: datasynth_config::schema::IndustryProfileType,
198) -> datasynth_core::distributions::LogNormalMixtureConfig {
199 use datasynth_config::schema::IndustryProfileType as P;
200 let industry = match p {
201 P::Retail => IndustryType::Retail,
202 P::Manufacturing => IndustryType::Manufacturing,
203 P::FinancialServices => IndustryType::FinancialServices,
204 P::Healthcare => IndustryType::Healthcare,
205 P::Technology => IndustryType::Technology,
206 };
207 IndustryAmountProfile::for_industry(industry).sales_amounts
208}
209
/// Snapshot carried across calls while a batch of related entries is being generated.
///
/// NOTE(review): the code that fills and consumes most of these fields is outside
/// this chunk; the field notes below are read off the names and should be confirmed.
#[derive(Clone)]
struct BatchState {
    /// Account number associated with the batch (presumably reused by its entries).
    base_account_number: String,
    /// Amount associated with the batch (presumably the starting point for its entries).
    base_amount: rust_decimal::Decimal,
    /// Business process associated with the batch, if any.
    base_business_process: Option<BusinessProcess>,
    /// Posting date associated with the batch.
    base_posting_date: NaiveDate,
    /// Batch counter; `generate` produces a batched entry while this is above zero.
    remaining: u8,
}
224
225impl JournalEntryGenerator {
226 pub fn new_with_params(
228 config: TransactionConfig,
229 coa: Arc<ChartOfAccounts>,
230 companies: Vec<String>,
231 start_date: NaiveDate,
232 end_date: NaiveDate,
233 seed: u64,
234 ) -> Self {
235 Self::new_with_full_config(
236 config,
237 coa,
238 companies,
239 start_date,
240 end_date,
241 seed,
242 TemplateConfig::default(),
243 None,
244 )
245 }
246
247 #[allow(clippy::too_many_arguments)]
249 pub fn new_with_full_config(
250 config: TransactionConfig,
251 coa: Arc<ChartOfAccounts>,
252 companies: Vec<String>,
253 start_date: NaiveDate,
254 end_date: NaiveDate,
255 seed: u64,
256 template_config: TemplateConfig,
257 user_pool: Option<UserPool>,
258 ) -> Self {
259 let user_pool = user_pool.or_else(|| {
261 if template_config.names.generate_realistic_names {
262 let user_gen_config = UserGeneratorConfig {
263 culture_distribution: vec![
264 (
265 datasynth_core::templates::NameCulture::WesternUs,
266 template_config.names.culture_distribution.western_us,
267 ),
268 (
269 datasynth_core::templates::NameCulture::Hispanic,
270 template_config.names.culture_distribution.hispanic,
271 ),
272 (
273 datasynth_core::templates::NameCulture::German,
274 template_config.names.culture_distribution.german,
275 ),
276 (
277 datasynth_core::templates::NameCulture::French,
278 template_config.names.culture_distribution.french,
279 ),
280 (
281 datasynth_core::templates::NameCulture::Chinese,
282 template_config.names.culture_distribution.chinese,
283 ),
284 (
285 datasynth_core::templates::NameCulture::Japanese,
286 template_config.names.culture_distribution.japanese,
287 ),
288 (
289 datasynth_core::templates::NameCulture::Indian,
290 template_config.names.culture_distribution.indian,
291 ),
292 ],
293 email_domain: template_config.names.email_domain.clone(),
294 generate_realistic_names: true,
295 };
296 let mut user_gen = UserGenerator::with_config(seed + 100, user_gen_config);
297 Some(user_gen.generate_standard(&companies))
298 } else {
299 None
300 }
301 });
302
303 let mut ref_gen = ReferenceGenerator::new(
305 start_date.year(),
306 companies
307 .first()
308 .map(std::string::String::as_str)
309 .unwrap_or("1000"),
310 );
311 ref_gen.set_prefix(
312 ReferenceType::Invoice,
313 &template_config.references.invoice_prefix,
314 );
315 ref_gen.set_prefix(
316 ReferenceType::PurchaseOrder,
317 &template_config.references.po_prefix,
318 );
319 ref_gen.set_prefix(
320 ReferenceType::SalesOrder,
321 &template_config.references.so_prefix,
322 );
323
324 let company_selector = WeightedCompanySelector::uniform(companies.clone());
326
327 Self {
328 rng: seeded_rng(seed, 0),
329 seed,
330 config: config.clone(),
331 coa,
332 companies,
333 company_selector,
334 line_sampler: LineItemSampler::with_config(
335 seed + 1,
336 config.line_item_distribution.clone(),
337 config.even_odd_distribution.clone(),
338 config.debit_credit_distribution.clone(),
339 ),
340 amount_sampler: AmountSampler::with_config(seed + 2, config.amounts.clone()),
341 temporal_sampler: TemporalSampler::with_config(
342 seed + 3,
343 config.seasonality.clone(),
344 WorkingHoursConfig::default(),
345 Vec::new(),
346 ),
347 start_date,
348 end_date,
349 count: 0,
350 uuid_factory: DeterministicUuidFactory::new(seed, GeneratorType::JournalEntry),
351 user_pool,
352 description_generator: DescriptionGenerator::new(),
353 reference_generator: ref_gen,
354 template_config,
355 vendor_pool: VendorPool::standard(),
356 customer_pool: CustomerPool::standard(),
357 material_pool: None,
358 using_real_master_data: false,
359 fraud_config: FraudConfig::default(),
360 persona_errors_enabled: true, approval_enabled: true, approval_threshold: rust_decimal::Decimal::new(10000, 0), sod_violation_rate: 0.10, batch_state: None,
365 drift_controller: None,
366 business_day_calculator: Some(BusinessDayCalculator::new(HolidayCalendar::new(
369 Region::US,
370 start_date.year(),
371 ))),
372 processing_lag_calculator: None,
373 temporal_patterns_config: None,
374 business_process_weights: DEFAULT_BUSINESS_PROCESS_WEIGHTS,
375 advanced_amount_sampler: None,
376 conditional_amount_override: None,
377 correlation_copula: None,
378 }
379 }
380
381 pub fn set_advanced_distributions(
400 &mut self,
401 config: &AdvancedDistributionConfig,
402 seed: u64,
403 ) -> Result<(), String> {
404 if !config.enabled {
405 return Ok(());
406 }
407
408 self.conditional_amount_override = config
414 .conditional
415 .iter()
416 .find(|c| {
417 c.output_field == "amount" && Self::supported_conditional_input(&c.input_field)
418 })
419 .and_then(|c| {
420 datasynth_core::distributions::ConditionalSampler::new(
421 seed.wrapping_add(17),
422 c.to_core_config(),
423 )
424 .ok()
425 });
426
427 self.correlation_copula = config
433 .correlations
434 .to_core_config_for_pair("amount", "line_count")
435 .and_then(|copula_cfg| {
436 datasynth_core::distributions::BivariateCopulaSampler::new(
437 seed.wrapping_add(31),
438 copula_cfg,
439 )
440 .ok()
441 });
442
443 if let Some(pareto) = &config.pareto {
448 if pareto.enabled {
449 let core_cfg = pareto.to_core_config();
450 self.advanced_amount_sampler =
451 Some(AdvancedAmountSampler::new_pareto(seed, core_cfg)?);
452 return Ok(());
453 }
454 }
455
456 if !config.amounts.enabled {
457 return Ok(());
458 }
459
460 match config.amounts.distribution_type {
461 MixtureDistributionType::LogNormal => {
462 let lognormal_cfg = config
463 .amounts
464 .to_log_normal_config()
465 .or_else(|| config.industry_profile.map(industry_profile_to_log_normal));
466 if let Some(cfg) = lognormal_cfg {
467 self.advanced_amount_sampler =
468 Some(AdvancedAmountSampler::new_log_normal(seed, cfg)?);
469 }
470 }
471 MixtureDistributionType::Gaussian => {
472 if let Some(cfg) = config.amounts.to_gaussian_config() {
473 self.advanced_amount_sampler =
474 Some(AdvancedAmountSampler::new_gaussian(seed, cfg)?);
475 }
476 }
477 }
478
479 Ok(())
480 }
481
482 pub fn set_business_process_weights(
486 &mut self,
487 o2c: f64,
488 p2p: f64,
489 r2r: f64,
490 h2r: f64,
491 a2r: f64,
492 ) {
493 self.business_process_weights = [
494 (BusinessProcess::O2C, o2c),
495 (BusinessProcess::P2P, p2p),
496 (BusinessProcess::R2R, r2r),
497 (BusinessProcess::H2R, h2r),
498 (BusinessProcess::A2R, a2r),
499 ];
500 }
501
502 pub fn from_generator_config(
507 full_config: &GeneratorConfig,
508 coa: Arc<ChartOfAccounts>,
509 start_date: NaiveDate,
510 end_date: NaiveDate,
511 seed: u64,
512 ) -> Self {
513 let companies: Vec<String> = full_config
514 .companies
515 .iter()
516 .map(|c| c.code.clone())
517 .collect();
518
519 let company_selector = WeightedCompanySelector::from_configs(&full_config.companies);
521
522 let mut generator = Self::new_with_full_config(
523 full_config.transactions.clone(),
524 coa,
525 companies,
526 start_date,
527 end_date,
528 seed,
529 full_config.templates.clone(),
530 None,
531 );
532
533 generator.company_selector = company_selector;
535
536 generator.fraud_config = full_config.fraud.clone();
538
539 let temporal_config = &full_config.temporal_patterns;
541 if temporal_config.enabled {
542 generator = generator.with_temporal_patterns(temporal_config.clone(), seed);
543 }
544
545 generator
546 }
547
548 pub fn with_temporal_patterns(mut self, config: TemporalPatternsConfig, seed: u64) -> Self {
555 if config.business_days.enabled {
557 let region = config
558 .calendars
559 .regions
560 .first()
561 .map(|r| Self::parse_region(r))
562 .unwrap_or(Region::US);
563
564 let calendar = HolidayCalendar::new(region, self.start_date.year());
565 self.business_day_calculator = Some(BusinessDayCalculator::new(calendar));
566 }
567
568 if config.processing_lags.enabled {
570 let lag_config = Self::convert_processing_lag_config(&config.processing_lags);
571 self.processing_lag_calculator =
572 Some(ProcessingLagCalculator::with_config(seed, lag_config));
573 }
574
575 let model = config.period_end.model.as_deref().unwrap_or("flat");
577 if model != "flat"
578 || config
579 .period_end
580 .month_end
581 .as_ref()
582 .is_some_and(|m| m.peak_multiplier.unwrap_or(1.0) != 1.0)
583 {
584 let dynamics = Self::convert_period_end_config(&config.period_end);
585 self.temporal_sampler.set_period_end_dynamics(dynamics);
586 }
587
588 self.temporal_patterns_config = Some(config);
589 self
590 }
591
592 pub fn with_country_pack_temporal(
600 mut self,
601 config: TemporalPatternsConfig,
602 seed: u64,
603 pack: &CountryPack,
604 ) -> Self {
605 if config.business_days.enabled {
607 let calendar = HolidayCalendar::from_country_pack(pack, self.start_date.year());
608 self.business_day_calculator = Some(BusinessDayCalculator::new(calendar));
609 }
610
611 if config.processing_lags.enabled {
613 let lag_config = Self::convert_processing_lag_config(&config.processing_lags);
614 self.processing_lag_calculator =
615 Some(ProcessingLagCalculator::with_config(seed, lag_config));
616 }
617
618 let model = config.period_end.model.as_deref().unwrap_or("flat");
620 if model != "flat"
621 || config
622 .period_end
623 .month_end
624 .as_ref()
625 .is_some_and(|m| m.peak_multiplier.unwrap_or(1.0) != 1.0)
626 {
627 let dynamics = Self::convert_period_end_config(&config.period_end);
628 self.temporal_sampler.set_period_end_dynamics(dynamics);
629 }
630
631 self.temporal_patterns_config = Some(config);
632 self
633 }
634
635 fn convert_processing_lag_config(
637 schema: &datasynth_config::schema::ProcessingLagSchemaConfig,
638 ) -> ProcessingLagConfig {
639 let mut config = ProcessingLagConfig {
640 enabled: schema.enabled,
641 ..Default::default()
642 };
643
644 let convert_lag = |lag: &datasynth_config::schema::LagDistributionSchemaConfig| {
646 let mut dist = LagDistribution::log_normal(lag.mu, lag.sigma);
647 if let Some(min) = lag.min_hours {
648 dist.min_lag_hours = min;
649 }
650 if let Some(max) = lag.max_hours {
651 dist.max_lag_hours = max;
652 }
653 dist
654 };
655
656 if let Some(ref lag) = schema.sales_order_lag {
658 config
659 .event_lags
660 .insert(EventType::SalesOrder, convert_lag(lag));
661 }
662 if let Some(ref lag) = schema.purchase_order_lag {
663 config
664 .event_lags
665 .insert(EventType::PurchaseOrder, convert_lag(lag));
666 }
667 if let Some(ref lag) = schema.goods_receipt_lag {
668 config
669 .event_lags
670 .insert(EventType::GoodsReceipt, convert_lag(lag));
671 }
672 if let Some(ref lag) = schema.invoice_receipt_lag {
673 config
674 .event_lags
675 .insert(EventType::InvoiceReceipt, convert_lag(lag));
676 }
677 if let Some(ref lag) = schema.invoice_issue_lag {
678 config
679 .event_lags
680 .insert(EventType::InvoiceIssue, convert_lag(lag));
681 }
682 if let Some(ref lag) = schema.payment_lag {
683 config
684 .event_lags
685 .insert(EventType::Payment, convert_lag(lag));
686 }
687 if let Some(ref lag) = schema.journal_entry_lag {
688 config
689 .event_lags
690 .insert(EventType::JournalEntry, convert_lag(lag));
691 }
692
693 if let Some(ref cross_day) = schema.cross_day_posting {
695 config.cross_day = CrossDayConfig {
696 enabled: cross_day.enabled,
697 probability_by_hour: cross_day.probability_by_hour.clone(),
698 ..Default::default()
699 };
700 }
701
702 config
703 }
704
705 fn convert_period_end_config(
707 schema: &datasynth_config::schema::PeriodEndSchemaConfig,
708 ) -> PeriodEndDynamics {
709 let model_type = schema.model.as_deref().unwrap_or("exponential");
710
711 let convert_period =
713 |period: Option<&datasynth_config::schema::PeriodEndModelSchemaConfig>,
714 default_peak: f64|
715 -> PeriodEndConfig {
716 if let Some(p) = period {
717 let model = match model_type {
718 "flat" => PeriodEndModel::FlatMultiplier {
719 multiplier: p.peak_multiplier.unwrap_or(default_peak),
720 },
721 "extended_crunch" => PeriodEndModel::ExtendedCrunch {
722 start_day: p.start_day.unwrap_or(-10),
723 sustained_high_days: p.sustained_high_days.unwrap_or(3),
724 peak_multiplier: p.peak_multiplier.unwrap_or(default_peak),
725 ramp_up_days: 3, },
727 _ => PeriodEndModel::ExponentialAcceleration {
728 start_day: p.start_day.unwrap_or(-10),
729 base_multiplier: p.base_multiplier.unwrap_or(1.0),
730 peak_multiplier: p.peak_multiplier.unwrap_or(default_peak),
731 decay_rate: p.decay_rate.unwrap_or(0.3),
732 },
733 };
734 PeriodEndConfig {
735 enabled: true,
736 model,
737 additional_multiplier: p.additional_multiplier.unwrap_or(1.0),
738 }
739 } else {
740 PeriodEndConfig {
741 enabled: true,
742 model: PeriodEndModel::ExponentialAcceleration {
743 start_day: -10,
744 base_multiplier: 1.0,
745 peak_multiplier: default_peak,
746 decay_rate: 0.3,
747 },
748 additional_multiplier: 1.0,
749 }
750 }
751 };
752
753 PeriodEndDynamics::new(
754 convert_period(schema.month_end.as_ref(), 2.0),
755 convert_period(schema.quarter_end.as_ref(), 3.5),
756 convert_period(schema.year_end.as_ref(), 5.0),
757 )
758 }
759
760 fn parse_region(region_str: &str) -> Region {
762 match region_str.to_uppercase().as_str() {
763 "US" => Region::US,
764 "DE" => Region::DE,
765 "GB" => Region::GB,
766 "CN" => Region::CN,
767 "JP" => Region::JP,
768 "IN" => Region::IN,
769 "BR" => Region::BR,
770 "MX" => Region::MX,
771 "AU" => Region::AU,
772 "SG" => Region::SG,
773 "KR" => Region::KR,
774 "FR" => Region::FR,
775 "IT" => Region::IT,
776 "ES" => Region::ES,
777 "CA" => Region::CA,
778 _ => Region::US,
779 }
780 }
781
/// Replaces the selector used to pick the company code of each generated entry.
pub fn set_company_selector(&mut self, selector: WeightedCompanySelector) {
    self.company_selector = selector;
}
786
/// Returns the selector currently used to pick company codes.
pub fn company_selector(&self) -> &WeightedCompanySelector {
    &self.company_selector
}
791
/// Replaces the fraud-injection settings (enable flag, rate and type distribution).
pub fn set_fraud_config(&mut self, config: FraudConfig) {
    self.fraud_config = config;
}
796
797 pub fn with_vendors(mut self, vendors: &[Vendor]) -> Self {
802 if !vendors.is_empty() {
803 self.vendor_pool = VendorPool::from_vendors(vendors.to_vec());
804 self.using_real_master_data = true;
805 }
806 self
807 }
808
809 pub fn with_customers(mut self, customers: &[Customer]) -> Self {
814 if !customers.is_empty() {
815 self.customer_pool = CustomerPool::from_customers(customers.to_vec());
816 self.using_real_master_data = true;
817 }
818 self
819 }
820
821 pub fn with_materials(mut self, materials: &[Material]) -> Self {
825 if !materials.is_empty() {
826 self.material_pool = Some(MaterialPool::from_materials(materials.to_vec()));
827 self.using_real_master_data = true;
828 }
829 self
830 }
831
832 pub fn with_master_data(
837 self,
838 vendors: &[Vendor],
839 customers: &[Customer],
840 materials: &[Material],
841 ) -> Self {
842 self.with_vendors(vendors)
843 .with_customers(customers)
844 .with_materials(materials)
845 }
846
847 pub fn with_country_pack_names(mut self, pack: &CountryPack) -> Self {
854 let name_gen =
855 datasynth_core::templates::MultiCultureNameGenerator::from_country_pack(pack);
856 let config = UserGeneratorConfig {
857 culture_distribution: Vec::new(),
860 email_domain: name_gen.email_domain().to_string(),
861 generate_realistic_names: true,
862 };
863 let mut user_gen = UserGenerator::with_name_generator(self.seed + 100, config, name_gen);
864 self.user_pool = Some(user_gen.generate_standard(&self.companies));
865 self
866 }
867
/// Returns `true` once a non-empty vendor, customer or material list has been
/// installed through the `with_*` master-data builders.
pub fn is_using_real_master_data(&self) -> bool {
    self.using_real_master_data
}
872
873 fn determine_fraud(&mut self) -> Option<FraudType> {
875 if !self.fraud_config.enabled {
876 return None;
877 }
878
879 if self.rng.random::<f64>() >= self.fraud_config.fraud_rate {
881 return None;
882 }
883
884 Some(self.select_fraud_type())
886 }
887
888 fn select_fraud_type(&mut self) -> FraudType {
890 let dist = &self.fraud_config.fraud_type_distribution;
891 let roll: f64 = self.rng.random();
892
893 let mut cumulative = 0.0;
894
895 cumulative += dist.suspense_account_abuse;
896 if roll < cumulative {
897 return FraudType::SuspenseAccountAbuse;
898 }
899
900 cumulative += dist.fictitious_transaction;
901 if roll < cumulative {
902 return FraudType::FictitiousTransaction;
903 }
904
905 cumulative += dist.revenue_manipulation;
906 if roll < cumulative {
907 return FraudType::RevenueManipulation;
908 }
909
910 cumulative += dist.expense_capitalization;
911 if roll < cumulative {
912 return FraudType::ExpenseCapitalization;
913 }
914
915 cumulative += dist.split_transaction;
916 if roll < cumulative {
917 return FraudType::SplitTransaction;
918 }
919
920 cumulative += dist.timing_anomaly;
921 if roll < cumulative {
922 return FraudType::TimingAnomaly;
923 }
924
925 cumulative += dist.unauthorized_access;
926 if roll < cumulative {
927 return FraudType::UnauthorizedAccess;
928 }
929
930 FraudType::DuplicatePayment
932 }
933
934 fn fraud_type_to_amount_pattern(&self, fraud_type: FraudType) -> FraudAmountPattern {
936 match fraud_type {
937 FraudType::SplitTransaction | FraudType::JustBelowThreshold => {
938 FraudAmountPattern::ThresholdAdjacent
939 }
940 FraudType::FictitiousTransaction
941 | FraudType::FictitiousEntry
942 | FraudType::SuspenseAccountAbuse
943 | FraudType::RoundDollarManipulation => FraudAmountPattern::ObviousRoundNumbers,
944 FraudType::RevenueManipulation
945 | FraudType::ExpenseCapitalization
946 | FraudType::ImproperCapitalization
947 | FraudType::ReserveManipulation
948 | FraudType::UnauthorizedAccess
949 | FraudType::PrematureRevenue
950 | FraudType::UnderstatedLiabilities
951 | FraudType::OverstatedAssets
952 | FraudType::ChannelStuffing => FraudAmountPattern::StatisticallyImprobable,
953 FraudType::DuplicatePayment
954 | FraudType::TimingAnomaly
955 | FraudType::SelfApproval
956 | FraudType::ExceededApprovalLimit
957 | FraudType::SegregationOfDutiesViolation
958 | FraudType::UnauthorizedApproval
959 | FraudType::CollusiveApproval
960 | FraudType::FictitiousVendor
961 | FraudType::ShellCompanyPayment
962 | FraudType::Kickback
963 | FraudType::KickbackScheme
964 | FraudType::InvoiceManipulation
965 | FraudType::AssetMisappropriation
966 | FraudType::InventoryTheft
967 | FraudType::GhostEmployee => FraudAmountPattern::Normal,
968 FraudType::ImproperRevenueRecognition
970 | FraudType::ImproperPoAllocation
971 | FraudType::VariableConsiderationManipulation
972 | FraudType::ContractModificationMisstatement => {
973 FraudAmountPattern::StatisticallyImprobable
974 }
975 FraudType::LeaseClassificationManipulation
977 | FraudType::OffBalanceSheetLease
978 | FraudType::LeaseLiabilityUnderstatement
979 | FraudType::RouAssetMisstatement => FraudAmountPattern::StatisticallyImprobable,
980 FraudType::FairValueHierarchyManipulation
982 | FraudType::Level3InputManipulation
983 | FraudType::ValuationTechniqueManipulation => {
984 FraudAmountPattern::StatisticallyImprobable
985 }
986 FraudType::DelayedImpairment
988 | FraudType::ImpairmentTestAvoidance
989 | FraudType::CashFlowProjectionManipulation
990 | FraudType::ImproperImpairmentReversal => FraudAmountPattern::StatisticallyImprobable,
991 FraudType::BidRigging
993 | FraudType::PhantomVendorContract
994 | FraudType::ConflictOfInterestSourcing => FraudAmountPattern::Normal,
995 FraudType::SplitContractThreshold => FraudAmountPattern::ThresholdAdjacent,
996 FraudType::GhostEmployeePayroll
998 | FraudType::PayrollInflation
999 | FraudType::DuplicateExpenseReport
1000 | FraudType::FictitiousExpense => FraudAmountPattern::Normal,
1001 FraudType::SplitExpenseToAvoidApproval => FraudAmountPattern::ThresholdAdjacent,
1002 FraudType::RevenueTimingManipulation => FraudAmountPattern::StatisticallyImprobable,
1004 FraudType::QuotePriceOverride => FraudAmountPattern::Normal,
1005 }
1006 }
1007
/// Returns the next id from the generator's deterministic UUID factory.
#[inline]
fn generate_deterministic_uuid(&self) -> uuid::Uuid {
    self.uuid_factory.next()
}
1013
/// Cost-center codes assigned to lines that lack one and whose GL account starts
/// with `5` or `6` (see `enrich_line_items`).
const COST_CENTER_POOL: &'static [&'static str] =
    &["CC1000", "CC2000", "CC3000", "CC4000", "CC5000"];
1017
1018 fn enrich_line_items(&self, entry: &mut JournalEntry) {
1024 let posting_date = entry.header.posting_date;
1025 let company_code = &entry.header.company_code;
1026 let header_text = entry.header.header_text.clone();
1027 let business_process = entry.header.business_process;
1028
1029 let doc_id_bytes = entry.header.document_id.as_bytes();
1031 let mut cc_seed: usize = 0;
1032 for &b in doc_id_bytes {
1033 cc_seed = cc_seed.wrapping_add(b as usize);
1034 }
1035
1036 for (i, line) in entry.lines.iter_mut().enumerate() {
1037 if line.account_description.is_none() {
1039 line.account_description = self
1040 .coa
1041 .get_account(&line.gl_account)
1042 .map(|a| a.short_description.clone());
1043 }
1044
1045 if line.cost_center.is_none() {
1047 let first_char = line.gl_account.chars().next().unwrap_or('0');
1048 if first_char == '5' || first_char == '6' {
1049 let idx = cc_seed.wrapping_add(i) % Self::COST_CENTER_POOL.len();
1050 line.cost_center = Some(Self::COST_CENTER_POOL[idx].to_string());
1051 }
1052 }
1053
1054 if line.profit_center.is_none() {
1056 let suffix = match business_process {
1057 Some(BusinessProcess::P2P) => "-P2P",
1058 Some(BusinessProcess::O2C) => "-O2C",
1059 Some(BusinessProcess::R2R) => "-R2R",
1060 Some(BusinessProcess::H2R) => "-H2R",
1061 _ => "",
1062 };
1063 line.profit_center = Some(format!("PC-{company_code}{suffix}"));
1064 }
1065
1066 if line.line_text.is_none() {
1068 line.line_text = header_text.clone();
1069 }
1070
1071 if line.value_date.is_none()
1073 && (line.gl_account.starts_with("1100") || line.gl_account.starts_with("2000"))
1074 {
1075 line.value_date = Some(posting_date);
1076 }
1077
1078 if line.assignment.is_none() {
1080 if line.gl_account.starts_with("2000") {
1081 if let Some(ref ht) = header_text {
1083 if let Some(vendor_part) = ht.rsplit(" - ").next() {
1085 if vendor_part.starts_with("V-")
1086 || vendor_part.starts_with("VENDOR")
1087 || vendor_part.starts_with("Vendor")
1088 {
1089 line.assignment = Some(vendor_part.to_string());
1090 }
1091 }
1092 }
1093 } else if line.gl_account.starts_with("1100") {
1094 if let Some(ref ht) = header_text {
1096 if let Some(customer_part) = ht.rsplit(" - ").next() {
1097 if customer_part.starts_with("C-")
1098 || customer_part.starts_with("CUST")
1099 || customer_part.starts_with("Customer")
1100 {
1101 line.assignment = Some(customer_part.to_string());
1102 }
1103 }
1104 }
1105 }
1106 }
1107 }
1108 }
1109
1110 pub fn generate(&mut self) -> JournalEntry {
1112 debug!(
1113 count = self.count,
1114 companies = self.companies.len(),
1115 start_date = %self.start_date,
1116 end_date = %self.end_date,
1117 "Generating journal entry"
1118 );
1119
1120 if let Some(ref state) = self.batch_state {
1122 if state.remaining > 0 {
1123 return self.generate_batched_entry();
1124 }
1125 }
1126
1127 self.count += 1;
1128
1129 let document_id = self.generate_deterministic_uuid();
1131
1132 let mut posting_date = self
1134 .temporal_sampler
1135 .sample_date(self.start_date, self.end_date);
1136
1137 if let Some(ref calc) = self.business_day_calculator {
1139 if !calc.is_business_day(posting_date) {
1140 posting_date = calc.next_business_day(posting_date, false);
1142 if posting_date > self.end_date {
1144 posting_date = calc.prev_business_day(self.end_date, true);
1145 }
1146 }
1147 }
1148
1149 let company_code = self.company_selector.select(&mut self.rng).to_string();
1151
1152 let copula_uv: Option<(f64, f64)> =
1156 self.correlation_copula.as_mut().map(|cop| cop.sample());
1157
1158 let mut line_spec = self.line_sampler.sample();
1167 if let Some((_u, v)) = copula_uv {
1168 let new_total = 2 + ((v * 10.0).floor() as usize).min(9);
1169 let old_debit = line_spec.debit_count.max(1);
1170 let old_credit = line_spec.credit_count.max(1);
1171 let new_debit = (new_total as f64 * old_debit as f64 / (old_debit + old_credit) as f64)
1172 .round() as usize;
1173 let new_debit = new_debit.clamp(1, new_total - 1);
1174 let new_credit = new_total - new_debit;
1175 line_spec.total_count = new_total;
1176 line_spec.debit_count = new_debit;
1177 line_spec.credit_count = new_credit;
1178 }
1179
1180 let source = self.select_source();
1182 let is_automated = matches!(
1183 source,
1184 TransactionSource::Automated | TransactionSource::Recurring
1185 );
1186
1187 let business_process = self.select_business_process();
1189
1190 let fraud_type = self.determine_fraud();
1192 let is_fraud = fraud_type.is_some();
1193
1194 let time = self.temporal_sampler.sample_time(!is_automated);
1196 let created_at = posting_date.and_time(time).and_utc();
1197
1198 let (created_by, user_persona) = self.select_user(is_automated);
1200
1201 let mut header =
1203 JournalEntryHeader::with_deterministic_id(company_code, posting_date, document_id);
1204 header.created_at = created_at;
1205 header.source = source;
1206 header.created_by = created_by;
1207 header.user_persona = user_persona;
1208 header.business_process = Some(business_process);
1209 header.document_type = Self::document_type_for_process(business_process).to_string();
1210 header.is_fraud = is_fraud;
1211 header.fraud_type = fraud_type;
1212
1213 let is_manual = matches!(source, TransactionSource::Manual);
1215 header.is_manual = is_manual;
1216
1217 header.source_system = if is_manual {
1219 if self.rng.random::<f64>() < 0.70 {
1220 "manual".to_string()
1221 } else {
1222 "spreadsheet".to_string()
1223 }
1224 } else {
1225 let roll: f64 = self.rng.random();
1226 if roll < 0.40 {
1227 "SAP-FI".to_string()
1228 } else if roll < 0.60 {
1229 "SAP-MM".to_string()
1230 } else if roll < 0.80 {
1231 "SAP-SD".to_string()
1232 } else if roll < 0.95 {
1233 "interface".to_string()
1234 } else {
1235 "SAP-HR".to_string()
1236 }
1237 };
1238
1239 let is_post_close = posting_date.month() == self.end_date.month()
1242 && posting_date.year() == self.end_date.year()
1243 && posting_date.day() > 25;
1244 header.is_post_close = is_post_close;
1245
1246 let created_date = if is_manual {
1249 posting_date.and_hms_opt(time.hour().min(23), time.minute(), time.second())
1250 } else {
1251 let lag_days = self.rng.random_range(0i64..=3);
1252 let created_naive_date = posting_date
1253 .checked_sub_signed(chrono::Duration::days(lag_days))
1254 .unwrap_or(posting_date);
1255 created_naive_date.and_hms_opt(
1256 self.rng.random_range(8u32..=17),
1257 self.rng.random_range(0u32..=59),
1258 self.rng.random_range(0u32..=59),
1259 )
1260 };
1261 header.created_date = created_date;
1262
1263 let mut context =
1265 DescriptionContext::with_period(posting_date.month(), posting_date.year());
1266
1267 match business_process {
1269 BusinessProcess::P2P => {
1270 if let Some(vendor) = self.vendor_pool.random_vendor(&mut self.rng) {
1271 context.vendor_name = Some(vendor.name.clone());
1272 }
1273 }
1274 BusinessProcess::O2C => {
1275 if let Some(customer) = self.customer_pool.random_customer(&mut self.rng) {
1276 context.customer_name = Some(customer.name.clone());
1277 }
1278 }
1279 _ => {}
1280 }
1281
1282 if self.template_config.descriptions.generate_header_text {
1284 header.header_text = Some(self.description_generator.generate_header_text(
1285 business_process,
1286 &context,
1287 &mut self.rng,
1288 ));
1289 }
1290
1291 if self.template_config.references.generate_references {
1293 header.reference = Some(
1294 self.reference_generator
1295 .generate_for_process_year(business_process, posting_date.year()),
1296 );
1297 }
1298
1299 header.source_document = header
1301 .reference
1302 .as_deref()
1303 .and_then(DocumentRef::parse)
1304 .or_else(|| {
1305 if header.source == TransactionSource::Manual {
1306 Some(DocumentRef::Manual)
1307 } else {
1308 None
1309 }
1310 });
1311
1312 let mut entry = JournalEntry::new(header);
1314
1315 let base_amount = if let Some(ft) = fraud_type {
1321 let pattern = self.fraud_type_to_amount_pattern(ft);
1322 self.amount_sampler.sample_fraud(pattern)
1323 } else if let Some(ref mut adv) = self.advanced_amount_sampler {
1324 adv.sample_decimal()
1325 } else {
1326 self.amount_sampler.sample()
1327 };
1328 let base_amount = if fraud_type.is_none() {
1334 let input = self.conditional_input_value(posting_date);
1338 if let Some(ref mut cond) = self.conditional_amount_override {
1339 cond.sample_decimal(input)
1340 } else {
1341 base_amount
1342 }
1343 } else {
1344 base_amount
1345 };
1346
1347 let base_amount = if fraud_type.is_none() {
1358 if let Some((u, _v)) = copula_uv {
1359 if let Some(ref adv) = self.advanced_amount_sampler {
1360 adv.ppf_decimal(u)
1361 } else {
1362 let log_mult = 4.0 * (u - 0.5);
1363 let adjusted = base_amount.to_f64().unwrap_or(1.0) * log_mult.exp();
1364 Decimal::from_f64_retain(adjusted).unwrap_or(base_amount)
1365 }
1366 } else {
1367 base_amount
1368 }
1369 } else {
1370 base_amount
1371 };
1372
1373 let drift_adjusted_amount = {
1375 let drift = self.get_drift_adjustments(posting_date);
1376 if drift.amount_mean_multiplier != 1.0 {
1377 let multiplier = drift.amount_mean_multiplier * drift.seasonal_factor;
1379 let adjusted = base_amount.to_f64().unwrap_or(1.0) * multiplier;
1380 Decimal::from_f64_retain(adjusted).unwrap_or(base_amount)
1381 } else {
1382 base_amount
1383 }
1384 };
1385
1386 let total_amount = if is_automated {
1388 drift_adjusted_amount } else {
1390 self.apply_human_variation(drift_adjusted_amount)
1391 };
1392
1393 let debit_amounts = self
1395 .amount_sampler
1396 .sample_summing_to(line_spec.debit_count, total_amount);
1397 for (i, amount) in debit_amounts.into_iter().enumerate() {
1398 let account_number = self.select_debit_account().account_number.clone();
1399 let mut line = JournalEntryLine::debit(
1400 entry.header.document_id,
1401 (i + 1) as u32,
1402 account_number.clone(),
1403 amount,
1404 );
1405
1406 if self.template_config.descriptions.generate_line_text {
1408 line.line_text = Some(self.description_generator.generate_line_text(
1409 &account_number,
1410 &context,
1411 &mut self.rng,
1412 ));
1413 }
1414
1415 entry.add_line(line);
1416 }
1417
1418 let credit_amounts = self
1420 .amount_sampler
1421 .sample_summing_to(line_spec.credit_count, total_amount);
1422 for (i, amount) in credit_amounts.into_iter().enumerate() {
1423 let account_number = self.select_credit_account().account_number.clone();
1424 let mut line = JournalEntryLine::credit(
1425 entry.header.document_id,
1426 (line_spec.debit_count + i + 1) as u32,
1427 account_number.clone(),
1428 amount,
1429 );
1430
1431 if self.template_config.descriptions.generate_line_text {
1433 line.line_text = Some(self.description_generator.generate_line_text(
1434 &account_number,
1435 &context,
1436 &mut self.rng,
1437 ));
1438 }
1439
1440 entry.add_line(line);
1441 }
1442
1443 self.enrich_line_items(&mut entry);
1445
1446 if self.persona_errors_enabled && !is_automated {
1448 self.maybe_inject_persona_error(&mut entry);
1449 }
1450
1451 if self.approval_enabled {
1453 self.maybe_apply_approval_workflow(&mut entry, posting_date);
1454 }
1455
1456 self.populate_approval_fields(&mut entry, posting_date);
1458
1459 self.maybe_start_batch(&entry);
1461
1462 entry
1463 }
1464
1465 pub fn with_persona_errors(mut self, enabled: bool) -> Self {
1470 self.persona_errors_enabled = enabled;
1471 self
1472 }
1473
1474 pub fn with_fraud_config(mut self, config: FraudConfig) -> Self {
1479 self.fraud_config = config;
1480 self
1481 }
1482
1483 pub fn persona_errors_enabled(&self) -> bool {
1485 self.persona_errors_enabled
1486 }
1487
1488 pub fn with_batching(mut self, enabled: bool) -> Self {
1493 if !enabled {
1494 self.batch_state = None;
1495 }
1496 self
1497 }
1498
1499 pub fn batching_enabled(&self) -> bool {
1501 true
1503 }
1504
1505 fn maybe_start_batch(&mut self, entry: &JournalEntry) {
1510 if entry.header.source == TransactionSource::Automated || entry.header.is_fraud {
1512 return;
1513 }
1514
1515 if self.rng.random::<f64>() > 0.15 {
1517 return;
1518 }
1519
1520 let base_account = entry
1522 .lines
1523 .first()
1524 .map(|l| l.gl_account.clone())
1525 .unwrap_or_default();
1526
1527 let base_amount = entry.total_debit();
1528
1529 self.batch_state = Some(BatchState {
1530 base_account_number: base_account,
1531 base_amount,
1532 base_business_process: entry.header.business_process,
1533 base_posting_date: entry.header.posting_date,
1534 remaining: self.rng.random_range(2..7), });
1536 }
1537
1538 fn generate_batched_entry(&mut self) -> JournalEntry {
1546 use rust_decimal::Decimal;
1547
1548 if let Some(ref mut state) = self.batch_state {
1550 state.remaining = state.remaining.saturating_sub(1);
1551 }
1552
1553 let Some(batch) = self.batch_state.clone() else {
1554 tracing::warn!(
1557 "generate_batched_entry called without batch_state; generating standard entry"
1558 );
1559 self.batch_state = None;
1560 return self.generate();
1561 };
1562
1563 let posting_date = batch.base_posting_date;
1565
1566 self.count += 1;
1567 let document_id = self.generate_deterministic_uuid();
1568
1569 let company_code = self.company_selector.select(&mut self.rng).to_string();
1571
1572 let _line_spec = LineItemSpec {
1574 total_count: 2,
1575 debit_count: 1,
1576 credit_count: 1,
1577 split_type: DebitCreditSplit::Equal,
1578 };
1579
1580 let source = TransactionSource::Manual;
1582
1583 let business_process = batch.base_business_process.unwrap_or(BusinessProcess::R2R);
1585
1586 let time = self.temporal_sampler.sample_time(true);
1588 let created_at = posting_date.and_time(time).and_utc();
1589
1590 let (created_by, user_persona) = self.select_user(false);
1592
1593 let mut header =
1595 JournalEntryHeader::with_deterministic_id(company_code, posting_date, document_id);
1596 header.created_at = created_at;
1597 header.source = source;
1598 header.created_by = created_by;
1599 header.user_persona = user_persona;
1600 header.business_process = Some(business_process);
1601 header.document_type = Self::document_type_for_process(business_process).to_string();
1602
1603 header.source_document = Some(DocumentRef::Manual);
1605
1606 header.is_manual = true;
1608 header.source_system = if self.rng.random::<f64>() < 0.70 {
1609 "manual".to_string()
1610 } else {
1611 "spreadsheet".to_string()
1612 };
1613 header.is_post_close = posting_date.month() == self.end_date.month()
1614 && posting_date.year() == self.end_date.year()
1615 && posting_date.day() > 25;
1616 header.created_date =
1617 posting_date.and_hms_opt(time.hour().min(23), time.minute(), time.second());
1618
1619 let variation = self.rng.random_range(-0.15..0.15);
1621 let varied_amount =
1622 batch.base_amount * (Decimal::ONE + Decimal::try_from(variation).unwrap_or_default());
1623 let total_amount = varied_amount.round_dp(2).max(Decimal::from(1));
1624
1625 let mut entry = JournalEntry::new(header);
1627
1628 let debit_line = JournalEntryLine::debit(
1630 entry.header.document_id,
1631 1,
1632 batch.base_account_number.clone(),
1633 total_amount,
1634 );
1635 entry.add_line(debit_line);
1636
1637 let credit_account = self.select_credit_account().account_number.clone();
1639 let credit_line =
1640 JournalEntryLine::credit(entry.header.document_id, 2, credit_account, total_amount);
1641 entry.add_line(credit_line);
1642
1643 self.enrich_line_items(&mut entry);
1645
1646 if self.persona_errors_enabled {
1648 self.maybe_inject_persona_error(&mut entry);
1649 }
1650
1651 if self.approval_enabled {
1653 self.maybe_apply_approval_workflow(&mut entry, posting_date);
1654 }
1655
1656 self.populate_approval_fields(&mut entry, posting_date);
1658
1659 if batch.remaining <= 1 {
1661 self.batch_state = None;
1662 }
1663
1664 entry
1665 }
1666
1667 fn maybe_inject_persona_error(&mut self, entry: &mut JournalEntry) {
1669 let persona_str = &entry.header.user_persona;
1671 let persona = match persona_str.to_lowercase().as_str() {
1672 s if s.contains("junior") => UserPersona::JuniorAccountant,
1673 s if s.contains("senior") => UserPersona::SeniorAccountant,
1674 s if s.contains("controller") => UserPersona::Controller,
1675 s if s.contains("manager") => UserPersona::Manager,
1676 s if s.contains("executive") => UserPersona::Executive,
1677 _ => return, };
1679
1680 let base_error_rate = persona.error_rate();
1682
1683 let adjusted_rate = self.apply_stress_factors(base_error_rate, entry.header.posting_date);
1685
1686 if self.rng.random::<f64>() >= adjusted_rate {
1688 return; }
1690
1691 self.inject_human_error(entry, persona);
1693 }
1694
1695 fn apply_stress_factors(&self, base_rate: f64, posting_date: chrono::NaiveDate) -> f64 {
1704 use chrono::Datelike;
1705
1706 let mut rate = base_rate;
1707 let day = posting_date.day();
1708 let month = posting_date.month();
1709
1710 if month == 12 && day >= 28 {
1712 rate *= 2.0;
1713 return rate.min(0.5); }
1715
1716 if matches!(month, 3 | 6 | 9 | 12) && day >= 28 {
1718 rate *= 1.75; return rate.min(0.4);
1720 }
1721
1722 if day >= 28 {
1724 rate *= 1.5; }
1726
1727 let weekday = posting_date.weekday();
1729 match weekday {
1730 chrono::Weekday::Mon => {
1731 rate *= 1.2;
1733 }
1734 chrono::Weekday::Fri => {
1735 rate *= 1.3;
1737 }
1738 _ => {}
1739 }
1740
1741 rate.min(0.4)
1743 }
1744
1745 fn apply_human_variation(&mut self, amount: rust_decimal::Decimal) -> rust_decimal::Decimal {
1754 use rust_decimal::Decimal;
1755
1756 if amount < Decimal::from(10) {
1758 return amount;
1759 }
1760
1761 if self.rng.random::<f64>() > 0.70 {
1763 return amount;
1764 }
1765
1766 let variation_type: u8 = self.rng.random_range(0..4);
1768
1769 match variation_type {
1770 0 => {
1771 let variation_pct = self.rng.random_range(-0.02..0.02);
1773 let variation = amount * Decimal::try_from(variation_pct).unwrap_or_default();
1774 (amount + variation).round_dp(2)
1775 }
1776 1 => {
1777 let ten = Decimal::from(10);
1779 (amount / ten).round() * ten
1780 }
1781 2 => {
1782 if amount >= Decimal::from(500) {
1784 let hundred = Decimal::from(100);
1785 (amount / hundred).round() * hundred
1786 } else {
1787 amount
1788 }
1789 }
1790 3 => {
1791 let cents = Decimal::new(self.rng.random_range(-100..100), 2);
1793 (amount + cents).max(Decimal::ZERO).round_dp(2)
1794 }
1795 _ => amount,
1796 }
1797 }
1798
1799 fn rebalance_entry(entry: &mut JournalEntry, modified_was_debit: bool, impact: Decimal) {
1805 let balancing_idx = entry.lines.iter().position(|l| {
1807 if modified_was_debit {
1808 l.credit_amount > Decimal::ZERO
1809 } else {
1810 l.debit_amount > Decimal::ZERO
1811 }
1812 });
1813
1814 if let Some(idx) = balancing_idx {
1815 if modified_was_debit {
1816 entry.lines[idx].credit_amount += impact;
1817 } else {
1818 entry.lines[idx].debit_amount += impact;
1819 }
1820 }
1821 }
1822
1823 fn inject_human_error(&mut self, entry: &mut JournalEntry, persona: UserPersona) {
1828 use rust_decimal::Decimal;
1829
1830 let error_type: u8 = match persona {
1832 UserPersona::JuniorAccountant => {
1833 self.rng.random_range(0..5)
1835 }
1836 UserPersona::SeniorAccountant => {
1837 self.rng.random_range(0..3)
1839 }
1840 UserPersona::Controller | UserPersona::Manager => {
1841 self.rng.random_range(3..5)
1843 }
1844 _ => return,
1845 };
1846
1847 match error_type {
1848 0 => {
1849 if let Some(line) = entry.lines.get_mut(0) {
1851 let is_debit = line.debit_amount > Decimal::ZERO;
1852 let original_amount = if is_debit {
1853 line.debit_amount
1854 } else {
1855 line.credit_amount
1856 };
1857
1858 let s = original_amount.to_string();
1860 if s.len() >= 2 {
1861 let chars: Vec<char> = s.chars().collect();
1862 let pos = self.rng.random_range(0..chars.len().saturating_sub(1));
1863 if chars[pos].is_ascii_digit()
1864 && chars.get(pos + 1).is_some_and(char::is_ascii_digit)
1865 {
1866 let mut new_chars = chars;
1867 new_chars.swap(pos, pos + 1);
1868 if let Ok(new_amount) =
1869 new_chars.into_iter().collect::<String>().parse::<Decimal>()
1870 {
1871 let impact = new_amount - original_amount;
1872
1873 if is_debit {
1875 entry.lines[0].debit_amount = new_amount;
1876 } else {
1877 entry.lines[0].credit_amount = new_amount;
1878 }
1879
1880 Self::rebalance_entry(entry, is_debit, impact);
1882
1883 entry.header.header_text = Some(
1884 entry.header.header_text.clone().unwrap_or_default()
1885 + " [HUMAN_ERROR:TRANSPOSITION]",
1886 );
1887 }
1888 }
1889 }
1890 }
1891 }
1892 1 => {
1893 if let Some(line) = entry.lines.get_mut(0) {
1895 let is_debit = line.debit_amount > Decimal::ZERO;
1896 let original_amount = if is_debit {
1897 line.debit_amount
1898 } else {
1899 line.credit_amount
1900 };
1901
1902 let new_amount = original_amount * Decimal::new(10, 0);
1903 let impact = new_amount - original_amount;
1904
1905 if is_debit {
1907 entry.lines[0].debit_amount = new_amount;
1908 } else {
1909 entry.lines[0].credit_amount = new_amount;
1910 }
1911
1912 Self::rebalance_entry(entry, is_debit, impact);
1914
1915 entry.header.header_text = Some(
1916 entry.header.header_text.clone().unwrap_or_default()
1917 + " [HUMAN_ERROR:DECIMAL_SHIFT]",
1918 );
1919 }
1920 }
1921 2 => {
1922 if let Some(ref mut text) = entry.header.header_text {
1924 let typos = ["teh", "adn", "wiht", "taht", "recieve"];
1925 let correct = ["the", "and", "with", "that", "receive"];
1926 let idx = self.rng.random_range(0..typos.len());
1927 if text.to_lowercase().contains(correct[idx]) {
1928 *text = text.replace(correct[idx], typos[idx]);
1929 *text = format!("{text} [HUMAN_ERROR:TYPO]");
1930 }
1931 }
1932 }
1933 3 => {
1934 if let Some(line) = entry.lines.get_mut(0) {
1936 let is_debit = line.debit_amount > Decimal::ZERO;
1937 let original_amount = if is_debit {
1938 line.debit_amount
1939 } else {
1940 line.credit_amount
1941 };
1942
1943 let new_amount =
1944 (original_amount / Decimal::new(100, 0)).round() * Decimal::new(100, 0);
1945 let impact = new_amount - original_amount;
1946
1947 if is_debit {
1949 entry.lines[0].debit_amount = new_amount;
1950 } else {
1951 entry.lines[0].credit_amount = new_amount;
1952 }
1953
1954 Self::rebalance_entry(entry, is_debit, impact);
1956
1957 entry.header.header_text = Some(
1958 entry.header.header_text.clone().unwrap_or_default()
1959 + " [HUMAN_ERROR:ROUNDED]",
1960 );
1961 }
1962 }
1963 4 if entry.header.document_date == entry.header.posting_date => {
1966 let days_late = self.rng.random_range(5..15);
1967 entry.header.document_date =
1968 entry.header.posting_date - chrono::Duration::days(days_late);
1969 entry.header.header_text = Some(
1970 entry.header.header_text.clone().unwrap_or_default()
1971 + " [HUMAN_ERROR:LATE_POSTING]",
1972 );
1973 }
1974 _ => {}
1975 }
1976 }
1977
1978 fn maybe_apply_approval_workflow(
1983 &mut self,
1984 entry: &mut JournalEntry,
1985 _posting_date: NaiveDate,
1986 ) {
1987 use rust_decimal::Decimal;
1988
1989 let amount = entry.total_debit();
1990
1991 if amount <= self.approval_threshold {
1993 let workflow = ApprovalWorkflow::auto_approved(
1995 entry.header.created_by.clone(),
1996 entry.header.user_persona.clone(),
1997 amount,
1998 entry.header.created_at,
1999 );
2000 entry.header.approval_workflow = Some(workflow);
2001 return;
2002 }
2003
2004 entry.header.sox_relevant = true;
2006
2007 let required_levels = if amount > Decimal::new(100000, 0) {
2009 3 } else if amount > Decimal::new(50000, 0) {
2011 2 } else {
2013 1 };
2015
2016 let mut workflow = ApprovalWorkflow::new(
2018 entry.header.created_by.clone(),
2019 entry.header.user_persona.clone(),
2020 amount,
2021 );
2022 workflow.required_levels = required_levels;
2023
2024 let submit_time = entry.header.created_at;
2026 let submit_action = ApprovalAction::new(
2027 entry.header.created_by.clone(),
2028 entry.header.user_persona.clone(),
2029 self.parse_persona(&entry.header.user_persona),
2030 ApprovalActionType::Submit,
2031 0,
2032 )
2033 .with_timestamp(submit_time);
2034
2035 workflow.actions.push(submit_action);
2036 workflow.status = ApprovalStatus::Pending;
2037 workflow.submitted_at = Some(submit_time);
2038
2039 let mut current_time = submit_time;
2041 for level in 1..=required_levels {
2042 let delay_hours = self.rng.random_range(1..4);
2044 current_time += chrono::Duration::hours(delay_hours);
2045
2046 while current_time.weekday() == chrono::Weekday::Sat
2048 || current_time.weekday() == chrono::Weekday::Sun
2049 {
2050 current_time += chrono::Duration::days(1);
2051 }
2052
2053 let (approver_id, approver_role) = self.select_approver(level);
2055
2056 let approve_action = ApprovalAction::new(
2057 approver_id.clone(),
2058 approver_role.to_string(),
2059 approver_role,
2060 ApprovalActionType::Approve,
2061 level,
2062 )
2063 .with_timestamp(current_time);
2064
2065 workflow.actions.push(approve_action);
2066 workflow.current_level = level;
2067 }
2068
2069 workflow.status = ApprovalStatus::Approved;
2071 workflow.approved_at = Some(current_time);
2072
2073 entry.header.approval_workflow = Some(workflow);
2074 }
2075
2076 fn select_approver(&mut self, level: u8) -> (String, UserPersona) {
2078 let persona = match level {
2079 1 => UserPersona::Manager,
2080 2 => UserPersona::Controller,
2081 _ => UserPersona::Executive,
2082 };
2083
2084 if let Some(ref pool) = self.user_pool {
2086 if let Some(user) = pool.get_random_user(persona, &mut self.rng) {
2087 return (user.user_id.clone(), persona);
2088 }
2089 }
2090
2091 let approver_id = match persona {
2093 UserPersona::Manager => format!("MGR{:04}", self.rng.random_range(1..100)),
2094 UserPersona::Controller => format!("CTRL{:04}", self.rng.random_range(1..20)),
2095 UserPersona::Executive => format!("EXEC{:04}", self.rng.random_range(1..10)),
2096 _ => format!("USR{:04}", self.rng.random_range(1..1000)),
2097 };
2098
2099 (approver_id, persona)
2100 }
2101
2102 fn parse_persona(&self, persona_str: &str) -> UserPersona {
2104 match persona_str.to_lowercase().as_str() {
2105 s if s.contains("junior") => UserPersona::JuniorAccountant,
2106 s if s.contains("senior") => UserPersona::SeniorAccountant,
2107 s if s.contains("controller") => UserPersona::Controller,
2108 s if s.contains("manager") => UserPersona::Manager,
2109 s if s.contains("executive") => UserPersona::Executive,
2110 s if s.contains("automated") || s.contains("system") => UserPersona::AutomatedSystem,
2111 _ => UserPersona::JuniorAccountant, }
2113 }
2114
2115 pub fn with_approval(mut self, enabled: bool) -> Self {
2117 self.approval_enabled = enabled;
2118 self
2119 }
2120
2121 pub fn with_approval_threshold(mut self, threshold: rust_decimal::Decimal) -> Self {
2123 self.approval_threshold = threshold;
2124 self
2125 }
2126
2127 pub fn with_sod_violation_rate(mut self, rate: f64) -> Self {
2133 self.sod_violation_rate = rate;
2134 self
2135 }
2136
2137 fn populate_approval_fields(&mut self, entry: &mut JournalEntry, posting_date: NaiveDate) {
2140 if let Some(ref workflow) = entry.header.approval_workflow {
2141 let last_approver = workflow
2143 .actions
2144 .iter()
2145 .rev()
2146 .find(|a| matches!(a.action, ApprovalActionType::Approve));
2147
2148 if let Some(approver_action) = last_approver {
2149 entry.header.approved_by = Some(approver_action.actor_id.clone());
2150 entry.header.approval_date = Some(approver_action.action_timestamp.date_naive());
2151 } else {
2152 entry.header.approved_by = Some(workflow.preparer_id.clone());
2154 entry.header.approval_date = Some(posting_date);
2155 }
2156
2157 if self.rng.random::<f64>() < self.sod_violation_rate {
2159 let creator = entry.header.created_by.clone();
2160 entry.header.approved_by = Some(creator);
2161 entry.header.sod_violation = true;
2162 entry.header.sod_conflict_type = Some(SodConflictType::PreparerApprover);
2163 }
2164 }
2165 }
2166
2167 pub fn with_drift_controller(mut self, controller: DriftController) -> Self {
2173 self.drift_controller = Some(controller);
2174 self
2175 }
2176
2177 pub fn with_drift_config(mut self, config: DriftConfig, seed: u64) -> Self {
2182 if config.enabled {
2183 let total_periods = self.calculate_total_periods();
2184 self.drift_controller = Some(DriftController::new(config, seed, total_periods));
2185 }
2186 self
2187 }
2188
2189 fn calculate_total_periods(&self) -> u32 {
2191 let start_year = self.start_date.year();
2192 let start_month = self.start_date.month();
2193 let end_year = self.end_date.year();
2194 let end_month = self.end_date.month();
2195
2196 ((end_year - start_year) * 12 + (end_month as i32 - start_month as i32) + 1).max(1) as u32
2197 }
2198
2199 fn date_to_period(&self, date: NaiveDate) -> u32 {
2201 let start_year = self.start_date.year();
2202 let start_month = self.start_date.month() as i32;
2203 let date_year = date.year();
2204 let date_month = date.month() as i32;
2205
2206 ((date_year - start_year) * 12 + (date_month - start_month)).max(0) as u32
2207 }
2208
2209 fn get_drift_adjustments(&self, date: NaiveDate) -> DriftAdjustments {
2211 if let Some(ref controller) = self.drift_controller {
2212 let period = self.date_to_period(date);
2213 controller.compute_adjustments(period)
2214 } else {
2215 DriftAdjustments::none()
2216 }
2217 }
2218
2219 #[inline]
2221 fn select_user(&mut self, is_automated: bool) -> (String, String) {
2222 if let Some(ref pool) = self.user_pool {
2223 let persona = if is_automated {
2224 UserPersona::AutomatedSystem
2225 } else {
2226 let roll: f64 = self.rng.random();
2228 if roll < 0.4 {
2229 UserPersona::JuniorAccountant
2230 } else if roll < 0.7 {
2231 UserPersona::SeniorAccountant
2232 } else if roll < 0.85 {
2233 UserPersona::Controller
2234 } else {
2235 UserPersona::Manager
2236 }
2237 };
2238
2239 if let Some(user) = pool.get_random_user(persona, &mut self.rng) {
2240 return (user.user_id.clone(), user.persona.to_string());
2241 }
2242 }
2243
2244 if is_automated {
2246 (
2247 format!("BATCH{:04}", self.rng.random_range(1..=20)),
2248 "automated_system".to_string(),
2249 )
2250 } else {
2251 (
2252 format!("USER{:04}", self.rng.random_range(1..=40)),
2253 "senior_accountant".to_string(),
2254 )
2255 }
2256 }
2257
2258 #[inline]
2260 fn select_source(&mut self) -> TransactionSource {
2261 let roll: f64 = self.rng.random();
2262 let dist = &self.config.source_distribution;
2263
2264 if roll < dist.manual {
2265 TransactionSource::Manual
2266 } else if roll < dist.manual + dist.automated {
2267 TransactionSource::Automated
2268 } else if roll < dist.manual + dist.automated + dist.recurring {
2269 TransactionSource::Recurring
2270 } else {
2271 TransactionSource::Adjustment
2272 }
2273 }
2274
2275 #[inline]
2277 fn document_type_for_process(process: BusinessProcess) -> &'static str {
2286 match process {
2287 BusinessProcess::P2P => "KR",
2288 BusinessProcess::O2C => "DR",
2289 BusinessProcess::R2R => "SA",
2290 BusinessProcess::H2R => "HR",
2291 BusinessProcess::A2R => "AA",
2292 _ => "SA",
2293 }
2294 }
2295
2296 fn select_business_process(&mut self) -> BusinessProcess {
2297 *datasynth_core::utils::weighted_select(&mut self.rng, &self.business_process_weights)
2298 }
2299
2300 #[inline]
2301 fn select_debit_account(&mut self) -> &GLAccount {
2302 let accounts = self.coa.get_accounts_by_type(AccountType::Asset);
2303 let expense_accounts = self.coa.get_accounts_by_type(AccountType::Expense);
2304
2305 let all: Vec<_> = if self.rng.random::<f64>() < 0.6 {
2307 accounts
2308 } else {
2309 expense_accounts
2310 };
2311
2312 all.choose(&mut self.rng).copied().unwrap_or_else(|| {
2313 tracing::warn!(
2314 "Account selection returned empty list, falling back to first COA account"
2315 );
2316 &self.coa.accounts[0]
2317 })
2318 }
2319
2320 #[inline]
2321 fn select_credit_account(&mut self) -> &GLAccount {
2322 let liability_accounts = self.coa.get_accounts_by_type(AccountType::Liability);
2323 let revenue_accounts = self.coa.get_accounts_by_type(AccountType::Revenue);
2324
2325 let all: Vec<_> = if self.rng.random::<f64>() < 0.6 {
2327 liability_accounts
2328 } else {
2329 revenue_accounts
2330 };
2331
2332 all.choose(&mut self.rng).copied().unwrap_or_else(|| {
2333 tracing::warn!(
2334 "Account selection returned empty list, falling back to first COA account"
2335 );
2336 &self.coa.accounts[0]
2337 })
2338 }
2339}
2340
2341impl Generator for JournalEntryGenerator {
2342 type Item = JournalEntry;
2343 type Config = (
2344 TransactionConfig,
2345 Arc<ChartOfAccounts>,
2346 Vec<String>,
2347 NaiveDate,
2348 NaiveDate,
2349 );
2350
2351 fn new(config: Self::Config, seed: u64) -> Self {
2352 Self::new_with_params(config.0, config.1, config.2, config.3, config.4, seed)
2353 }
2354
2355 fn generate_one(&mut self) -> Self::Item {
2356 self.generate()
2357 }
2358
2359 fn reset(&mut self) {
2360 self.rng = seeded_rng(self.seed, 0);
2361 self.line_sampler.reset(self.seed + 1);
2362 self.amount_sampler.reset(self.seed + 2);
2363 self.temporal_sampler.reset(self.seed + 3);
2364 if let Some(ref mut adv) = self.advanced_amount_sampler {
2365 adv.reset(self.seed + 2);
2366 }
2367 self.count = 0;
2368 self.uuid_factory.reset();
2369
2370 let mut ref_gen = ReferenceGenerator::new(
2372 self.start_date.year(),
2373 self.companies
2374 .first()
2375 .map(std::string::String::as_str)
2376 .unwrap_or("1000"),
2377 );
2378 ref_gen.set_prefix(
2379 ReferenceType::Invoice,
2380 &self.template_config.references.invoice_prefix,
2381 );
2382 ref_gen.set_prefix(
2383 ReferenceType::PurchaseOrder,
2384 &self.template_config.references.po_prefix,
2385 );
2386 ref_gen.set_prefix(
2387 ReferenceType::SalesOrder,
2388 &self.template_config.references.so_prefix,
2389 );
2390 self.reference_generator = ref_gen;
2391 }
2392
2393 fn count(&self) -> u64 {
2394 self.count
2395 }
2396
2397 fn seed(&self) -> u64 {
2398 self.seed
2399 }
2400}
2401
2402use datasynth_core::traits::ParallelGenerator;
2403
2404impl ParallelGenerator for JournalEntryGenerator {
2405 fn split(self, parts: usize) -> Vec<Self> {
2411 let parts = parts.max(1);
2412 (0..parts)
2413 .map(|i| {
2414 let sub_seed = self
2416 .seed
2417 .wrapping_add((i as u64).wrapping_mul(0x9E3779B97F4A7C15));
2418
2419 let mut gen = JournalEntryGenerator::new_with_full_config(
2420 self.config.clone(),
2421 Arc::clone(&self.coa),
2422 self.companies.clone(),
2423 self.start_date,
2424 self.end_date,
2425 sub_seed,
2426 self.template_config.clone(),
2427 self.user_pool.clone(),
2428 );
2429
2430 gen.company_selector = self.company_selector.clone();
2432 gen.vendor_pool = self.vendor_pool.clone();
2433 gen.customer_pool = self.customer_pool.clone();
2434 gen.material_pool = self.material_pool.clone();
2435 gen.using_real_master_data = self.using_real_master_data;
2436 gen.fraud_config = self.fraud_config.clone();
2437 gen.persona_errors_enabled = self.persona_errors_enabled;
2438 gen.approval_enabled = self.approval_enabled;
2439 gen.approval_threshold = self.approval_threshold;
2440 gen.sod_violation_rate = self.sod_violation_rate;
2441 if let Some(mut adv) = self.advanced_amount_sampler.clone() {
2446 adv.reset(sub_seed.wrapping_add(2));
2447 gen.advanced_amount_sampler = Some(adv);
2448 }
2449 if let Some(mut cond) = self.conditional_amount_override.clone() {
2452 cond.reset(sub_seed.wrapping_add(17));
2453 gen.conditional_amount_override = Some(cond);
2454 }
2455 if let Some(mut cop) = self.correlation_copula.clone() {
2457 cop.reset(sub_seed.wrapping_add(31));
2458 gen.correlation_copula = Some(cop);
2459 }
2460
2461 gen.uuid_factory = DeterministicUuidFactory::for_partition(
2463 sub_seed,
2464 GeneratorType::JournalEntry,
2465 i as u8,
2466 );
2467
2468 if let Some(ref config) = self.temporal_patterns_config {
2470 gen.temporal_patterns_config = Some(config.clone());
2471 if config.business_days.enabled {
2473 if let Some(ref bdc) = self.business_day_calculator {
2474 gen.business_day_calculator = Some(bdc.clone());
2475 }
2476 }
2477 if config.processing_lags.enabled {
2479 let lag_config =
2480 Self::convert_processing_lag_config(&config.processing_lags);
2481 gen.processing_lag_calculator =
2482 Some(ProcessingLagCalculator::with_config(sub_seed, lag_config));
2483 }
2484 }
2485
2486 if let Some(ref dc) = self.drift_controller {
2488 gen.drift_controller = Some(dc.clone());
2489 }
2490
2491 gen
2492 })
2493 .collect()
2494 }
2495}
2496
2497#[cfg(test)]
2498#[allow(clippy::unwrap_used)]
2499mod tests {
2500 use super::*;
2501 use crate::ChartOfAccountsGenerator;
2502
/// Every generated entry without a human-error tag must balance, every entry must have
/// at least two lines, and at least 80 of 100 entries must be untagged.
#[test]
fn test_generate_balanced_entries() {
    let coa = Arc::new(
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42)
            .generate(),
    );

    let mut generator = JournalEntryGenerator::new_with_params(
        TransactionConfig::default(),
        coa,
        vec!["1000".to_string()],
        NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
        NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
        42,
    );

    let mut balanced_count = 0;
    for _ in 0..100 {
        let entry = generator.generate();

        // Entries carrying a human-error tag are excluded from the balance check.
        let has_human_error = entry
            .header
            .header_text
            .as_deref()
            .is_some_and(|text| text.contains("[HUMAN_ERROR:"));

        if !has_human_error {
            assert!(
                entry.is_balanced(),
                "Entry {:?} is not balanced",
                entry.header.document_id
            );
            balanced_count += 1;
        }
        assert!(entry.line_count() >= 2, "Entry has fewer than 2 lines");
    }

    assert!(
        balanced_count >= 80,
        "Expected at least 80 balanced entries, got {}",
        balanced_count
    );
}
2548
/// Two generators built from identical inputs and the same seed must produce the same
/// document IDs and debit totals.
#[test]
fn test_deterministic_generation() {
    let coa = Arc::new(
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42)
            .generate(),
    );

    let build_generator = |coa: Arc<ChartOfAccounts>| {
        JournalEntryGenerator::new_with_params(
            TransactionConfig::default(),
            coa,
            vec!["1000".to_string()],
            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
            42,
        )
    };

    let mut first = build_generator(Arc::clone(&coa));
    let mut second = build_generator(coa);

    for _ in 0..50 {
        let from_first = first.generate();
        let from_second = second.generate();
        assert_eq!(from_first.header.document_id, from_second.header.document_id);
        assert_eq!(from_first.total_debit(), from_second.total_debit());
    }
}
2580
/// With header text, line text and references switched on (and persona errors off),
/// every entry must carry all of them, have a business process, and balance.
#[test]
fn test_templates_generate_descriptions() {
    use datasynth_config::schema::{
        CultureDistribution, DescriptionTemplateConfig, NameTemplateConfig,
        ReferenceTemplateConfig,
    };

    let coa = Arc::new(
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42)
            .generate(),
    );

    let names = NameTemplateConfig {
        generate_realistic_names: true,
        email_domain: "test.com".to_string(),
        culture_distribution: CultureDistribution::default(),
    };
    let descriptions = DescriptionTemplateConfig {
        generate_header_text: true,
        generate_line_text: true,
    };
    let references = ReferenceTemplateConfig {
        generate_references: true,
        invoice_prefix: "TEST-INV".to_string(),
        po_prefix: "TEST-PO".to_string(),
        so_prefix: "TEST-SO".to_string(),
    };
    let template_config = TemplateConfig {
        names,
        descriptions,
        references,
        path: None,
        merge_strategy: datasynth_config::TemplateMergeStrategy::default(),
    };

    let mut generator = JournalEntryGenerator::new_with_full_config(
        TransactionConfig::default(),
        coa,
        vec!["1000".to_string()],
        NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
        NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
        42,
        template_config,
        None,
    )
    .with_persona_errors(false);

    for _ in 0..10 {
        let entry = generator.generate();
        let header = &entry.header;

        assert!(
            header.header_text.is_some(),
            "Header text should be populated"
        );

        assert!(header.reference.is_some(), "Reference should be populated");

        assert!(
            header.business_process.is_some(),
            "Business process should be set"
        );

        for line in &entry.lines {
            assert!(line.line_text.is_some(), "Line text should be populated");
        }

        assert!(entry.is_balanced());
    }
}
2650
/// Entries generated with an explicitly supplied user pool must always have
/// a non-empty `created_by`.
#[test]
fn test_user_pool_integration() {
    let coa = Arc::new(
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42)
            .generate(),
    );

    let companies = vec!["1000".to_string()];

    // Build the standard user pool for the same company list the generator uses.
    let user_pool = crate::UserGenerator::new(42).generate_standard(&companies);

    let mut je_gen = JournalEntryGenerator::new_with_full_config(
        TransactionConfig::default(),
        coa,
        companies,
        NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
        NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
        42,
        TemplateConfig::default(),
        Some(user_pool),
    );

    for entry in (0..20).map(|_| je_gen.generate()) {
        assert!(!entry.header.created_by.is_empty());
    }
}
2683
/// A freshly constructed generator reports that it is not using real master
/// data; after vendors, customers and materials are attached it reports
/// that it is.
#[test]
fn test_master_data_connection() {
    let coa = Arc::new(
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42)
            .generate(),
    );

    // Small hand-written master data sets.
    let vendors = vec![
        Vendor::new("V-TEST-001", "Test Vendor Alpha", VendorType::Supplier),
        Vendor::new("V-TEST-002", "Test Vendor Beta", VendorType::Technology),
    ];
    let customers = vec![
        Customer::new("C-TEST-001", "Test Customer One", CustomerType::Corporate),
        Customer::new(
            "C-TEST-002",
            "Test Customer Two",
            CustomerType::SmallBusiness,
        ),
    ];
    let materials = vec![Material::new(
        "MAT-TEST-001",
        "Test Material A",
        MaterialType::RawMaterial,
    )];

    let period_start = NaiveDate::from_ymd_opt(2024, 1, 1).unwrap();
    let period_end = NaiveDate::from_ymd_opt(2024, 12, 31).unwrap();
    let generator = JournalEntryGenerator::new_with_params(
        TransactionConfig::default(),
        coa,
        vec!["1000".to_string()],
        period_start,
        period_end,
        42,
    );

    // Before any master data is attached the flag must be off.
    assert!(!generator.is_using_real_master_data());

    // Attach all three data sets through the individual builder methods.
    let generator = generator
        .with_vendors(&vendors)
        .with_customers(&customers)
        .with_materials(&materials);

    assert!(generator.is_using_real_master_data());
}
2735
/// Attaching vendors, customers and materials in one call through
/// `with_master_data` must flip the generator to "using real master data".
#[test]
fn test_with_master_data_convenience_method() {
    let coa = Arc::new(
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42)
            .generate(),
    );

    // One record of each kind is enough for this check.
    let vendors = vec![Vendor::new("V-001", "Vendor One", VendorType::Supplier)];
    let customers = vec![Customer::new(
        "C-001",
        "Customer One",
        CustomerType::Corporate,
    )];
    let materials = vec![Material::new(
        "MAT-001",
        "Material One",
        MaterialType::RawMaterial,
    )];

    let period_start = NaiveDate::from_ymd_opt(2024, 1, 1).unwrap();
    let period_end = NaiveDate::from_ymd_opt(2024, 12, 31).unwrap();

    let bare = JournalEntryGenerator::new_with_params(
        TransactionConfig::default(),
        coa,
        vec!["1000".to_string()],
        period_start,
        period_end,
        42,
    );
    let generator = bare.with_master_data(&vendors, &customers, &materials);

    assert!(generator.is_using_real_master_data());
}
2766
/// Checks the relative ordering of the rates returned by
/// `apply_stress_factors` for a handful of sample dates, starting from a
/// base rate of 0.1.
///
/// NOTE(review): by the calendar, 2024-06-15 and 2024-06-29 are Saturdays
/// and 2024-12-30 is a Monday — confirm these are the intended "regular
/// day", "month end" and "year end" samples.
#[test]
fn test_stress_factors_increase_error_rate() {
    let coa = Arc::new(
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42)
            .generate(),
    );

    let generator = JournalEntryGenerator::new_with_params(
        TransactionConfig::default(),
        coa,
        vec!["1000".to_string()],
        NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
        NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
        42,
    );

    let base_rate = 0.1;

    // Stress-adjusted rate for a given calendar date.
    let stressed_rate = |year: i32, month: u32, day: u32| {
        let date = NaiveDate::from_ymd_opt(year, month, day).unwrap();
        generator.apply_stress_factors(base_rate, date)
    };

    // Baseline sample: expected to stay within 0.01 of the base rate.
    let regular_rate = stressed_rate(2024, 6, 15);
    assert!(
        (regular_rate - base_rate).abs() < 0.01,
        "Regular day should have minimal stress factor adjustment"
    );

    let month_end_rate = stressed_rate(2024, 6, 29);
    assert!(
        month_end_rate > regular_rate,
        "Month end should have higher error rate than regular day"
    );

    let year_end_rate = stressed_rate(2024, 12, 30);
    assert!(
        year_end_rate > month_end_rate,
        "Year end should have highest error rate"
    );

    let friday_rate = stressed_rate(2024, 6, 14);
    assert!(
        friday_rate > regular_rate,
        "Friday should have higher error rate than mid-week"
    );

    let monday_rate = stressed_rate(2024, 6, 17);
    assert!(
        monday_rate > regular_rate,
        "Monday should have higher error rate than mid-week"
    );
}
2824
/// Generates 200 entries (seed 123, persona errors off) and checks that
/// all of them balance and that at least one posting date is shared by
/// more than one entry.
#[test]
fn test_batching_produces_similar_entries() {
    let coa = Arc::new(
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42)
            .generate(),
    );

    let mut je_gen = JournalEntryGenerator::new_with_params(
        TransactionConfig::default(),
        coa,
        vec!["1000".to_string()],
        NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
        NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
        123,
    )
    .with_persona_errors(false);

    let mut entries: Vec<JournalEntry> = Vec::with_capacity(200);
    for _ in 0..200 {
        entries.push(je_gen.generate());
    }

    assert!(
        entries.iter().all(|entry| entry.is_balanced()),
        "All entries including batched should be balanced"
    );

    // Tally how many entries landed on each posting date.
    let mut per_day = std::collections::HashMap::<NaiveDate, usize>::new();
    for posting_date in entries.iter().map(|entry| entry.header.posting_date) {
        *per_day.entry(posting_date).or_default() += 1;
    }

    let dates_with_multiple = per_day.values().filter(|count| **count > 1).count();
    assert!(
        dates_with_multiple > 0,
        "With batching, should see some dates with multiple entries"
    );
}
2867
/// With temporal patterns and business-day handling enabled for the "US"
/// calendar region, none of 100 generated entries may be posted on a
/// Saturday or Sunday.
#[test]
fn test_temporal_patterns_business_days() {
    use datasynth_config::schema::{
        BusinessDaySchemaConfig, CalendarSchemaConfig, TemporalPatternsConfig,
    };

    let coa = Arc::new(
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42)
            .generate(),
    );

    // Only the switches relevant here are set; everything else keeps its default.
    let business_days = BusinessDaySchemaConfig {
        enabled: true,
        ..Default::default()
    };
    let calendars = CalendarSchemaConfig {
        regions: vec!["US".to_string()],
        custom_holidays: vec![],
    };
    let temporal_config = TemporalPatternsConfig {
        enabled: true,
        business_days,
        calendars,
        ..Default::default()
    };

    // The generation window covers 2024-01-01 through 2024-03-31.
    let mut je_gen = JournalEntryGenerator::new_with_params(
        TransactionConfig::default(),
        coa,
        vec!["1000".to_string()],
        NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
        NaiveDate::from_ymd_opt(2024, 3, 31).unwrap(),
        42,
    )
    .with_temporal_patterns(temporal_config, 42)
    .with_persona_errors(false);

    let entries: Vec<JournalEntry> = (0..100).map(|_| je_gen.generate()).collect();

    for posted in entries.iter().map(|entry| entry.header.posting_date) {
        let on_weekend = matches!(
            posted.weekday(),
            chrono::Weekday::Sat | chrono::Weekday::Sun
        );
        assert!(
            !on_weekend,
            "Posting date {:?} should not be a weekend",
            posted
        );
    }
}
2915
/// Without configuring temporal patterns, fewer than 5% of 500 generated
/// entries may carry a Saturday or Sunday posting date.
#[test]
fn test_default_generation_filters_weekends() {
    let coa = Arc::new(
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42)
            .generate(),
    );

    let mut je_gen = JournalEntryGenerator::new_with_params(
        TransactionConfig::default(),
        coa,
        vec!["1000".to_string()],
        NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
        NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
        42,
    )
    .with_persona_errors(false);

    let total = 500;
    let entries: Vec<JournalEntry> = (0..total).map(|_| je_gen.generate()).collect();

    // Count the entries whose posting date falls on a weekend.
    let weekend_count = entries
        .iter()
        .map(|entry| entry.header.posting_date.weekday())
        .filter(|day| matches!(day, chrono::Weekday::Sat | chrono::Weekday::Sun))
        .count();

    let weekend_pct = weekend_count as f64 / total as f64;
    assert!(
        weekend_pct < 0.05,
        "Expected weekend entries <5% of total without temporal_patterns enabled, \
         but got {:.1}% ({}/{})",
        weekend_pct * 100.0,
        weekend_count,
        total
    );
}
2956
/// Over 200 entries (seed 99, persona errors and batching off) the
/// generator must use more than three distinct document types, and "SA"
/// must account for fewer than half of them.
#[test]
fn test_document_type_derived_from_business_process() {
    let coa = Arc::new(
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42)
            .generate(),
    );

    let mut je_gen = JournalEntryGenerator::new_with_params(
        TransactionConfig::default(),
        coa,
        vec!["1000".to_string()],
        NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
        NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
        99,
    )
    .with_persona_errors(false)
    .with_batching(false);

    let total = 200;
    let entries: Vec<_> = (0..total).map(|_| je_gen.generate()).collect();

    // Distinct document types seen across the whole sample.
    let doc_types: std::collections::HashSet<_> = entries
        .iter()
        .map(|entry| entry.header.document_type.clone())
        .collect();

    // Number of entries typed "SA".
    let sa_count = entries
        .iter()
        .filter(|entry| {
            let dt = &entry.header.document_type;
            dt == "SA"
        })
        .count();

    assert!(
        doc_types.len() > 3,
        "Expected >3 distinct document types, got {} ({:?})",
        doc_types.len(),
        doc_types,
    );

    let sa_pct = sa_count as f64 / total as f64;
    assert!(
        sa_pct < 0.50,
        "Expected SA <50%, got {:.1}% ({}/{})",
        sa_pct * 100.0,
        sa_count,
        total,
    );
}
3005
/// More than 95% of all lines across 200 generated entries must have
/// `account_description` populated.
#[test]
fn test_enrich_line_items_account_description() {
    let coa = Arc::new(
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42)
            .generate(),
    );

    let mut je_gen = JournalEntryGenerator::new_with_params(
        TransactionConfig::default(),
        coa,
        vec!["1000".to_string()],
        NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
        NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
        42,
    )
    .with_persona_errors(false);

    let total = 200;
    let entries: Vec<JournalEntry> = (0..total).map(|_| je_gen.generate()).collect();

    // One pass over every line: count lines seen and lines with a description.
    let (total_lines, lines_with_desc) = entries
        .iter()
        .flat_map(|entry| &entry.lines)
        .fold((0_usize, 0_usize), |(seen, described), line| {
            (
                seen + 1,
                described + usize::from(line.account_description.is_some()),
            )
        });

    let desc_pct = lines_with_desc as f64 / total_lines as f64;
    assert!(
        desc_pct > 0.95,
        "Expected >95% of lines to have account_description, got {:.1}% ({}/{})",
        desc_pct * 100.0,
        lines_with_desc,
        total_lines,
    );
}
3042
/// Among lines whose GL account starts with '5' or '6', more than 80% must
/// have a cost center. If the 300-entry sample contains no such lines the
/// test passes without asserting anything.
#[test]
fn test_enrich_line_items_cost_center_for_expense_accounts() {
    let coa = Arc::new(
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42)
            .generate(),
    );

    let mut je_gen = JournalEntryGenerator::new_with_params(
        TransactionConfig::default(),
        coa,
        vec!["1000".to_string()],
        NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
        NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
        42,
    )
    .with_persona_errors(false);

    let total = 300;
    let entries: Vec<JournalEntry> = (0..total).map(|_| je_gen.generate()).collect();

    // Select lines by the leading character of the account number.
    let expense_lines: Vec<_> = entries
        .iter()
        .flat_map(|entry| &entry.lines)
        .filter(|line| matches!(line.gl_account.chars().next(), Some('5') | Some('6')))
        .collect();

    // Nothing to measure when the sample produced no matching lines.
    if expense_lines.is_empty() {
        return;
    }

    let with_cc = expense_lines
        .iter()
        .filter(|line| line.cost_center.is_some())
        .count();
    let cc_pct = with_cc as f64 / expense_lines.len() as f64;
    assert!(
        cc_pct > 0.80,
        "Expected >80% of expense lines to have cost_center, got {:.1}% ({}/{})",
        cc_pct * 100.0,
        with_cc,
        expense_lines.len(),
    );
}
3087
/// More than 95% of all lines across 100 generated entries must have a
/// profit center, and more than 95% must have line text.
#[test]
fn test_enrich_line_items_profit_center_and_line_text() {
    let coa = Arc::new(
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42)
            .generate(),
    );

    let mut je_gen = JournalEntryGenerator::new_with_params(
        TransactionConfig::default(),
        coa,
        vec!["1000".to_string()],
        NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
        NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
        42,
    )
    .with_persona_errors(false);

    let total = 100;
    let entries: Vec<JournalEntry> = (0..total).map(|_| je_gen.generate()).collect();

    let total_lines: usize = entries.iter().map(|entry| entry.lines.len()).sum();

    // Tally both populated fields in one walk over the lines.
    let mut with_pc = 0_usize;
    let mut with_text = 0_usize;
    for line in entries.iter().flat_map(|entry| &entry.lines) {
        if line.profit_center.is_some() {
            with_pc += 1;
        }
        if line.line_text.is_some() {
            with_text += 1;
        }
    }

    let pc_pct = with_pc as f64 / total_lines as f64;
    assert!(
        pc_pct > 0.95,
        "Expected >95% of lines to have profit_center, got {:.1}% ({}/{})",
        pc_pct * 100.0,
        with_pc,
        total_lines,
    );

    let text_pct = with_text as f64 / total_lines as f64;
    assert!(
        text_pct > 0.95,
        "Expected >95% of lines to have line_text, got {:.1}% ({}/{})",
        text_pct * 100.0,
        with_text,
        total_lines,
    );
}
3139
/// Every one of 100 generated entries must have a non-empty
/// `source_system`, a non-empty `created_by`, and a `created_date`.
#[test]
fn test_je_has_audit_flags() {
    let coa = Arc::new(
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42)
            .generate(),
    );

    let mut je_gen = JournalEntryGenerator::new_with_params(
        TransactionConfig::default(),
        coa,
        vec!["1000".to_string()],
        NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
        NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
        42,
    )
    .with_persona_errors(false);

    for entry in (0..100).map(|_| je_gen.generate()) {
        let header = &entry.header;

        assert!(
            !header.source_system.is_empty(),
            "source_system should be populated, got empty string"
        );
        assert!(
            !header.created_by.is_empty(),
            "created_by should be populated"
        );
        assert!(
            header.created_date.is_some(),
            "created_date should be populated"
        );
    }
}
3180
/// Over 1000 entries (persona errors and batching off) the share flagged
/// `is_manual` must lie strictly between 1% and 50%, and the flag must
/// agree with `source == TransactionSource::Manual` on every entry.
#[test]
fn test_manual_entry_rate() {
    let coa = Arc::new(
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42)
            .generate(),
    );

    let mut je_gen = JournalEntryGenerator::new_with_params(
        TransactionConfig::default(),
        coa,
        vec!["1000".to_string()],
        NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
        NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
        42,
    )
    .with_persona_errors(false)
    .with_batching(false);

    let total = 1000;
    let entries: Vec<JournalEntry> = (0..total).map(|_| je_gen.generate()).collect();

    let manual_count = entries
        .iter()
        .filter(|entry| entry.header.is_manual)
        .count();
    let manual_rate = manual_count as f64 / total as f64;

    // Both bounds are exclusive.
    let rate_is_plausible = 0.01 < manual_rate && manual_rate < 0.50;
    assert!(
        rate_is_plausible,
        "Manual entry rate should be reasonable (1%-50%), got {:.1}% ({}/{})",
        manual_rate * 100.0,
        manual_count,
        total,
    );

    // The boolean flag and the source enum must never disagree.
    entries.iter().for_each(|entry| {
        let source_is_manual = entry.header.source == TransactionSource::Manual;
        assert_eq!(
            entry.header.is_manual, source_is_manual,
            "is_manual should match source == Manual"
        );
    });
}
3223
/// For 500 entries (persona errors and batching off): entries flagged
/// `is_manual` must report a `source_system` of "manual" or "spreadsheet",
/// and all other entries must report neither.
#[test]
fn test_manual_source_consistency() {
    let coa = Arc::new(
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42)
            .generate(),
    );

    let mut je_gen = JournalEntryGenerator::new_with_params(
        TransactionConfig::default(),
        coa,
        vec!["1000".to_string()],
        NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
        NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
        42,
    )
    .with_persona_errors(false)
    .with_batching(false);

    for entry in (0..500).map(|_| je_gen.generate()) {
        let header = &entry.header;

        // True when the source system is one of the two manual-style values.
        let manual_style_system =
            header.source_system == "manual" || header.source_system == "spreadsheet";

        if header.is_manual {
            assert!(
                manual_style_system,
                "Manual entry should have source_system 'manual' or 'spreadsheet', got '{}'",
                header.source_system,
            );
        } else {
            assert!(
                !manual_style_system,
                "Non-manual entry should not have source_system 'manual' or 'spreadsheet', got '{}'",
                header.source_system,
            );
        }
    }
}
3263
/// Whenever an entry has a `created_date`, its date component must not be
/// later than the entry's posting date. Checked over 500 entries; entries
/// without a `created_date` are not checked.
#[test]
fn test_created_date_before_posting() {
    let coa = Arc::new(
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42)
            .generate(),
    );

    let mut je_gen = JournalEntryGenerator::new_with_params(
        TransactionConfig::default(),
        coa,
        vec!["1000".to_string()],
        NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
        NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
        42,
    )
    .with_persona_errors(false);

    for entry in (0..500).map(|_| je_gen.generate()) {
        let posting_date = entry.header.posting_date;

        // Reduce the creation timestamp to its calendar date before comparing.
        if let Some(created_on) = entry.header.created_date.map(|stamp| stamp.date()) {
            assert!(
                created_on <= posting_date,
                "created_date ({}) should be <= posting_date ({})",
                created_on,
                posting_date,
            );
        }
    }
}
3294}