1use chrono::{Datelike, NaiveDate};
4use rand::prelude::*;
5use rand_chacha::ChaCha8Rng;
6use rust_decimal::prelude::*;
7use rust_decimal::Decimal;
8use std::sync::Arc;
9
10use datasynth_config::schema::{
11 FraudConfig, GeneratorConfig, TemplateConfig, TemporalPatternsConfig, TransactionConfig,
12};
13use datasynth_core::distributions::{
14 BusinessDayCalculator, CrossDayConfig, DriftAdjustments, DriftConfig, DriftController,
15 EventType, LagDistribution, PeriodEndConfig, PeriodEndDynamics, PeriodEndModel,
16 ProcessingLagCalculator, ProcessingLagConfig, *,
17};
18use datasynth_core::models::*;
19use datasynth_core::templates::{
20 descriptions::DescriptionContext, DescriptionGenerator, ReferenceGenerator, ReferenceType,
21};
22use datasynth_core::traits::Generator;
23use datasynth_core::uuid_factory::{DeterministicUuidFactory, GeneratorType};
24
25use crate::company_selector::WeightedCompanySelector;
26use crate::user_generator::{UserGenerator, UserGeneratorConfig};
27
28pub struct JournalEntryGenerator {
30 rng: ChaCha8Rng,
31 seed: u64,
32 config: TransactionConfig,
33 coa: Arc<ChartOfAccounts>,
34 companies: Vec<String>,
35 company_selector: WeightedCompanySelector,
36 line_sampler: LineItemSampler,
37 amount_sampler: AmountSampler,
38 temporal_sampler: TemporalSampler,
39 start_date: NaiveDate,
40 end_date: NaiveDate,
41 count: u64,
42 uuid_factory: DeterministicUuidFactory,
43 user_pool: Option<UserPool>,
45 description_generator: DescriptionGenerator,
46 reference_generator: ReferenceGenerator,
47 template_config: TemplateConfig,
48 vendor_pool: VendorPool,
49 customer_pool: CustomerPool,
50 material_pool: Option<MaterialPool>,
52 using_real_master_data: bool,
54 fraud_config: FraudConfig,
56 persona_errors_enabled: bool,
58 approval_enabled: bool,
60 approval_threshold: rust_decimal::Decimal,
61 batch_state: Option<BatchState>,
63 drift_controller: Option<DriftController>,
65 business_day_calculator: Option<BusinessDayCalculator>,
67 processing_lag_calculator: Option<ProcessingLagCalculator>,
68 temporal_patterns_config: Option<TemporalPatternsConfig>,
69}
70
71#[derive(Clone)]
76struct BatchState {
77 base_vendor: Option<String>,
79 base_customer: Option<String>,
80 base_account_number: String,
81 base_amount: rust_decimal::Decimal,
82 base_business_process: Option<BusinessProcess>,
83 base_posting_date: NaiveDate,
84 remaining: u8,
86}
87
88impl JournalEntryGenerator {
89 pub fn new_with_params(
91 config: TransactionConfig,
92 coa: Arc<ChartOfAccounts>,
93 companies: Vec<String>,
94 start_date: NaiveDate,
95 end_date: NaiveDate,
96 seed: u64,
97 ) -> Self {
98 Self::new_with_full_config(
99 config,
100 coa,
101 companies,
102 start_date,
103 end_date,
104 seed,
105 TemplateConfig::default(),
106 None,
107 )
108 }
109
110 #[allow(clippy::too_many_arguments)]
112 pub fn new_with_full_config(
113 config: TransactionConfig,
114 coa: Arc<ChartOfAccounts>,
115 companies: Vec<String>,
116 start_date: NaiveDate,
117 end_date: NaiveDate,
118 seed: u64,
119 template_config: TemplateConfig,
120 user_pool: Option<UserPool>,
121 ) -> Self {
122 let user_pool = user_pool.or_else(|| {
124 if template_config.names.generate_realistic_names {
125 let user_gen_config = UserGeneratorConfig {
126 culture_distribution: vec![
127 (
128 datasynth_core::templates::NameCulture::WesternUs,
129 template_config.names.culture_distribution.western_us,
130 ),
131 (
132 datasynth_core::templates::NameCulture::Hispanic,
133 template_config.names.culture_distribution.hispanic,
134 ),
135 (
136 datasynth_core::templates::NameCulture::German,
137 template_config.names.culture_distribution.german,
138 ),
139 (
140 datasynth_core::templates::NameCulture::French,
141 template_config.names.culture_distribution.french,
142 ),
143 (
144 datasynth_core::templates::NameCulture::Chinese,
145 template_config.names.culture_distribution.chinese,
146 ),
147 (
148 datasynth_core::templates::NameCulture::Japanese,
149 template_config.names.culture_distribution.japanese,
150 ),
151 (
152 datasynth_core::templates::NameCulture::Indian,
153 template_config.names.culture_distribution.indian,
154 ),
155 ],
156 email_domain: template_config.names.email_domain.clone(),
157 generate_realistic_names: true,
158 };
159 let mut user_gen = UserGenerator::with_config(seed + 100, user_gen_config);
160 Some(user_gen.generate_standard(&companies))
161 } else {
162 None
163 }
164 });
165
166 let mut ref_gen = ReferenceGenerator::new(
168 start_date.year(),
169 companies.first().map(|s| s.as_str()).unwrap_or("1000"),
170 );
171 ref_gen.set_prefix(
172 ReferenceType::Invoice,
173 &template_config.references.invoice_prefix,
174 );
175 ref_gen.set_prefix(
176 ReferenceType::PurchaseOrder,
177 &template_config.references.po_prefix,
178 );
179 ref_gen.set_prefix(
180 ReferenceType::SalesOrder,
181 &template_config.references.so_prefix,
182 );
183
184 let company_selector = WeightedCompanySelector::uniform(companies.clone());
186
187 Self {
188 rng: ChaCha8Rng::seed_from_u64(seed),
189 seed,
190 config: config.clone(),
191 coa,
192 companies,
193 company_selector,
194 line_sampler: LineItemSampler::with_config(
195 seed + 1,
196 config.line_item_distribution.clone(),
197 config.even_odd_distribution.clone(),
198 config.debit_credit_distribution.clone(),
199 ),
200 amount_sampler: AmountSampler::with_config(seed + 2, config.amounts.clone()),
201 temporal_sampler: TemporalSampler::with_config(
202 seed + 3,
203 config.seasonality.clone(),
204 WorkingHoursConfig::default(),
205 Vec::new(),
206 ),
207 start_date,
208 end_date,
209 count: 0,
210 uuid_factory: DeterministicUuidFactory::new(seed, GeneratorType::JournalEntry),
211 user_pool,
212 description_generator: DescriptionGenerator::new(),
213 reference_generator: ref_gen,
214 template_config,
215 vendor_pool: VendorPool::standard(),
216 customer_pool: CustomerPool::standard(),
217 material_pool: None,
218 using_real_master_data: false,
219 fraud_config: FraudConfig::default(),
220 persona_errors_enabled: true, approval_enabled: true, approval_threshold: rust_decimal::Decimal::new(10000, 0), batch_state: None,
224 drift_controller: None,
225 business_day_calculator: None,
226 processing_lag_calculator: None,
227 temporal_patterns_config: None,
228 }
229 }
230
231 pub fn from_generator_config(
236 full_config: &GeneratorConfig,
237 coa: Arc<ChartOfAccounts>,
238 start_date: NaiveDate,
239 end_date: NaiveDate,
240 seed: u64,
241 ) -> Self {
242 let companies: Vec<String> = full_config
243 .companies
244 .iter()
245 .map(|c| c.code.clone())
246 .collect();
247
248 let company_selector = WeightedCompanySelector::from_configs(&full_config.companies);
250
251 let mut generator = Self::new_with_full_config(
252 full_config.transactions.clone(),
253 coa,
254 companies,
255 start_date,
256 end_date,
257 seed,
258 full_config.templates.clone(),
259 None,
260 );
261
262 generator.company_selector = company_selector;
264
265 generator.fraud_config = full_config.fraud.clone();
267
268 let temporal_config = &full_config.temporal_patterns;
270 if temporal_config.enabled {
271 generator = generator.with_temporal_patterns(temporal_config.clone(), seed);
272 }
273
274 generator
275 }
276
277 pub fn with_temporal_patterns(mut self, config: TemporalPatternsConfig, seed: u64) -> Self {
284 if config.business_days.enabled {
286 let region = config
287 .calendars
288 .regions
289 .first()
290 .map(|r| Self::parse_region(r))
291 .unwrap_or(Region::US);
292
293 let calendar = HolidayCalendar::new(region, self.start_date.year());
294 self.business_day_calculator = Some(BusinessDayCalculator::new(calendar));
295 }
296
297 if config.processing_lags.enabled {
299 let lag_config = Self::convert_processing_lag_config(&config.processing_lags);
300 self.processing_lag_calculator =
301 Some(ProcessingLagCalculator::with_config(seed, lag_config));
302 }
303
304 let model = config.period_end.model.as_deref().unwrap_or("flat");
306 if model != "flat"
307 || config
308 .period_end
309 .month_end
310 .as_ref()
311 .is_some_and(|m| m.peak_multiplier.unwrap_or(1.0) != 1.0)
312 {
313 let dynamics = Self::convert_period_end_config(&config.period_end);
314 self.temporal_sampler.set_period_end_dynamics(dynamics);
315 }
316
317 self.temporal_patterns_config = Some(config);
318 self
319 }
320
321 fn convert_processing_lag_config(
323 schema: &datasynth_config::schema::ProcessingLagSchemaConfig,
324 ) -> ProcessingLagConfig {
325 let mut config = ProcessingLagConfig {
326 enabled: schema.enabled,
327 ..Default::default()
328 };
329
330 let convert_lag = |lag: &datasynth_config::schema::LagDistributionSchemaConfig| {
332 let mut dist = LagDistribution::log_normal(lag.mu, lag.sigma);
333 if let Some(min) = lag.min_hours {
334 dist.min_lag_hours = min;
335 }
336 if let Some(max) = lag.max_hours {
337 dist.max_lag_hours = max;
338 }
339 dist
340 };
341
342 if let Some(ref lag) = schema.sales_order_lag {
344 config
345 .event_lags
346 .insert(EventType::SalesOrder, convert_lag(lag));
347 }
348 if let Some(ref lag) = schema.purchase_order_lag {
349 config
350 .event_lags
351 .insert(EventType::PurchaseOrder, convert_lag(lag));
352 }
353 if let Some(ref lag) = schema.goods_receipt_lag {
354 config
355 .event_lags
356 .insert(EventType::GoodsReceipt, convert_lag(lag));
357 }
358 if let Some(ref lag) = schema.invoice_receipt_lag {
359 config
360 .event_lags
361 .insert(EventType::InvoiceReceipt, convert_lag(lag));
362 }
363 if let Some(ref lag) = schema.invoice_issue_lag {
364 config
365 .event_lags
366 .insert(EventType::InvoiceIssue, convert_lag(lag));
367 }
368 if let Some(ref lag) = schema.payment_lag {
369 config
370 .event_lags
371 .insert(EventType::Payment, convert_lag(lag));
372 }
373 if let Some(ref lag) = schema.journal_entry_lag {
374 config
375 .event_lags
376 .insert(EventType::JournalEntry, convert_lag(lag));
377 }
378
379 if let Some(ref cross_day) = schema.cross_day_posting {
381 config.cross_day = CrossDayConfig {
382 enabled: cross_day.enabled,
383 probability_by_hour: cross_day.probability_by_hour.clone(),
384 ..Default::default()
385 };
386 }
387
388 config
389 }
390
391 fn convert_period_end_config(
393 schema: &datasynth_config::schema::PeriodEndSchemaConfig,
394 ) -> PeriodEndDynamics {
395 let model_type = schema.model.as_deref().unwrap_or("exponential");
396
397 let convert_period =
399 |period: Option<&datasynth_config::schema::PeriodEndModelSchemaConfig>,
400 default_peak: f64|
401 -> PeriodEndConfig {
402 if let Some(p) = period {
403 let model = match model_type {
404 "flat" => PeriodEndModel::FlatMultiplier {
405 multiplier: p.peak_multiplier.unwrap_or(default_peak),
406 },
407 "extended_crunch" => PeriodEndModel::ExtendedCrunch {
408 start_day: p.start_day.unwrap_or(-10),
409 sustained_high_days: p.sustained_high_days.unwrap_or(3),
410 peak_multiplier: p.peak_multiplier.unwrap_or(default_peak),
411 ramp_up_days: 3, },
413 _ => PeriodEndModel::ExponentialAcceleration {
414 start_day: p.start_day.unwrap_or(-10),
415 base_multiplier: p.base_multiplier.unwrap_or(1.0),
416 peak_multiplier: p.peak_multiplier.unwrap_or(default_peak),
417 decay_rate: p.decay_rate.unwrap_or(0.3),
418 },
419 };
420 PeriodEndConfig {
421 enabled: true,
422 model,
423 additional_multiplier: p.additional_multiplier.unwrap_or(1.0),
424 }
425 } else {
426 PeriodEndConfig {
427 enabled: true,
428 model: PeriodEndModel::ExponentialAcceleration {
429 start_day: -10,
430 base_multiplier: 1.0,
431 peak_multiplier: default_peak,
432 decay_rate: 0.3,
433 },
434 additional_multiplier: 1.0,
435 }
436 }
437 };
438
439 PeriodEndDynamics::new(
440 convert_period(schema.month_end.as_ref(), 2.0),
441 convert_period(schema.quarter_end.as_ref(), 3.5),
442 convert_period(schema.year_end.as_ref(), 5.0),
443 )
444 }
445
446 fn parse_region(region_str: &str) -> Region {
448 match region_str.to_uppercase().as_str() {
449 "US" => Region::US,
450 "DE" => Region::DE,
451 "GB" => Region::GB,
452 "CN" => Region::CN,
453 "JP" => Region::JP,
454 "IN" => Region::IN,
455 "BR" => Region::BR,
456 "MX" => Region::MX,
457 "AU" => Region::AU,
458 "SG" => Region::SG,
459 "KR" => Region::KR,
460 _ => Region::US,
461 }
462 }
463
464 pub fn set_company_selector(&mut self, selector: WeightedCompanySelector) {
466 self.company_selector = selector;
467 }
468
469 pub fn company_selector(&self) -> &WeightedCompanySelector {
471 &self.company_selector
472 }
473
474 pub fn set_fraud_config(&mut self, config: FraudConfig) {
476 self.fraud_config = config;
477 }
478
479 pub fn with_vendors(mut self, vendors: &[Vendor]) -> Self {
484 if !vendors.is_empty() {
485 self.vendor_pool = VendorPool::from_vendors(vendors.to_vec());
486 self.using_real_master_data = true;
487 }
488 self
489 }
490
491 pub fn with_customers(mut self, customers: &[Customer]) -> Self {
496 if !customers.is_empty() {
497 self.customer_pool = CustomerPool::from_customers(customers.to_vec());
498 self.using_real_master_data = true;
499 }
500 self
501 }
502
503 pub fn with_materials(mut self, materials: &[Material]) -> Self {
507 if !materials.is_empty() {
508 self.material_pool = Some(MaterialPool::from_materials(materials.to_vec()));
509 self.using_real_master_data = true;
510 }
511 self
512 }
513
514 pub fn with_master_data(
519 self,
520 vendors: &[Vendor],
521 customers: &[Customer],
522 materials: &[Material],
523 ) -> Self {
524 self.with_vendors(vendors)
525 .with_customers(customers)
526 .with_materials(materials)
527 }
528
529 pub fn is_using_real_master_data(&self) -> bool {
531 self.using_real_master_data
532 }
533
534 fn determine_fraud(&mut self) -> Option<FraudType> {
536 if !self.fraud_config.enabled {
537 return None;
538 }
539
540 if self.rng.gen::<f64>() >= self.fraud_config.fraud_rate {
542 return None;
543 }
544
545 Some(self.select_fraud_type())
547 }
548
549 fn select_fraud_type(&mut self) -> FraudType {
551 let dist = &self.fraud_config.fraud_type_distribution;
552 let roll: f64 = self.rng.gen();
553
554 let mut cumulative = 0.0;
555
556 cumulative += dist.suspense_account_abuse;
557 if roll < cumulative {
558 return FraudType::SuspenseAccountAbuse;
559 }
560
561 cumulative += dist.fictitious_transaction;
562 if roll < cumulative {
563 return FraudType::FictitiousTransaction;
564 }
565
566 cumulative += dist.revenue_manipulation;
567 if roll < cumulative {
568 return FraudType::RevenueManipulation;
569 }
570
571 cumulative += dist.expense_capitalization;
572 if roll < cumulative {
573 return FraudType::ExpenseCapitalization;
574 }
575
576 cumulative += dist.split_transaction;
577 if roll < cumulative {
578 return FraudType::SplitTransaction;
579 }
580
581 cumulative += dist.timing_anomaly;
582 if roll < cumulative {
583 return FraudType::TimingAnomaly;
584 }
585
586 cumulative += dist.unauthorized_access;
587 if roll < cumulative {
588 return FraudType::UnauthorizedAccess;
589 }
590
591 FraudType::DuplicatePayment
593 }
594
595 fn fraud_type_to_amount_pattern(&self, fraud_type: FraudType) -> FraudAmountPattern {
597 match fraud_type {
598 FraudType::SplitTransaction | FraudType::JustBelowThreshold => {
599 FraudAmountPattern::ThresholdAdjacent
600 }
601 FraudType::FictitiousTransaction
602 | FraudType::FictitiousEntry
603 | FraudType::SuspenseAccountAbuse
604 | FraudType::RoundDollarManipulation => FraudAmountPattern::ObviousRoundNumbers,
605 FraudType::RevenueManipulation
606 | FraudType::ExpenseCapitalization
607 | FraudType::ImproperCapitalization
608 | FraudType::ReserveManipulation
609 | FraudType::UnauthorizedAccess
610 | FraudType::PrematureRevenue
611 | FraudType::UnderstatedLiabilities
612 | FraudType::OverstatedAssets
613 | FraudType::ChannelStuffing => FraudAmountPattern::StatisticallyImprobable,
614 FraudType::DuplicatePayment
615 | FraudType::TimingAnomaly
616 | FraudType::SelfApproval
617 | FraudType::ExceededApprovalLimit
618 | FraudType::SegregationOfDutiesViolation
619 | FraudType::UnauthorizedApproval
620 | FraudType::CollusiveApproval
621 | FraudType::FictitiousVendor
622 | FraudType::ShellCompanyPayment
623 | FraudType::Kickback
624 | FraudType::KickbackScheme
625 | FraudType::InvoiceManipulation
626 | FraudType::AssetMisappropriation
627 | FraudType::InventoryTheft
628 | FraudType::GhostEmployee => FraudAmountPattern::Normal,
629 FraudType::ImproperRevenueRecognition
631 | FraudType::ImproperPoAllocation
632 | FraudType::VariableConsiderationManipulation
633 | FraudType::ContractModificationMisstatement => {
634 FraudAmountPattern::StatisticallyImprobable
635 }
636 FraudType::LeaseClassificationManipulation
638 | FraudType::OffBalanceSheetLease
639 | FraudType::LeaseLiabilityUnderstatement
640 | FraudType::RouAssetMisstatement => FraudAmountPattern::StatisticallyImprobable,
641 FraudType::FairValueHierarchyManipulation
643 | FraudType::Level3InputManipulation
644 | FraudType::ValuationTechniqueManipulation => {
645 FraudAmountPattern::StatisticallyImprobable
646 }
647 FraudType::DelayedImpairment
649 | FraudType::ImpairmentTestAvoidance
650 | FraudType::CashFlowProjectionManipulation
651 | FraudType::ImproperImpairmentReversal => FraudAmountPattern::StatisticallyImprobable,
652 }
653 }
654
655 fn generate_deterministic_uuid(&self) -> uuid::Uuid {
657 self.uuid_factory.next()
658 }
659
660 pub fn generate(&mut self) -> JournalEntry {
662 if let Some(ref state) = self.batch_state {
664 if state.remaining > 0 {
665 return self.generate_batched_entry();
666 }
667 }
668
669 self.count += 1;
670
671 let document_id = self.generate_deterministic_uuid();
673
674 let mut posting_date = self
676 .temporal_sampler
677 .sample_date(self.start_date, self.end_date);
678
679 if let Some(ref calc) = self.business_day_calculator {
681 if !calc.is_business_day(posting_date) {
682 posting_date = calc.next_business_day(posting_date, false);
684 if posting_date > self.end_date {
686 posting_date = calc.prev_business_day(self.end_date, true);
687 }
688 }
689 }
690
691 let company_code = self.company_selector.select(&mut self.rng).to_string();
693
694 let line_spec = self.line_sampler.sample();
696
697 let source = self.select_source();
699 let is_automated = matches!(
700 source,
701 TransactionSource::Automated | TransactionSource::Recurring
702 );
703
704 let business_process = self.select_business_process();
706
707 let fraud_type = self.determine_fraud();
709 let is_fraud = fraud_type.is_some();
710
711 let time = self.temporal_sampler.sample_time(!is_automated);
713 let created_at = posting_date.and_time(time).and_utc();
714
715 let (created_by, user_persona) = self.select_user(is_automated);
717
718 let mut header =
720 JournalEntryHeader::with_deterministic_id(company_code, posting_date, document_id);
721 header.created_at = created_at;
722 header.source = source;
723 header.created_by = created_by;
724 header.user_persona = user_persona;
725 header.business_process = Some(business_process);
726 header.is_fraud = is_fraud;
727 header.fraud_type = fraud_type;
728
729 let mut context =
731 DescriptionContext::with_period(posting_date.month(), posting_date.year());
732
733 match business_process {
735 BusinessProcess::P2P => {
736 if let Some(vendor) = self.vendor_pool.random_vendor(&mut self.rng) {
737 context.vendor_name = Some(vendor.name.clone());
738 }
739 }
740 BusinessProcess::O2C => {
741 if let Some(customer) = self.customer_pool.random_customer(&mut self.rng) {
742 context.customer_name = Some(customer.name.clone());
743 }
744 }
745 _ => {}
746 }
747
748 if self.template_config.descriptions.generate_header_text {
750 header.header_text = Some(self.description_generator.generate_header_text(
751 business_process,
752 &context,
753 &mut self.rng,
754 ));
755 }
756
757 if self.template_config.references.generate_references {
759 header.reference = Some(
760 self.reference_generator
761 .generate_for_process_year(business_process, posting_date.year()),
762 );
763 }
764
765 let mut entry = JournalEntry::new(header);
767
768 let base_amount = if let Some(ft) = fraud_type {
770 let pattern = self.fraud_type_to_amount_pattern(ft);
771 self.amount_sampler.sample_fraud(pattern)
772 } else {
773 self.amount_sampler.sample()
774 };
775
776 let drift_adjusted_amount = {
778 let drift = self.get_drift_adjustments(posting_date);
779 if drift.amount_mean_multiplier != 1.0 {
780 let multiplier = drift.amount_mean_multiplier * drift.seasonal_factor;
782 let adjusted = base_amount.to_f64().unwrap_or(1.0) * multiplier;
783 Decimal::from_f64_retain(adjusted).unwrap_or(base_amount)
784 } else {
785 base_amount
786 }
787 };
788
789 let total_amount = if is_automated {
791 drift_adjusted_amount } else {
793 self.apply_human_variation(drift_adjusted_amount)
794 };
795
796 let debit_amounts = self
798 .amount_sampler
799 .sample_summing_to(line_spec.debit_count, total_amount);
800 for (i, amount) in debit_amounts.into_iter().enumerate() {
801 let account_number = self.select_debit_account().account_number.clone();
802 let mut line = JournalEntryLine::debit(
803 entry.header.document_id,
804 (i + 1) as u32,
805 account_number.clone(),
806 amount,
807 );
808
809 if self.template_config.descriptions.generate_line_text {
811 line.line_text = Some(self.description_generator.generate_line_text(
812 &account_number,
813 &context,
814 &mut self.rng,
815 ));
816 }
817
818 entry.add_line(line);
819 }
820
821 let credit_amounts = self
823 .amount_sampler
824 .sample_summing_to(line_spec.credit_count, total_amount);
825 for (i, amount) in credit_amounts.into_iter().enumerate() {
826 let account_number = self.select_credit_account().account_number.clone();
827 let mut line = JournalEntryLine::credit(
828 entry.header.document_id,
829 (line_spec.debit_count + i + 1) as u32,
830 account_number.clone(),
831 amount,
832 );
833
834 if self.template_config.descriptions.generate_line_text {
836 line.line_text = Some(self.description_generator.generate_line_text(
837 &account_number,
838 &context,
839 &mut self.rng,
840 ));
841 }
842
843 entry.add_line(line);
844 }
845
846 if self.persona_errors_enabled && !is_automated {
848 self.maybe_inject_persona_error(&mut entry);
849 }
850
851 if self.approval_enabled {
853 self.maybe_apply_approval_workflow(&mut entry, posting_date);
854 }
855
856 self.maybe_start_batch(&entry);
858
859 entry
860 }
861
862 pub fn with_persona_errors(mut self, enabled: bool) -> Self {
867 self.persona_errors_enabled = enabled;
868 self
869 }
870
871 pub fn with_fraud_config(mut self, config: FraudConfig) -> Self {
876 self.fraud_config = config;
877 self
878 }
879
880 pub fn persona_errors_enabled(&self) -> bool {
882 self.persona_errors_enabled
883 }
884
885 pub fn with_batching(mut self, enabled: bool) -> Self {
890 if !enabled {
891 self.batch_state = None;
892 }
893 self
894 }
895
896 pub fn batching_enabled(&self) -> bool {
898 true
900 }
901
902 fn maybe_start_batch(&mut self, entry: &JournalEntry) {
907 if entry.header.source == TransactionSource::Automated || entry.header.is_fraud {
909 return;
910 }
911
912 if self.rng.gen::<f64>() > 0.15 {
914 return;
915 }
916
917 let base_account = entry
919 .lines
920 .first()
921 .map(|l| l.gl_account.clone())
922 .unwrap_or_default();
923
924 let base_amount = entry.total_debit();
925
926 self.batch_state = Some(BatchState {
927 base_vendor: None, base_customer: None,
929 base_account_number: base_account,
930 base_amount,
931 base_business_process: entry.header.business_process,
932 base_posting_date: entry.header.posting_date,
933 remaining: self.rng.gen_range(2..7), });
935 }
936
937 fn generate_batched_entry(&mut self) -> JournalEntry {
945 use rust_decimal::Decimal;
946
947 if let Some(ref mut state) = self.batch_state {
949 state.remaining = state.remaining.saturating_sub(1);
950 }
951
952 let batch = self.batch_state.clone().unwrap();
953
954 let posting_date = batch.base_posting_date;
956
957 self.count += 1;
958 let document_id = self.generate_deterministic_uuid();
959
960 let company_code = self.company_selector.select(&mut self.rng).to_string();
962
963 let _line_spec = LineItemSpec {
965 total_count: 2,
966 debit_count: 1,
967 credit_count: 1,
968 split_type: DebitCreditSplit::Equal,
969 };
970
971 let source = TransactionSource::Manual;
973
974 let business_process = batch.base_business_process.unwrap_or(BusinessProcess::R2R);
976
977 let time = self.temporal_sampler.sample_time(true);
979 let created_at = posting_date.and_time(time).and_utc();
980
981 let (created_by, user_persona) = self.select_user(false);
983
984 let mut header =
986 JournalEntryHeader::with_deterministic_id(company_code, posting_date, document_id);
987 header.created_at = created_at;
988 header.source = source;
989 header.created_by = created_by;
990 header.user_persona = user_persona;
991 header.business_process = Some(business_process);
992
993 let variation = self.rng.gen_range(-0.15..0.15);
995 let varied_amount =
996 batch.base_amount * (Decimal::ONE + Decimal::try_from(variation).unwrap_or_default());
997 let total_amount = varied_amount.round_dp(2).max(Decimal::from(1));
998
999 let mut entry = JournalEntry::new(header);
1001
1002 let debit_line = JournalEntryLine::debit(
1004 entry.header.document_id,
1005 1,
1006 batch.base_account_number.clone(),
1007 total_amount,
1008 );
1009 entry.add_line(debit_line);
1010
1011 let credit_account = self.select_credit_account().account_number.clone();
1013 let credit_line =
1014 JournalEntryLine::credit(entry.header.document_id, 2, credit_account, total_amount);
1015 entry.add_line(credit_line);
1016
1017 if self.persona_errors_enabled {
1019 self.maybe_inject_persona_error(&mut entry);
1020 }
1021
1022 if self.approval_enabled {
1024 self.maybe_apply_approval_workflow(&mut entry, posting_date);
1025 }
1026
1027 if batch.remaining <= 1 {
1029 self.batch_state = None;
1030 }
1031
1032 entry
1033 }
1034
1035 fn maybe_inject_persona_error(&mut self, entry: &mut JournalEntry) {
1037 let persona_str = &entry.header.user_persona;
1039 let persona = match persona_str.to_lowercase().as_str() {
1040 s if s.contains("junior") => UserPersona::JuniorAccountant,
1041 s if s.contains("senior") => UserPersona::SeniorAccountant,
1042 s if s.contains("controller") => UserPersona::Controller,
1043 s if s.contains("manager") => UserPersona::Manager,
1044 s if s.contains("executive") => UserPersona::Executive,
1045 _ => return, };
1047
1048 let base_error_rate = persona.error_rate();
1050
1051 let adjusted_rate = self.apply_stress_factors(base_error_rate, entry.header.posting_date);
1053
1054 if self.rng.gen::<f64>() >= adjusted_rate {
1056 return; }
1058
1059 self.inject_human_error(entry, persona);
1061 }
1062
1063 fn apply_stress_factors(&self, base_rate: f64, posting_date: chrono::NaiveDate) -> f64 {
1072 use chrono::Datelike;
1073
1074 let mut rate = base_rate;
1075 let day = posting_date.day();
1076 let month = posting_date.month();
1077
1078 if month == 12 && day >= 28 {
1080 rate *= 2.0;
1081 return rate.min(0.5); }
1083
1084 if matches!(month, 3 | 6 | 9 | 12) && day >= 28 {
1086 rate *= 1.75; return rate.min(0.4);
1088 }
1089
1090 if day >= 28 {
1092 rate *= 1.5; }
1094
1095 let weekday = posting_date.weekday();
1097 match weekday {
1098 chrono::Weekday::Mon => {
1099 rate *= 1.2;
1101 }
1102 chrono::Weekday::Fri => {
1103 rate *= 1.3;
1105 }
1106 _ => {}
1107 }
1108
1109 rate.min(0.4)
1111 }
1112
1113 fn apply_human_variation(&mut self, amount: rust_decimal::Decimal) -> rust_decimal::Decimal {
1122 use rust_decimal::Decimal;
1123
1124 if amount < Decimal::from(10) {
1126 return amount;
1127 }
1128
1129 if self.rng.gen::<f64>() > 0.70 {
1131 return amount;
1132 }
1133
1134 let variation_type: u8 = self.rng.gen_range(0..4);
1136
1137 match variation_type {
1138 0 => {
1139 let variation_pct = self.rng.gen_range(-0.02..0.02);
1141 let variation = amount * Decimal::try_from(variation_pct).unwrap_or_default();
1142 (amount + variation).round_dp(2)
1143 }
1144 1 => {
1145 let ten = Decimal::from(10);
1147 (amount / ten).round() * ten
1148 }
1149 2 => {
1150 if amount >= Decimal::from(500) {
1152 let hundred = Decimal::from(100);
1153 (amount / hundred).round() * hundred
1154 } else {
1155 amount
1156 }
1157 }
1158 3 => {
1159 let cents = Decimal::new(self.rng.gen_range(-100..100), 2);
1161 (amount + cents).max(Decimal::ZERO).round_dp(2)
1162 }
1163 _ => amount,
1164 }
1165 }
1166
1167 fn rebalance_entry(entry: &mut JournalEntry, modified_was_debit: bool, impact: Decimal) {
1173 let balancing_idx = entry.lines.iter().position(|l| {
1175 if modified_was_debit {
1176 l.credit_amount > Decimal::ZERO
1177 } else {
1178 l.debit_amount > Decimal::ZERO
1179 }
1180 });
1181
1182 if let Some(idx) = balancing_idx {
1183 if modified_was_debit {
1184 entry.lines[idx].credit_amount += impact;
1185 } else {
1186 entry.lines[idx].debit_amount += impact;
1187 }
1188 }
1189 }
1190
1191 fn inject_human_error(&mut self, entry: &mut JournalEntry, persona: UserPersona) {
1196 use rust_decimal::Decimal;
1197
1198 let error_type: u8 = match persona {
1200 UserPersona::JuniorAccountant => {
1201 self.rng.gen_range(0..5)
1203 }
1204 UserPersona::SeniorAccountant => {
1205 self.rng.gen_range(0..3)
1207 }
1208 UserPersona::Controller | UserPersona::Manager => {
1209 self.rng.gen_range(3..5)
1211 }
1212 _ => return,
1213 };
1214
1215 match error_type {
1216 0 => {
1217 if let Some(line) = entry.lines.get_mut(0) {
1219 let is_debit = line.debit_amount > Decimal::ZERO;
1220 let original_amount = if is_debit {
1221 line.debit_amount
1222 } else {
1223 line.credit_amount
1224 };
1225
1226 let s = original_amount.to_string();
1228 if s.len() >= 2 {
1229 let chars: Vec<char> = s.chars().collect();
1230 let pos = self.rng.gen_range(0..chars.len().saturating_sub(1));
1231 if chars[pos].is_ascii_digit()
1232 && chars.get(pos + 1).is_some_and(|c| c.is_ascii_digit())
1233 {
1234 let mut new_chars = chars;
1235 new_chars.swap(pos, pos + 1);
1236 if let Ok(new_amount) =
1237 new_chars.into_iter().collect::<String>().parse::<Decimal>()
1238 {
1239 let impact = new_amount - original_amount;
1240
1241 if is_debit {
1243 entry.lines[0].debit_amount = new_amount;
1244 } else {
1245 entry.lines[0].credit_amount = new_amount;
1246 }
1247
1248 Self::rebalance_entry(entry, is_debit, impact);
1250
1251 entry.header.header_text = Some(
1252 entry.header.header_text.clone().unwrap_or_default()
1253 + " [HUMAN_ERROR:TRANSPOSITION]",
1254 );
1255 }
1256 }
1257 }
1258 }
1259 }
1260 1 => {
1261 if let Some(line) = entry.lines.get_mut(0) {
1263 let is_debit = line.debit_amount > Decimal::ZERO;
1264 let original_amount = if is_debit {
1265 line.debit_amount
1266 } else {
1267 line.credit_amount
1268 };
1269
1270 let new_amount = original_amount * Decimal::new(10, 0);
1271 let impact = new_amount - original_amount;
1272
1273 if is_debit {
1275 entry.lines[0].debit_amount = new_amount;
1276 } else {
1277 entry.lines[0].credit_amount = new_amount;
1278 }
1279
1280 Self::rebalance_entry(entry, is_debit, impact);
1282
1283 entry.header.header_text = Some(
1284 entry.header.header_text.clone().unwrap_or_default()
1285 + " [HUMAN_ERROR:DECIMAL_SHIFT]",
1286 );
1287 }
1288 }
1289 2 => {
1290 if let Some(ref mut text) = entry.header.header_text {
1292 let typos = ["teh", "adn", "wiht", "taht", "recieve"];
1293 let correct = ["the", "and", "with", "that", "receive"];
1294 let idx = self.rng.gen_range(0..typos.len());
1295 if text.to_lowercase().contains(correct[idx]) {
1296 *text = text.replace(correct[idx], typos[idx]);
1297 *text = format!("{} [HUMAN_ERROR:TYPO]", text);
1298 }
1299 }
1300 }
1301 3 => {
1302 if let Some(line) = entry.lines.get_mut(0) {
1304 let is_debit = line.debit_amount > Decimal::ZERO;
1305 let original_amount = if is_debit {
1306 line.debit_amount
1307 } else {
1308 line.credit_amount
1309 };
1310
1311 let new_amount =
1312 (original_amount / Decimal::new(100, 0)).round() * Decimal::new(100, 0);
1313 let impact = new_amount - original_amount;
1314
1315 if is_debit {
1317 entry.lines[0].debit_amount = new_amount;
1318 } else {
1319 entry.lines[0].credit_amount = new_amount;
1320 }
1321
1322 Self::rebalance_entry(entry, is_debit, impact);
1324
1325 entry.header.header_text = Some(
1326 entry.header.header_text.clone().unwrap_or_default()
1327 + " [HUMAN_ERROR:ROUNDED]",
1328 );
1329 }
1330 }
1331 4 => {
1332 if entry.header.document_date == entry.header.posting_date {
1335 let days_late = self.rng.gen_range(5..15);
1336 entry.header.document_date =
1337 entry.header.posting_date - chrono::Duration::days(days_late);
1338 entry.header.header_text = Some(
1339 entry.header.header_text.clone().unwrap_or_default()
1340 + " [HUMAN_ERROR:LATE_POSTING]",
1341 );
1342 }
1343 }
1344 _ => {}
1345 }
1346 }
1347
1348 fn maybe_apply_approval_workflow(
1353 &mut self,
1354 entry: &mut JournalEntry,
1355 _posting_date: NaiveDate,
1356 ) {
1357 use rust_decimal::Decimal;
1358
1359 let amount = entry.total_debit();
1360
1361 if amount <= self.approval_threshold {
1363 let workflow = ApprovalWorkflow::auto_approved(
1365 entry.header.created_by.clone(),
1366 entry.header.user_persona.clone(),
1367 amount,
1368 entry.header.created_at,
1369 );
1370 entry.header.approval_workflow = Some(workflow);
1371 return;
1372 }
1373
1374 entry.header.sox_relevant = true;
1376
1377 let required_levels = if amount > Decimal::new(100000, 0) {
1379 3 } else if amount > Decimal::new(50000, 0) {
1381 2 } else {
1383 1 };
1385
1386 let mut workflow = ApprovalWorkflow::new(
1388 entry.header.created_by.clone(),
1389 entry.header.user_persona.clone(),
1390 amount,
1391 );
1392 workflow.required_levels = required_levels;
1393
1394 let submit_time = entry.header.created_at;
1396 let submit_action = ApprovalAction::new(
1397 entry.header.created_by.clone(),
1398 entry.header.user_persona.clone(),
1399 self.parse_persona(&entry.header.user_persona),
1400 ApprovalActionType::Submit,
1401 0,
1402 )
1403 .with_timestamp(submit_time);
1404
1405 workflow.actions.push(submit_action);
1406 workflow.status = ApprovalStatus::Pending;
1407 workflow.submitted_at = Some(submit_time);
1408
1409 let mut current_time = submit_time;
1411 for level in 1..=required_levels {
1412 let delay_hours = self.rng.gen_range(1..4);
1414 current_time += chrono::Duration::hours(delay_hours);
1415
1416 while current_time.weekday() == chrono::Weekday::Sat
1418 || current_time.weekday() == chrono::Weekday::Sun
1419 {
1420 current_time += chrono::Duration::days(1);
1421 }
1422
1423 let (approver_id, approver_role) = self.select_approver(level);
1425
1426 let approve_action = ApprovalAction::new(
1427 approver_id.clone(),
1428 format!("{:?}", approver_role),
1429 approver_role,
1430 ApprovalActionType::Approve,
1431 level,
1432 )
1433 .with_timestamp(current_time);
1434
1435 workflow.actions.push(approve_action);
1436 workflow.current_level = level;
1437 }
1438
1439 workflow.status = ApprovalStatus::Approved;
1441 workflow.approved_at = Some(current_time);
1442
1443 entry.header.approval_workflow = Some(workflow);
1444 }
1445
1446 fn select_approver(&mut self, level: u8) -> (String, UserPersona) {
1448 let persona = match level {
1449 1 => UserPersona::Manager,
1450 2 => UserPersona::Controller,
1451 _ => UserPersona::Executive,
1452 };
1453
1454 if let Some(ref pool) = self.user_pool {
1456 if let Some(user) = pool.get_random_user(persona, &mut self.rng) {
1457 return (user.user_id.clone(), persona);
1458 }
1459 }
1460
1461 let approver_id = match persona {
1463 UserPersona::Manager => format!("MGR{:04}", self.rng.gen_range(1..100)),
1464 UserPersona::Controller => format!("CTRL{:04}", self.rng.gen_range(1..20)),
1465 UserPersona::Executive => format!("EXEC{:04}", self.rng.gen_range(1..10)),
1466 _ => format!("USR{:04}", self.rng.gen_range(1..1000)),
1467 };
1468
1469 (approver_id, persona)
1470 }
1471
1472 fn parse_persona(&self, persona_str: &str) -> UserPersona {
1474 match persona_str.to_lowercase().as_str() {
1475 s if s.contains("junior") => UserPersona::JuniorAccountant,
1476 s if s.contains("senior") => UserPersona::SeniorAccountant,
1477 s if s.contains("controller") => UserPersona::Controller,
1478 s if s.contains("manager") => UserPersona::Manager,
1479 s if s.contains("executive") => UserPersona::Executive,
1480 s if s.contains("automated") || s.contains("system") => UserPersona::AutomatedSystem,
1481 _ => UserPersona::JuniorAccountant, }
1483 }
1484
1485 pub fn with_approval(mut self, enabled: bool) -> Self {
1487 self.approval_enabled = enabled;
1488 self
1489 }
1490
1491 pub fn with_approval_threshold(mut self, threshold: rust_decimal::Decimal) -> Self {
1493 self.approval_threshold = threshold;
1494 self
1495 }
1496
1497 pub fn with_drift_controller(mut self, controller: DriftController) -> Self {
1503 self.drift_controller = Some(controller);
1504 self
1505 }
1506
1507 pub fn with_drift_config(mut self, config: DriftConfig, seed: u64) -> Self {
1512 if config.enabled {
1513 let total_periods = self.calculate_total_periods();
1514 self.drift_controller = Some(DriftController::new(config, seed, total_periods));
1515 }
1516 self
1517 }
1518
1519 fn calculate_total_periods(&self) -> u32 {
1521 let start_year = self.start_date.year();
1522 let start_month = self.start_date.month();
1523 let end_year = self.end_date.year();
1524 let end_month = self.end_date.month();
1525
1526 ((end_year - start_year) * 12 + (end_month as i32 - start_month as i32) + 1).max(1) as u32
1527 }
1528
1529 fn date_to_period(&self, date: NaiveDate) -> u32 {
1531 let start_year = self.start_date.year();
1532 let start_month = self.start_date.month() as i32;
1533 let date_year = date.year();
1534 let date_month = date.month() as i32;
1535
1536 ((date_year - start_year) * 12 + (date_month - start_month)).max(0) as u32
1537 }
1538
1539 fn get_drift_adjustments(&self, date: NaiveDate) -> DriftAdjustments {
1541 if let Some(ref controller) = self.drift_controller {
1542 let period = self.date_to_period(date);
1543 controller.compute_adjustments(period)
1544 } else {
1545 DriftAdjustments::none()
1546 }
1547 }
1548
1549 fn select_user(&mut self, is_automated: bool) -> (String, String) {
1551 if let Some(ref pool) = self.user_pool {
1552 let persona = if is_automated {
1553 UserPersona::AutomatedSystem
1554 } else {
1555 let roll: f64 = self.rng.gen();
1557 if roll < 0.4 {
1558 UserPersona::JuniorAccountant
1559 } else if roll < 0.7 {
1560 UserPersona::SeniorAccountant
1561 } else if roll < 0.85 {
1562 UserPersona::Controller
1563 } else {
1564 UserPersona::Manager
1565 }
1566 };
1567
1568 if let Some(user) = pool.get_random_user(persona, &mut self.rng) {
1569 return (
1570 user.user_id.clone(),
1571 format!("{:?}", user.persona).to_lowercase(),
1572 );
1573 }
1574 }
1575
1576 if is_automated {
1578 (
1579 format!("BATCH{:04}", self.rng.gen_range(1..=20)),
1580 "automated_system".to_string(),
1581 )
1582 } else {
1583 (
1584 format!("USER{:04}", self.rng.gen_range(1..=40)),
1585 "senior_accountant".to_string(),
1586 )
1587 }
1588 }
1589
1590 fn select_source(&mut self) -> TransactionSource {
1592 let roll: f64 = self.rng.gen();
1593 let dist = &self.config.source_distribution;
1594
1595 if roll < dist.manual {
1596 TransactionSource::Manual
1597 } else if roll < dist.manual + dist.automated {
1598 TransactionSource::Automated
1599 } else if roll < dist.manual + dist.automated + dist.recurring {
1600 TransactionSource::Recurring
1601 } else {
1602 TransactionSource::Adjustment
1603 }
1604 }
1605
1606 fn select_business_process(&mut self) -> BusinessProcess {
1608 let roll: f64 = self.rng.gen();
1609
1610 if roll < 0.35 {
1612 BusinessProcess::O2C
1613 } else if roll < 0.65 {
1614 BusinessProcess::P2P
1615 } else if roll < 0.85 {
1616 BusinessProcess::R2R
1617 } else if roll < 0.95 {
1618 BusinessProcess::H2R
1619 } else {
1620 BusinessProcess::A2R
1621 }
1622 }
1623
1624 fn select_debit_account(&mut self) -> &GLAccount {
1625 let accounts = self.coa.get_accounts_by_type(AccountType::Asset);
1626 let expense_accounts = self.coa.get_accounts_by_type(AccountType::Expense);
1627
1628 let all: Vec<_> = if self.rng.gen::<f64>() < 0.6 {
1630 accounts
1631 } else {
1632 expense_accounts
1633 };
1634
1635 all.choose(&mut self.rng)
1636 .copied()
1637 .unwrap_or_else(|| &self.coa.accounts[0])
1638 }
1639
1640 fn select_credit_account(&mut self) -> &GLAccount {
1641 let liability_accounts = self.coa.get_accounts_by_type(AccountType::Liability);
1642 let revenue_accounts = self.coa.get_accounts_by_type(AccountType::Revenue);
1643
1644 let all: Vec<_> = if self.rng.gen::<f64>() < 0.6 {
1646 liability_accounts
1647 } else {
1648 revenue_accounts
1649 };
1650
1651 all.choose(&mut self.rng)
1652 .copied()
1653 .unwrap_or_else(|| &self.coa.accounts[0])
1654 }
1655}
1656
1657impl Generator for JournalEntryGenerator {
1658 type Item = JournalEntry;
1659 type Config = (
1660 TransactionConfig,
1661 Arc<ChartOfAccounts>,
1662 Vec<String>,
1663 NaiveDate,
1664 NaiveDate,
1665 );
1666
1667 fn new(config: Self::Config, seed: u64) -> Self {
1668 Self::new_with_params(config.0, config.1, config.2, config.3, config.4, seed)
1669 }
1670
1671 fn generate_one(&mut self) -> Self::Item {
1672 self.generate()
1673 }
1674
1675 fn reset(&mut self) {
1676 self.rng = ChaCha8Rng::seed_from_u64(self.seed);
1677 self.line_sampler.reset(self.seed + 1);
1678 self.amount_sampler.reset(self.seed + 2);
1679 self.temporal_sampler.reset(self.seed + 3);
1680 self.count = 0;
1681 self.uuid_factory.reset();
1682
1683 let mut ref_gen = ReferenceGenerator::new(
1685 self.start_date.year(),
1686 self.companies.first().map(|s| s.as_str()).unwrap_or("1000"),
1687 );
1688 ref_gen.set_prefix(
1689 ReferenceType::Invoice,
1690 &self.template_config.references.invoice_prefix,
1691 );
1692 ref_gen.set_prefix(
1693 ReferenceType::PurchaseOrder,
1694 &self.template_config.references.po_prefix,
1695 );
1696 ref_gen.set_prefix(
1697 ReferenceType::SalesOrder,
1698 &self.template_config.references.so_prefix,
1699 );
1700 self.reference_generator = ref_gen;
1701 }
1702
1703 fn count(&self) -> u64 {
1704 self.count
1705 }
1706
1707 fn seed(&self) -> u64 {
1708 self.seed
1709 }
1710}
1711
#[cfg(test)]
mod tests {
    use super::*;
    use crate::ChartOfAccountsGenerator;

    /// Builds the small manufacturing chart of accounts (seed 42) used by every test.
    fn small_coa() -> Arc<ChartOfAccounts> {
        let mut coa_gen =
            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
        Arc::new(coa_gen.generate())
    }

    /// Shorthand for a calendar date that is known to be valid.
    fn ymd(year: i32, month: u32, day: u32) -> NaiveDate {
        NaiveDate::from_ymd_opt(year, month, day).expect("test dates are valid calendar dates")
    }

    /// The single company code used throughout the tests.
    fn single_company() -> Vec<String> {
        vec!["1000".to_string()]
    }

    /// Default-configured generator for company "1000" covering calendar year 2024.
    fn full_year_generator(coa: Arc<ChartOfAccounts>, seed: u64) -> JournalEntryGenerator {
        JournalEntryGenerator::new_with_params(
            TransactionConfig::default(),
            coa,
            single_company(),
            ymd(2024, 1, 1),
            ymd(2024, 12, 31),
            seed,
        )
    }

    #[test]
    fn test_generate_balanced_entries() {
        let mut je_gen = full_year_generator(small_coa(), 42);

        let mut balanced_count = 0;
        for _ in 0..100 {
            let entry = je_gen.generate();

            // Entries tagged with an injected human error are exempt from the balance check.
            let has_human_error = entry
                .header
                .header_text
                .as_ref()
                .map(|t| t.contains("[HUMAN_ERROR:"))
                .unwrap_or(false);

            if !has_human_error {
                assert!(
                    entry.is_balanced(),
                    "Entry {:?} is not balanced",
                    entry.header.document_id
                );
                balanced_count += 1;
            }
            assert!(entry.line_count() >= 2, "Entry has fewer than 2 lines");
        }

        // Guard against the exemption above hiding most of the sample.
        assert!(
            balanced_count >= 80,
            "Expected at least 80 balanced entries, got {}",
            balanced_count
        );
    }

    #[test]
    fn test_deterministic_generation() {
        let coa = small_coa();

        // Two generators with identical inputs and seed must agree entry by entry.
        let mut gen1 = full_year_generator(Arc::clone(&coa), 42);
        let mut gen2 = full_year_generator(coa, 42);

        for _ in 0..50 {
            let e1 = gen1.generate();
            let e2 = gen2.generate();
            assert_eq!(e1.header.document_id, e2.header.document_id);
            assert_eq!(e1.total_debit(), e2.total_debit());
        }
    }

    #[test]
    fn test_templates_generate_descriptions() {
        let template_config = TemplateConfig {
            names: datasynth_config::schema::NameTemplateConfig {
                generate_realistic_names: true,
                email_domain: "test.com".to_string(),
                culture_distribution: datasynth_config::schema::CultureDistribution::default(),
            },
            descriptions: datasynth_config::schema::DescriptionTemplateConfig {
                generate_header_text: true,
                generate_line_text: true,
            },
            references: datasynth_config::schema::ReferenceTemplateConfig {
                generate_references: true,
                invoice_prefix: "TEST-INV".to_string(),
                po_prefix: "TEST-PO".to_string(),
                so_prefix: "TEST-SO".to_string(),
            },
        };

        // Persona errors are switched off so every entry can be checked for balance.
        let mut je_gen = JournalEntryGenerator::new_with_full_config(
            TransactionConfig::default(),
            small_coa(),
            single_company(),
            ymd(2024, 1, 1),
            ymd(2024, 12, 31),
            42,
            template_config,
            None,
        )
        .with_persona_errors(false);

        for _ in 0..10 {
            let entry = je_gen.generate();

            assert!(
                entry.header.header_text.is_some(),
                "Header text should be populated"
            );

            assert!(
                entry.header.reference.is_some(),
                "Reference should be populated"
            );

            assert!(
                entry.header.business_process.is_some(),
                "Business process should be set"
            );

            for line in &entry.lines {
                assert!(line.line_text.is_some(), "Line text should be populated");
            }

            assert!(entry.is_balanced());
        }
    }

    #[test]
    fn test_user_pool_integration() {
        let companies = single_company();

        let mut user_gen = crate::UserGenerator::new(42);
        let user_pool = user_gen.generate_standard(&companies);

        let mut je_gen = JournalEntryGenerator::new_with_full_config(
            TransactionConfig::default(),
            small_coa(),
            companies,
            ymd(2024, 1, 1),
            ymd(2024, 12, 31),
            42,
            TemplateConfig::default(),
            Some(user_pool),
        );

        for _ in 0..20 {
            let entry = je_gen.generate();

            assert!(!entry.header.created_by.is_empty());
        }
    }

    #[test]
    fn test_master_data_connection() {
        let vendors = vec![
            Vendor::new("V-TEST-001", "Test Vendor Alpha", VendorType::Supplier),
            Vendor::new("V-TEST-002", "Test Vendor Beta", VendorType::Technology),
        ];

        let customers = vec![
            Customer::new("C-TEST-001", "Test Customer One", CustomerType::Corporate),
            Customer::new(
                "C-TEST-002",
                "Test Customer Two",
                CustomerType::SmallBusiness,
            ),
        ];

        let materials = vec![Material::new(
            "MAT-TEST-001",
            "Test Material A",
            MaterialType::RawMaterial,
        )];

        let generator = full_year_generator(small_coa(), 42);

        // A freshly built generator has no externally supplied master data.
        assert!(!generator.is_using_real_master_data());

        let generator_with_data = generator
            .with_vendors(&vendors)
            .with_customers(&customers)
            .with_materials(&materials);

        assert!(generator_with_data.is_using_real_master_data());
    }

    #[test]
    fn test_with_master_data_convenience_method() {
        let vendors = vec![Vendor::new("V-001", "Vendor One", VendorType::Supplier)];
        let customers = vec![Customer::new(
            "C-001",
            "Customer One",
            CustomerType::Corporate,
        )];
        let materials = vec![Material::new(
            "MAT-001",
            "Material One",
            MaterialType::RawMaterial,
        )];

        let generator =
            full_year_generator(small_coa(), 42).with_master_data(&vendors, &customers, &materials);

        assert!(generator.is_using_real_master_data());
    }

    #[test]
    fn test_stress_factors_increase_error_rate() {
        let generator = full_year_generator(small_coa(), 42);

        let base_rate = 0.1;

        // NOTE(review): 2024-06-15 falls on a Saturday; confirm it is the
        // intended "no stress" baseline.
        let regular_day = ymd(2024, 6, 15);
        let regular_rate = generator.apply_stress_factors(base_rate, regular_day);
        assert!(
            (regular_rate - base_rate).abs() < 0.01,
            "Regular day should have minimal stress factor adjustment"
        );

        let month_end = ymd(2024, 6, 29);
        let month_end_rate = generator.apply_stress_factors(base_rate, month_end);
        assert!(
            month_end_rate > regular_rate,
            "Month end should have higher error rate than regular day"
        );

        let year_end = ymd(2024, 12, 30);
        let year_end_rate = generator.apply_stress_factors(base_rate, year_end);
        assert!(
            year_end_rate > month_end_rate,
            "Year end should have highest error rate"
        );

        // 2024-06-14 is a Friday.
        let friday = ymd(2024, 6, 14);
        let friday_rate = generator.apply_stress_factors(base_rate, friday);
        assert!(
            friday_rate > regular_rate,
            "Friday should have higher error rate than mid-week"
        );

        // 2024-06-17 is a Monday.
        let monday = ymd(2024, 6, 17);
        let monday_rate = generator.apply_stress_factors(base_rate, monday);
        assert!(
            monday_rate > regular_rate,
            "Monday should have higher error rate than mid-week"
        );
    }

    #[test]
    fn test_batching_produces_similar_entries() {
        // Persona errors are switched off so every entry can be checked for balance.
        let mut je_gen = full_year_generator(small_coa(), 123).with_persona_errors(false);

        let entries: Vec<JournalEntry> = (0..200).map(|_| je_gen.generate()).collect();

        for entry in &entries {
            assert!(
                entry.is_balanced(),
                "All entries including batched should be balanced"
            );
        }

        // Count how many entries share each posting date.
        let mut date_counts: std::collections::HashMap<NaiveDate, usize> =
            std::collections::HashMap::new();
        for entry in &entries {
            *date_counts.entry(entry.header.posting_date).or_insert(0) += 1;
        }

        let dates_with_multiple = date_counts.values().filter(|&&c| c > 1).count();
        assert!(
            dates_with_multiple > 0,
            "With batching, should see some dates with multiple entries"
        );
    }

    #[test]
    fn test_temporal_patterns_business_days() {
        use datasynth_config::schema::{
            BusinessDaySchemaConfig, CalendarSchemaConfig, TemporalPatternsConfig,
        };

        let temporal_config = TemporalPatternsConfig {
            enabled: true,
            business_days: BusinessDaySchemaConfig {
                enabled: true,
                ..Default::default()
            },
            calendars: CalendarSchemaConfig {
                regions: vec!["US".to_string()],
                custom_holidays: vec![],
            },
            ..Default::default()
        };

        // Only the first quarter of 2024 is generated here.
        let mut je_gen = JournalEntryGenerator::new_with_params(
            TransactionConfig::default(),
            small_coa(),
            single_company(),
            ymd(2024, 1, 1),
            ymd(2024, 3, 31),
            42,
        )
        .with_temporal_patterns(temporal_config, 42)
        .with_persona_errors(false);

        let entries: Vec<JournalEntry> = (0..100).map(|_| je_gen.generate()).collect();

        for entry in &entries {
            let weekday = entry.header.posting_date.weekday();
            assert!(
                weekday != chrono::Weekday::Sat && weekday != chrono::Weekday::Sun,
                "Posting date {:?} should not be a weekend",
                entry.header.posting_date
            );
        }
    }
}