1use chrono::{Datelike, NaiveDate};
4use rand::prelude::*;
5use rand_chacha::ChaCha8Rng;
6use rust_decimal::prelude::*;
7use rust_decimal::Decimal;
8use std::sync::Arc;
9
10use datasynth_config::schema::{
11 FraudConfig, GeneratorConfig, TemplateConfig, TemporalPatternsConfig, TransactionConfig,
12};
13use datasynth_core::distributions::{
14 BusinessDayCalculator, CrossDayConfig, DriftAdjustments, DriftConfig, DriftController,
15 EventType, LagDistribution, PeriodEndConfig, PeriodEndDynamics, PeriodEndModel,
16 ProcessingLagCalculator, ProcessingLagConfig, *,
17};
18use datasynth_core::models::*;
19use datasynth_core::templates::{
20 descriptions::DescriptionContext, DescriptionGenerator, ReferenceGenerator, ReferenceType,
21};
22use datasynth_core::traits::Generator;
23use datasynth_core::uuid_factory::{DeterministicUuidFactory, GeneratorType};
24
25use crate::company_selector::WeightedCompanySelector;
26use crate::user_generator::{UserGenerator, UserGeneratorConfig};
27
/// Stateful generator of synthetic [`JournalEntry`] values.
///
/// The struct bundles the random sources, samplers, master-data pools and
/// feature switches that `generate` consults for every entry it produces.
pub struct JournalEntryGenerator {
    /// Main random number generator; the constructor seeds it from `seed`.
    rng: ChaCha8Rng,
    /// Seed the generator was constructed with.
    seed: u64,
    /// Transaction settings the samplers are built from.
    config: TransactionConfig,
    /// Shared chart of accounts.
    coa: Arc<ChartOfAccounts>,
    /// Company codes passed to the constructor.
    companies: Vec<String>,
    /// Picks the company code of each generated entry.
    company_selector: WeightedCompanySelector,
    /// Samples the debit/credit line layout of an entry.
    line_sampler: LineItemSampler,
    /// Samples entry amounts and splits a total across lines.
    amount_sampler: AmountSampler,
    /// Samples posting dates and creation times.
    temporal_sampler: TemporalSampler,
    /// Lower bound handed to the date sampler.
    start_date: NaiveDate,
    /// Upper bound handed to the date sampler; business-day adjusted dates
    /// are pulled back so they do not exceed it.
    end_date: NaiveDate,
    /// Incremented once for every generated entry (regular or batched).
    count: u64,
    /// Source of deterministic document ids.
    uuid_factory: DeterministicUuidFactory,
    /// Optional pool of users; the constructor creates one when realistic
    /// names are enabled and none was supplied.
    user_pool: Option<UserPool>,
    /// Produces header and line texts.
    description_generator: DescriptionGenerator,
    /// Produces document references.
    reference_generator: ReferenceGenerator,
    /// Template settings (names, descriptions, references).
    template_config: TemplateConfig,
    /// Vendors used for P2P description context; starts as the standard pool.
    vendor_pool: VendorPool,
    /// Customers used for O2C description context; starts as the standard pool.
    customer_pool: CustomerPool,
    /// Materials supplied via `with_materials`, if any.
    material_pool: Option<MaterialPool>,
    /// Set to `true` once any `with_vendors` / `with_customers` /
    /// `with_materials` call received a non-empty slice.
    using_real_master_data: bool,
    /// Fraud injection settings (enabled flag, rate, type distribution).
    fraud_config: FraudConfig,
    /// Gates `maybe_inject_persona_error`.
    persona_errors_enabled: bool,
    /// Gates `maybe_apply_approval_workflow`.
    approval_enabled: bool,
    /// Entries whose total debit is at or below this amount are auto-approved.
    approval_threshold: rust_decimal::Decimal,
    /// State of the batch of similar entries currently being emitted, if any.
    batch_state: Option<BatchState>,
    /// Optional drift controller; the constructors visible here leave it `None`.
    drift_controller: Option<DriftController>,
    /// Set by `with_temporal_patterns` when business-day handling is enabled.
    business_day_calculator: Option<BusinessDayCalculator>,
    /// Set by `with_temporal_patterns` when processing lags are enabled.
    processing_lag_calculator: Option<ProcessingLagCalculator>,
    /// The temporal pattern settings last applied via `with_temporal_patterns`.
    temporal_patterns_config: Option<TemporalPatternsConfig>,
}
70
/// Snapshot of the entry that started a batch of similar follow-up entries.
///
/// Created by `maybe_start_batch` and consumed by `generate_batched_entry`.
#[derive(Clone)]
struct BatchState {
    /// Vendor shared by the batch; `maybe_start_batch` currently stores `None`.
    base_vendor: Option<String>,
    /// Customer shared by the batch; `maybe_start_batch` currently stores `None`.
    base_customer: Option<String>,
    /// GL account of the first line of the entry that started the batch.
    base_account_number: String,
    /// Total debit of the entry that started the batch.
    base_amount: rust_decimal::Decimal,
    /// Business process of the entry that started the batch.
    base_business_process: Option<BusinessProcess>,
    /// Posting date reused by every entry of the batch.
    base_posting_date: NaiveDate,
    /// Counter of follow-up entries still to emit; `generate` only delegates
    /// to the batch path while this is greater than zero.
    remaining: u8,
}
87
88impl JournalEntryGenerator {
89 pub fn new_with_params(
91 config: TransactionConfig,
92 coa: Arc<ChartOfAccounts>,
93 companies: Vec<String>,
94 start_date: NaiveDate,
95 end_date: NaiveDate,
96 seed: u64,
97 ) -> Self {
98 Self::new_with_full_config(
99 config,
100 coa,
101 companies,
102 start_date,
103 end_date,
104 seed,
105 TemplateConfig::default(),
106 None,
107 )
108 }
109
110 #[allow(clippy::too_many_arguments)]
112 pub fn new_with_full_config(
113 config: TransactionConfig,
114 coa: Arc<ChartOfAccounts>,
115 companies: Vec<String>,
116 start_date: NaiveDate,
117 end_date: NaiveDate,
118 seed: u64,
119 template_config: TemplateConfig,
120 user_pool: Option<UserPool>,
121 ) -> Self {
122 let user_pool = user_pool.or_else(|| {
124 if template_config.names.generate_realistic_names {
125 let user_gen_config = UserGeneratorConfig {
126 culture_distribution: vec![
127 (
128 datasynth_core::templates::NameCulture::WesternUs,
129 template_config.names.culture_distribution.western_us,
130 ),
131 (
132 datasynth_core::templates::NameCulture::Hispanic,
133 template_config.names.culture_distribution.hispanic,
134 ),
135 (
136 datasynth_core::templates::NameCulture::German,
137 template_config.names.culture_distribution.german,
138 ),
139 (
140 datasynth_core::templates::NameCulture::French,
141 template_config.names.culture_distribution.french,
142 ),
143 (
144 datasynth_core::templates::NameCulture::Chinese,
145 template_config.names.culture_distribution.chinese,
146 ),
147 (
148 datasynth_core::templates::NameCulture::Japanese,
149 template_config.names.culture_distribution.japanese,
150 ),
151 (
152 datasynth_core::templates::NameCulture::Indian,
153 template_config.names.culture_distribution.indian,
154 ),
155 ],
156 email_domain: template_config.names.email_domain.clone(),
157 generate_realistic_names: true,
158 };
159 let mut user_gen = UserGenerator::with_config(seed + 100, user_gen_config);
160 Some(user_gen.generate_standard(&companies))
161 } else {
162 None
163 }
164 });
165
166 let mut ref_gen = ReferenceGenerator::new(
168 start_date.year(),
169 companies.first().map(|s| s.as_str()).unwrap_or("1000"),
170 );
171 ref_gen.set_prefix(
172 ReferenceType::Invoice,
173 &template_config.references.invoice_prefix,
174 );
175 ref_gen.set_prefix(
176 ReferenceType::PurchaseOrder,
177 &template_config.references.po_prefix,
178 );
179 ref_gen.set_prefix(
180 ReferenceType::SalesOrder,
181 &template_config.references.so_prefix,
182 );
183
184 let company_selector = WeightedCompanySelector::uniform(companies.clone());
186
187 Self {
188 rng: ChaCha8Rng::seed_from_u64(seed),
189 seed,
190 config: config.clone(),
191 coa,
192 companies,
193 company_selector,
194 line_sampler: LineItemSampler::with_config(
195 seed + 1,
196 config.line_item_distribution.clone(),
197 config.even_odd_distribution.clone(),
198 config.debit_credit_distribution.clone(),
199 ),
200 amount_sampler: AmountSampler::with_config(seed + 2, config.amounts.clone()),
201 temporal_sampler: TemporalSampler::with_config(
202 seed + 3,
203 config.seasonality.clone(),
204 WorkingHoursConfig::default(),
205 Vec::new(),
206 ),
207 start_date,
208 end_date,
209 count: 0,
210 uuid_factory: DeterministicUuidFactory::new(seed, GeneratorType::JournalEntry),
211 user_pool,
212 description_generator: DescriptionGenerator::new(),
213 reference_generator: ref_gen,
214 template_config,
215 vendor_pool: VendorPool::standard(),
216 customer_pool: CustomerPool::standard(),
217 material_pool: None,
218 using_real_master_data: false,
219 fraud_config: FraudConfig::default(),
220 persona_errors_enabled: true, approval_enabled: true, approval_threshold: rust_decimal::Decimal::new(10000, 0), batch_state: None,
224 drift_controller: None,
225 business_day_calculator: None,
226 processing_lag_calculator: None,
227 temporal_patterns_config: None,
228 }
229 }
230
231 pub fn from_generator_config(
236 full_config: &GeneratorConfig,
237 coa: Arc<ChartOfAccounts>,
238 start_date: NaiveDate,
239 end_date: NaiveDate,
240 seed: u64,
241 ) -> Self {
242 let companies: Vec<String> = full_config
243 .companies
244 .iter()
245 .map(|c| c.code.clone())
246 .collect();
247
248 let company_selector = WeightedCompanySelector::from_configs(&full_config.companies);
250
251 let mut generator = Self::new_with_full_config(
252 full_config.transactions.clone(),
253 coa,
254 companies,
255 start_date,
256 end_date,
257 seed,
258 full_config.templates.clone(),
259 None,
260 );
261
262 generator.company_selector = company_selector;
264
265 generator.fraud_config = full_config.fraud.clone();
267
268 let temporal_config = &full_config.temporal_patterns;
270 if temporal_config.enabled {
271 generator = generator.with_temporal_patterns(temporal_config.clone(), seed);
272 }
273
274 generator
275 }
276
277 pub fn with_temporal_patterns(mut self, config: TemporalPatternsConfig, seed: u64) -> Self {
284 if config.business_days.enabled {
286 let region = config
287 .calendars
288 .regions
289 .first()
290 .map(|r| Self::parse_region(r))
291 .unwrap_or(Region::US);
292
293 let calendar = HolidayCalendar::new(region, self.start_date.year());
294 self.business_day_calculator = Some(BusinessDayCalculator::new(calendar));
295 }
296
297 if config.processing_lags.enabled {
299 let lag_config = Self::convert_processing_lag_config(&config.processing_lags);
300 self.processing_lag_calculator =
301 Some(ProcessingLagCalculator::with_config(seed, lag_config));
302 }
303
304 let model = config.period_end.model.as_deref().unwrap_or("flat");
306 if model != "flat"
307 || config
308 .period_end
309 .month_end
310 .as_ref()
311 .is_some_and(|m| m.peak_multiplier.unwrap_or(1.0) != 1.0)
312 {
313 let dynamics = Self::convert_period_end_config(&config.period_end);
314 self.temporal_sampler.set_period_end_dynamics(dynamics);
315 }
316
317 self.temporal_patterns_config = Some(config);
318 self
319 }
320
321 fn convert_processing_lag_config(
323 schema: &datasynth_config::schema::ProcessingLagSchemaConfig,
324 ) -> ProcessingLagConfig {
325 let mut config = ProcessingLagConfig {
326 enabled: schema.enabled,
327 ..Default::default()
328 };
329
330 let convert_lag = |lag: &datasynth_config::schema::LagDistributionSchemaConfig| {
332 let mut dist = LagDistribution::log_normal(lag.mu, lag.sigma);
333 if let Some(min) = lag.min_hours {
334 dist.min_lag_hours = min;
335 }
336 if let Some(max) = lag.max_hours {
337 dist.max_lag_hours = max;
338 }
339 dist
340 };
341
342 if let Some(ref lag) = schema.sales_order_lag {
344 config
345 .event_lags
346 .insert(EventType::SalesOrder, convert_lag(lag));
347 }
348 if let Some(ref lag) = schema.purchase_order_lag {
349 config
350 .event_lags
351 .insert(EventType::PurchaseOrder, convert_lag(lag));
352 }
353 if let Some(ref lag) = schema.goods_receipt_lag {
354 config
355 .event_lags
356 .insert(EventType::GoodsReceipt, convert_lag(lag));
357 }
358 if let Some(ref lag) = schema.invoice_receipt_lag {
359 config
360 .event_lags
361 .insert(EventType::InvoiceReceipt, convert_lag(lag));
362 }
363 if let Some(ref lag) = schema.invoice_issue_lag {
364 config
365 .event_lags
366 .insert(EventType::InvoiceIssue, convert_lag(lag));
367 }
368 if let Some(ref lag) = schema.payment_lag {
369 config
370 .event_lags
371 .insert(EventType::Payment, convert_lag(lag));
372 }
373 if let Some(ref lag) = schema.journal_entry_lag {
374 config
375 .event_lags
376 .insert(EventType::JournalEntry, convert_lag(lag));
377 }
378
379 if let Some(ref cross_day) = schema.cross_day_posting {
381 config.cross_day = CrossDayConfig {
382 enabled: cross_day.enabled,
383 probability_by_hour: cross_day.probability_by_hour.clone(),
384 ..Default::default()
385 };
386 }
387
388 config
389 }
390
391 fn convert_period_end_config(
393 schema: &datasynth_config::schema::PeriodEndSchemaConfig,
394 ) -> PeriodEndDynamics {
395 let model_type = schema.model.as_deref().unwrap_or("exponential");
396
397 let convert_period =
399 |period: Option<&datasynth_config::schema::PeriodEndModelSchemaConfig>,
400 default_peak: f64|
401 -> PeriodEndConfig {
402 if let Some(p) = period {
403 let model = match model_type {
404 "flat" => PeriodEndModel::FlatMultiplier {
405 multiplier: p.peak_multiplier.unwrap_or(default_peak),
406 },
407 "extended_crunch" => PeriodEndModel::ExtendedCrunch {
408 start_day: p.start_day.unwrap_or(-10),
409 sustained_high_days: p.sustained_high_days.unwrap_or(3),
410 peak_multiplier: p.peak_multiplier.unwrap_or(default_peak),
411 ramp_up_days: 3, },
413 _ => PeriodEndModel::ExponentialAcceleration {
414 start_day: p.start_day.unwrap_or(-10),
415 base_multiplier: p.base_multiplier.unwrap_or(1.0),
416 peak_multiplier: p.peak_multiplier.unwrap_or(default_peak),
417 decay_rate: p.decay_rate.unwrap_or(0.3),
418 },
419 };
420 PeriodEndConfig {
421 enabled: true,
422 model,
423 additional_multiplier: p.additional_multiplier.unwrap_or(1.0),
424 }
425 } else {
426 PeriodEndConfig {
427 enabled: true,
428 model: PeriodEndModel::ExponentialAcceleration {
429 start_day: -10,
430 base_multiplier: 1.0,
431 peak_multiplier: default_peak,
432 decay_rate: 0.3,
433 },
434 additional_multiplier: 1.0,
435 }
436 }
437 };
438
439 PeriodEndDynamics::new(
440 convert_period(schema.month_end.as_ref(), 2.0),
441 convert_period(schema.quarter_end.as_ref(), 3.5),
442 convert_period(schema.year_end.as_ref(), 5.0),
443 )
444 }
445
446 fn parse_region(region_str: &str) -> Region {
448 match region_str.to_uppercase().as_str() {
449 "US" => Region::US,
450 "DE" => Region::DE,
451 "GB" => Region::GB,
452 "CN" => Region::CN,
453 "JP" => Region::JP,
454 "IN" => Region::IN,
455 "BR" => Region::BR,
456 "MX" => Region::MX,
457 "AU" => Region::AU,
458 "SG" => Region::SG,
459 "KR" => Region::KR,
460 _ => Region::US,
461 }
462 }
463
/// Replaces the selector used to pick the company code of each entry.
pub fn set_company_selector(&mut self, selector: WeightedCompanySelector) {
    self.company_selector = selector;
}
468
/// Returns the selector currently used to pick company codes.
pub fn company_selector(&self) -> &WeightedCompanySelector {
    &self.company_selector
}
473
/// Replaces the fraud injection settings in place.
pub fn set_fraud_config(&mut self, config: FraudConfig) {
    self.fraud_config = config;
}
478
479 pub fn with_vendors(mut self, vendors: &[Vendor]) -> Self {
484 if !vendors.is_empty() {
485 self.vendor_pool = VendorPool::from_vendors(vendors.to_vec());
486 self.using_real_master_data = true;
487 }
488 self
489 }
490
491 pub fn with_customers(mut self, customers: &[Customer]) -> Self {
496 if !customers.is_empty() {
497 self.customer_pool = CustomerPool::from_customers(customers.to_vec());
498 self.using_real_master_data = true;
499 }
500 self
501 }
502
503 pub fn with_materials(mut self, materials: &[Material]) -> Self {
507 if !materials.is_empty() {
508 self.material_pool = Some(MaterialPool::from_materials(materials.to_vec()));
509 self.using_real_master_data = true;
510 }
511 self
512 }
513
514 pub fn with_master_data(
519 self,
520 vendors: &[Vendor],
521 customers: &[Customer],
522 materials: &[Material],
523 ) -> Self {
524 self.with_vendors(vendors)
525 .with_customers(customers)
526 .with_materials(materials)
527 }
528
/// Returns `true` once vendors, customers or materials were supplied through
/// one of the `with_*` master-data builders with a non-empty slice.
pub fn is_using_real_master_data(&self) -> bool {
    self.using_real_master_data
}
533
534 fn determine_fraud(&mut self) -> Option<FraudType> {
536 if !self.fraud_config.enabled {
537 return None;
538 }
539
540 if self.rng.gen::<f64>() >= self.fraud_config.fraud_rate {
542 return None;
543 }
544
545 Some(self.select_fraud_type())
547 }
548
549 fn select_fraud_type(&mut self) -> FraudType {
551 let dist = &self.fraud_config.fraud_type_distribution;
552 let roll: f64 = self.rng.gen();
553
554 let mut cumulative = 0.0;
555
556 cumulative += dist.suspense_account_abuse;
557 if roll < cumulative {
558 return FraudType::SuspenseAccountAbuse;
559 }
560
561 cumulative += dist.fictitious_transaction;
562 if roll < cumulative {
563 return FraudType::FictitiousTransaction;
564 }
565
566 cumulative += dist.revenue_manipulation;
567 if roll < cumulative {
568 return FraudType::RevenueManipulation;
569 }
570
571 cumulative += dist.expense_capitalization;
572 if roll < cumulative {
573 return FraudType::ExpenseCapitalization;
574 }
575
576 cumulative += dist.split_transaction;
577 if roll < cumulative {
578 return FraudType::SplitTransaction;
579 }
580
581 cumulative += dist.timing_anomaly;
582 if roll < cumulative {
583 return FraudType::TimingAnomaly;
584 }
585
586 cumulative += dist.unauthorized_access;
587 if roll < cumulative {
588 return FraudType::UnauthorizedAccess;
589 }
590
591 FraudType::DuplicatePayment
593 }
594
595 fn fraud_type_to_amount_pattern(&self, fraud_type: FraudType) -> FraudAmountPattern {
597 match fraud_type {
598 FraudType::SplitTransaction | FraudType::JustBelowThreshold => {
599 FraudAmountPattern::ThresholdAdjacent
600 }
601 FraudType::FictitiousTransaction
602 | FraudType::FictitiousEntry
603 | FraudType::SuspenseAccountAbuse
604 | FraudType::RoundDollarManipulation => FraudAmountPattern::ObviousRoundNumbers,
605 FraudType::RevenueManipulation
606 | FraudType::ExpenseCapitalization
607 | FraudType::ImproperCapitalization
608 | FraudType::ReserveManipulation
609 | FraudType::UnauthorizedAccess
610 | FraudType::PrematureRevenue
611 | FraudType::UnderstatedLiabilities
612 | FraudType::OverstatedAssets
613 | FraudType::ChannelStuffing => FraudAmountPattern::StatisticallyImprobable,
614 FraudType::DuplicatePayment
615 | FraudType::TimingAnomaly
616 | FraudType::SelfApproval
617 | FraudType::ExceededApprovalLimit
618 | FraudType::SegregationOfDutiesViolation
619 | FraudType::UnauthorizedApproval
620 | FraudType::CollusiveApproval
621 | FraudType::FictitiousVendor
622 | FraudType::ShellCompanyPayment
623 | FraudType::Kickback
624 | FraudType::KickbackScheme
625 | FraudType::InvoiceManipulation
626 | FraudType::AssetMisappropriation
627 | FraudType::InventoryTheft
628 | FraudType::GhostEmployee => FraudAmountPattern::Normal,
629 FraudType::ImproperRevenueRecognition
631 | FraudType::ImproperPoAllocation
632 | FraudType::VariableConsiderationManipulation
633 | FraudType::ContractModificationMisstatement => {
634 FraudAmountPattern::StatisticallyImprobable
635 }
636 FraudType::LeaseClassificationManipulation
638 | FraudType::OffBalanceSheetLease
639 | FraudType::LeaseLiabilityUnderstatement
640 | FraudType::RouAssetMisstatement => FraudAmountPattern::StatisticallyImprobable,
641 FraudType::FairValueHierarchyManipulation
643 | FraudType::Level3InputManipulation
644 | FraudType::ValuationTechniqueManipulation => {
645 FraudAmountPattern::StatisticallyImprobable
646 }
647 FraudType::DelayedImpairment
649 | FraudType::ImpairmentTestAvoidance
650 | FraudType::CashFlowProjectionManipulation
651 | FraudType::ImproperImpairmentReversal => FraudAmountPattern::StatisticallyImprobable,
652 FraudType::BidRigging
654 | FraudType::PhantomVendorContract
655 | FraudType::ConflictOfInterestSourcing => FraudAmountPattern::Normal,
656 FraudType::SplitContractThreshold => FraudAmountPattern::ThresholdAdjacent,
657 FraudType::GhostEmployeePayroll
659 | FraudType::PayrollInflation
660 | FraudType::DuplicateExpenseReport
661 | FraudType::FictitiousExpense => FraudAmountPattern::Normal,
662 FraudType::SplitExpenseToAvoidApproval => FraudAmountPattern::ThresholdAdjacent,
663 FraudType::RevenueTimingManipulation => FraudAmountPattern::StatisticallyImprobable,
665 FraudType::QuotePriceOverride => FraudAmountPattern::Normal,
666 }
667 }
668
/// Returns the next document id from the generator's UUID factory.
fn generate_deterministic_uuid(&self) -> uuid::Uuid {
    self.uuid_factory.next()
}
673
/// Generates the next journal entry.
///
/// While a batch is in progress (`batch_state` with `remaining > 0`) the call
/// is delegated to `generate_batched_entry`. Otherwise a fresh entry is
/// built: posting date, company, line layout, source, business process and
/// fraud decision are sampled, the header is filled, balanced debit and
/// credit lines are added, and finally persona errors, the approval workflow
/// and batch start-up are applied.
///
/// The order of the sampling calls below determines the generated data for a
/// given seed, so statements must not be reordered casually.
pub fn generate(&mut self) -> JournalEntry {
    // Continue an in-flight batch before starting anything new.
    if let Some(ref state) = self.batch_state {
        if state.remaining > 0 {
            return self.generate_batched_entry();
        }
    }

    self.count += 1;

    let document_id = self.generate_deterministic_uuid();

    let mut posting_date = self
        .temporal_sampler
        .sample_date(self.start_date, self.end_date);

    // With a business-day calendar installed, move non-business days forward;
    // if that leaves the configured range, fall back to a business day
    // derived from the end date.
    if let Some(ref calc) = self.business_day_calculator {
        if !calc.is_business_day(posting_date) {
            posting_date = calc.next_business_day(posting_date, false);
            if posting_date > self.end_date {
                posting_date = calc.prev_business_day(self.end_date, true);
            }
        }
    }

    let company_code = self.company_selector.select(&mut self.rng).to_string();

    let line_spec = self.line_sampler.sample();

    let source = self.select_source();
    // Automated and recurring sources are treated as non-human below.
    let is_automated = matches!(
        source,
        TransactionSource::Automated | TransactionSource::Recurring
    );

    let business_process = self.select_business_process();

    let fraud_type = self.determine_fraud();
    let is_fraud = fraud_type.is_some();

    // NOTE(review): the flag passed to `sample_time` presumably selects
    // human-style timing - confirm against `TemporalSampler`.
    let time = self.temporal_sampler.sample_time(!is_automated);
    let created_at = posting_date.and_time(time).and_utc();

    let (created_by, user_persona) = self.select_user(is_automated);

    let mut header =
        JournalEntryHeader::with_deterministic_id(company_code, posting_date, document_id);
    header.created_at = created_at;
    header.source = source;
    header.created_by = created_by;
    header.user_persona = user_persona;
    header.business_process = Some(business_process);
    header.is_fraud = is_fraud;
    header.fraud_type = fraud_type;

    let mut context =
        DescriptionContext::with_period(posting_date.month(), posting_date.year());

    // Only P2P and O2C entries get a counterparty name in the text context.
    match business_process {
        BusinessProcess::P2P => {
            if let Some(vendor) = self.vendor_pool.random_vendor(&mut self.rng) {
                context.vendor_name = Some(vendor.name.clone());
            }
        }
        BusinessProcess::O2C => {
            if let Some(customer) = self.customer_pool.random_customer(&mut self.rng) {
                context.customer_name = Some(customer.name.clone());
            }
        }
        _ => {}
    }

    if self.template_config.descriptions.generate_header_text {
        header.header_text = Some(self.description_generator.generate_header_text(
            business_process,
            &context,
            &mut self.rng,
        ));
    }

    if self.template_config.references.generate_references {
        header.reference = Some(
            self.reference_generator
                .generate_for_process_year(business_process, posting_date.year()),
        );
    }

    let mut entry = JournalEntry::new(header);

    // Fraudulent entries draw their amount from a fraud-specific pattern.
    let base_amount = if let Some(ft) = fraud_type {
        let pattern = self.fraud_type_to_amount_pattern(ft);
        self.amount_sampler.sample_fraud(pattern)
    } else {
        self.amount_sampler.sample()
    };

    // NOTE(review): the seasonal factor is only applied when the mean
    // multiplier differs from 1.0, and the adjusted value is not rounded
    // before it is converted back to a decimal - confirm both are intended.
    let drift_adjusted_amount = {
        let drift = self.get_drift_adjustments(posting_date);
        if drift.amount_mean_multiplier != 1.0 {
            let multiplier = drift.amount_mean_multiplier * drift.seasonal_factor;
            let adjusted = base_amount.to_f64().unwrap_or(1.0) * multiplier;
            Decimal::from_f64_retain(adjusted).unwrap_or(base_amount)
        } else {
            base_amount
        }
    };

    // Human-entered amounts get an extra variation step; automated ones do not.
    let total_amount = if is_automated {
        drift_adjusted_amount
    } else {
        self.apply_human_variation(drift_adjusted_amount)
    };

    // Debit side: split the total across the sampled number of debit lines.
    let debit_amounts = self
        .amount_sampler
        .sample_summing_to(line_spec.debit_count, total_amount);
    for (i, amount) in debit_amounts.into_iter().enumerate() {
        let account_number = self.select_debit_account().account_number.clone();
        let mut line = JournalEntryLine::debit(
            entry.header.document_id,
            (i + 1) as u32,
            account_number.clone(),
            amount,
        );

        if self.template_config.descriptions.generate_line_text {
            line.line_text = Some(self.description_generator.generate_line_text(
                &account_number,
                &context,
                &mut self.rng,
            ));
        }

        entry.add_line(line);
    }

    // Credit side: the same total, split independently; line numbers
    // continue after the debit lines.
    let credit_amounts = self
        .amount_sampler
        .sample_summing_to(line_spec.credit_count, total_amount);
    for (i, amount) in credit_amounts.into_iter().enumerate() {
        let account_number = self.select_credit_account().account_number.clone();
        let mut line = JournalEntryLine::credit(
            entry.header.document_id,
            (line_spec.debit_count + i + 1) as u32,
            account_number.clone(),
            amount,
        );

        if self.template_config.descriptions.generate_line_text {
            line.line_text = Some(self.description_generator.generate_line_text(
                &account_number,
                &context,
                &mut self.rng,
            ));
        }

        entry.add_line(line);
    }

    // Persona errors only apply to entries that are not automated.
    if self.persona_errors_enabled && !is_automated {
        self.maybe_inject_persona_error(&mut entry);
    }

    if self.approval_enabled {
        self.maybe_apply_approval_workflow(&mut entry, posting_date);
    }

    // May arm `batch_state` so that following calls emit similar entries.
    self.maybe_start_batch(&entry);

    entry
}
875
/// Enables or disables persona-based error injection and returns the
/// updated generator.
pub fn with_persona_errors(mut self, enabled: bool) -> Self {
    self.persona_errors_enabled = enabled;
    self
}
884
/// Replaces the fraud injection settings and returns the updated generator.
pub fn with_fraud_config(mut self, config: FraudConfig) -> Self {
    self.fraud_config = config;
    self
}
893
/// Returns whether persona-based error injection is enabled.
pub fn persona_errors_enabled(&self) -> bool {
    self.persona_errors_enabled
}
898
/// Builder-style batching switch.
///
/// Passing `false` discards any batch that is currently in progress; passing
/// `true` changes nothing.
///
/// NOTE(review): no flag is stored, and `maybe_start_batch` as visible in
/// this file does not consult one, so later `generate` calls can still start
/// new batches after `with_batching(false)` - confirm whether batching was
/// meant to be switched off permanently.
pub fn with_batching(mut self, enabled: bool) -> Self {
    if !enabled {
        self.batch_state = None;
    }
    self
}
909
/// Reports whether batching is enabled.
///
/// NOTE(review): this always returns `true`, regardless of any earlier
/// `with_batching(false)` call - confirm that is intended.
pub fn batching_enabled(&self) -> bool {
    true
}
915
916 fn maybe_start_batch(&mut self, entry: &JournalEntry) {
921 if entry.header.source == TransactionSource::Automated || entry.header.is_fraud {
923 return;
924 }
925
926 if self.rng.gen::<f64>() > 0.15 {
928 return;
929 }
930
931 let base_account = entry
933 .lines
934 .first()
935 .map(|l| l.gl_account.clone())
936 .unwrap_or_default();
937
938 let base_amount = entry.total_debit();
939
940 self.batch_state = Some(BatchState {
941 base_vendor: None, base_customer: None,
943 base_account_number: base_account,
944 base_amount,
945 base_business_process: entry.header.business_process,
946 base_posting_date: entry.header.posting_date,
947 remaining: self.rng.gen_range(2..7), });
949 }
950
951 fn generate_batched_entry(&mut self) -> JournalEntry {
959 use rust_decimal::Decimal;
960
961 if let Some(ref mut state) = self.batch_state {
963 state.remaining = state.remaining.saturating_sub(1);
964 }
965
966 let batch = self
967 .batch_state
968 .clone()
969 .expect("batch_state set before calling generate_batched_entry");
970
971 let posting_date = batch.base_posting_date;
973
974 self.count += 1;
975 let document_id = self.generate_deterministic_uuid();
976
977 let company_code = self.company_selector.select(&mut self.rng).to_string();
979
980 let _line_spec = LineItemSpec {
982 total_count: 2,
983 debit_count: 1,
984 credit_count: 1,
985 split_type: DebitCreditSplit::Equal,
986 };
987
988 let source = TransactionSource::Manual;
990
991 let business_process = batch.base_business_process.unwrap_or(BusinessProcess::R2R);
993
994 let time = self.temporal_sampler.sample_time(true);
996 let created_at = posting_date.and_time(time).and_utc();
997
998 let (created_by, user_persona) = self.select_user(false);
1000
1001 let mut header =
1003 JournalEntryHeader::with_deterministic_id(company_code, posting_date, document_id);
1004 header.created_at = created_at;
1005 header.source = source;
1006 header.created_by = created_by;
1007 header.user_persona = user_persona;
1008 header.business_process = Some(business_process);
1009
1010 let variation = self.rng.gen_range(-0.15..0.15);
1012 let varied_amount =
1013 batch.base_amount * (Decimal::ONE + Decimal::try_from(variation).unwrap_or_default());
1014 let total_amount = varied_amount.round_dp(2).max(Decimal::from(1));
1015
1016 let mut entry = JournalEntry::new(header);
1018
1019 let debit_line = JournalEntryLine::debit(
1021 entry.header.document_id,
1022 1,
1023 batch.base_account_number.clone(),
1024 total_amount,
1025 );
1026 entry.add_line(debit_line);
1027
1028 let credit_account = self.select_credit_account().account_number.clone();
1030 let credit_line =
1031 JournalEntryLine::credit(entry.header.document_id, 2, credit_account, total_amount);
1032 entry.add_line(credit_line);
1033
1034 if self.persona_errors_enabled {
1036 self.maybe_inject_persona_error(&mut entry);
1037 }
1038
1039 if self.approval_enabled {
1041 self.maybe_apply_approval_workflow(&mut entry, posting_date);
1042 }
1043
1044 if batch.remaining <= 1 {
1046 self.batch_state = None;
1047 }
1048
1049 entry
1050 }
1051
1052 fn maybe_inject_persona_error(&mut self, entry: &mut JournalEntry) {
1054 let persona_str = &entry.header.user_persona;
1056 let persona = match persona_str.to_lowercase().as_str() {
1057 s if s.contains("junior") => UserPersona::JuniorAccountant,
1058 s if s.contains("senior") => UserPersona::SeniorAccountant,
1059 s if s.contains("controller") => UserPersona::Controller,
1060 s if s.contains("manager") => UserPersona::Manager,
1061 s if s.contains("executive") => UserPersona::Executive,
1062 _ => return, };
1064
1065 let base_error_rate = persona.error_rate();
1067
1068 let adjusted_rate = self.apply_stress_factors(base_error_rate, entry.header.posting_date);
1070
1071 if self.rng.gen::<f64>() >= adjusted_rate {
1073 return; }
1075
1076 self.inject_human_error(entry, persona);
1078 }
1079
1080 fn apply_stress_factors(&self, base_rate: f64, posting_date: chrono::NaiveDate) -> f64 {
1089 use chrono::Datelike;
1090
1091 let mut rate = base_rate;
1092 let day = posting_date.day();
1093 let month = posting_date.month();
1094
1095 if month == 12 && day >= 28 {
1097 rate *= 2.0;
1098 return rate.min(0.5); }
1100
1101 if matches!(month, 3 | 6 | 9 | 12) && day >= 28 {
1103 rate *= 1.75; return rate.min(0.4);
1105 }
1106
1107 if day >= 28 {
1109 rate *= 1.5; }
1111
1112 let weekday = posting_date.weekday();
1114 match weekday {
1115 chrono::Weekday::Mon => {
1116 rate *= 1.2;
1118 }
1119 chrono::Weekday::Fri => {
1120 rate *= 1.3;
1122 }
1123 _ => {}
1124 }
1125
1126 rate.min(0.4)
1128 }
1129
1130 fn apply_human_variation(&mut self, amount: rust_decimal::Decimal) -> rust_decimal::Decimal {
1139 use rust_decimal::Decimal;
1140
1141 if amount < Decimal::from(10) {
1143 return amount;
1144 }
1145
1146 if self.rng.gen::<f64>() > 0.70 {
1148 return amount;
1149 }
1150
1151 let variation_type: u8 = self.rng.gen_range(0..4);
1153
1154 match variation_type {
1155 0 => {
1156 let variation_pct = self.rng.gen_range(-0.02..0.02);
1158 let variation = amount * Decimal::try_from(variation_pct).unwrap_or_default();
1159 (amount + variation).round_dp(2)
1160 }
1161 1 => {
1162 let ten = Decimal::from(10);
1164 (amount / ten).round() * ten
1165 }
1166 2 => {
1167 if amount >= Decimal::from(500) {
1169 let hundred = Decimal::from(100);
1170 (amount / hundred).round() * hundred
1171 } else {
1172 amount
1173 }
1174 }
1175 3 => {
1176 let cents = Decimal::new(self.rng.gen_range(-100..100), 2);
1178 (amount + cents).max(Decimal::ZERO).round_dp(2)
1179 }
1180 _ => amount,
1181 }
1182 }
1183
1184 fn rebalance_entry(entry: &mut JournalEntry, modified_was_debit: bool, impact: Decimal) {
1190 let balancing_idx = entry.lines.iter().position(|l| {
1192 if modified_was_debit {
1193 l.credit_amount > Decimal::ZERO
1194 } else {
1195 l.debit_amount > Decimal::ZERO
1196 }
1197 });
1198
1199 if let Some(idx) = balancing_idx {
1200 if modified_was_debit {
1201 entry.lines[idx].credit_amount += impact;
1202 } else {
1203 entry.lines[idx].debit_amount += impact;
1204 }
1205 }
1206 }
1207
1208 fn inject_human_error(&mut self, entry: &mut JournalEntry, persona: UserPersona) {
1213 use rust_decimal::Decimal;
1214
1215 let error_type: u8 = match persona {
1217 UserPersona::JuniorAccountant => {
1218 self.rng.gen_range(0..5)
1220 }
1221 UserPersona::SeniorAccountant => {
1222 self.rng.gen_range(0..3)
1224 }
1225 UserPersona::Controller | UserPersona::Manager => {
1226 self.rng.gen_range(3..5)
1228 }
1229 _ => return,
1230 };
1231
1232 match error_type {
1233 0 => {
1234 if let Some(line) = entry.lines.get_mut(0) {
1236 let is_debit = line.debit_amount > Decimal::ZERO;
1237 let original_amount = if is_debit {
1238 line.debit_amount
1239 } else {
1240 line.credit_amount
1241 };
1242
1243 let s = original_amount.to_string();
1245 if s.len() >= 2 {
1246 let chars: Vec<char> = s.chars().collect();
1247 let pos = self.rng.gen_range(0..chars.len().saturating_sub(1));
1248 if chars[pos].is_ascii_digit()
1249 && chars.get(pos + 1).is_some_and(|c| c.is_ascii_digit())
1250 {
1251 let mut new_chars = chars;
1252 new_chars.swap(pos, pos + 1);
1253 if let Ok(new_amount) =
1254 new_chars.into_iter().collect::<String>().parse::<Decimal>()
1255 {
1256 let impact = new_amount - original_amount;
1257
1258 if is_debit {
1260 entry.lines[0].debit_amount = new_amount;
1261 } else {
1262 entry.lines[0].credit_amount = new_amount;
1263 }
1264
1265 Self::rebalance_entry(entry, is_debit, impact);
1267
1268 entry.header.header_text = Some(
1269 entry.header.header_text.clone().unwrap_or_default()
1270 + " [HUMAN_ERROR:TRANSPOSITION]",
1271 );
1272 }
1273 }
1274 }
1275 }
1276 }
1277 1 => {
1278 if let Some(line) = entry.lines.get_mut(0) {
1280 let is_debit = line.debit_amount > Decimal::ZERO;
1281 let original_amount = if is_debit {
1282 line.debit_amount
1283 } else {
1284 line.credit_amount
1285 };
1286
1287 let new_amount = original_amount * Decimal::new(10, 0);
1288 let impact = new_amount - original_amount;
1289
1290 if is_debit {
1292 entry.lines[0].debit_amount = new_amount;
1293 } else {
1294 entry.lines[0].credit_amount = new_amount;
1295 }
1296
1297 Self::rebalance_entry(entry, is_debit, impact);
1299
1300 entry.header.header_text = Some(
1301 entry.header.header_text.clone().unwrap_or_default()
1302 + " [HUMAN_ERROR:DECIMAL_SHIFT]",
1303 );
1304 }
1305 }
1306 2 => {
1307 if let Some(ref mut text) = entry.header.header_text {
1309 let typos = ["teh", "adn", "wiht", "taht", "recieve"];
1310 let correct = ["the", "and", "with", "that", "receive"];
1311 let idx = self.rng.gen_range(0..typos.len());
1312 if text.to_lowercase().contains(correct[idx]) {
1313 *text = text.replace(correct[idx], typos[idx]);
1314 *text = format!("{} [HUMAN_ERROR:TYPO]", text);
1315 }
1316 }
1317 }
1318 3 => {
1319 if let Some(line) = entry.lines.get_mut(0) {
1321 let is_debit = line.debit_amount > Decimal::ZERO;
1322 let original_amount = if is_debit {
1323 line.debit_amount
1324 } else {
1325 line.credit_amount
1326 };
1327
1328 let new_amount =
1329 (original_amount / Decimal::new(100, 0)).round() * Decimal::new(100, 0);
1330 let impact = new_amount - original_amount;
1331
1332 if is_debit {
1334 entry.lines[0].debit_amount = new_amount;
1335 } else {
1336 entry.lines[0].credit_amount = new_amount;
1337 }
1338
1339 Self::rebalance_entry(entry, is_debit, impact);
1341
1342 entry.header.header_text = Some(
1343 entry.header.header_text.clone().unwrap_or_default()
1344 + " [HUMAN_ERROR:ROUNDED]",
1345 );
1346 }
1347 }
1348 4 => {
1349 if entry.header.document_date == entry.header.posting_date {
1352 let days_late = self.rng.gen_range(5..15);
1353 entry.header.document_date =
1354 entry.header.posting_date - chrono::Duration::days(days_late);
1355 entry.header.header_text = Some(
1356 entry.header.header_text.clone().unwrap_or_default()
1357 + " [HUMAN_ERROR:LATE_POSTING]",
1358 );
1359 }
1360 }
1361 _ => {}
1362 }
1363 }
1364
1365 fn maybe_apply_approval_workflow(
1370 &mut self,
1371 entry: &mut JournalEntry,
1372 _posting_date: NaiveDate,
1373 ) {
1374 use rust_decimal::Decimal;
1375
1376 let amount = entry.total_debit();
1377
1378 if amount <= self.approval_threshold {
1380 let workflow = ApprovalWorkflow::auto_approved(
1382 entry.header.created_by.clone(),
1383 entry.header.user_persona.clone(),
1384 amount,
1385 entry.header.created_at,
1386 );
1387 entry.header.approval_workflow = Some(workflow);
1388 return;
1389 }
1390
1391 entry.header.sox_relevant = true;
1393
1394 let required_levels = if amount > Decimal::new(100000, 0) {
1396 3 } else if amount > Decimal::new(50000, 0) {
1398 2 } else {
1400 1 };
1402
1403 let mut workflow = ApprovalWorkflow::new(
1405 entry.header.created_by.clone(),
1406 entry.header.user_persona.clone(),
1407 amount,
1408 );
1409 workflow.required_levels = required_levels;
1410
1411 let submit_time = entry.header.created_at;
1413 let submit_action = ApprovalAction::new(
1414 entry.header.created_by.clone(),
1415 entry.header.user_persona.clone(),
1416 self.parse_persona(&entry.header.user_persona),
1417 ApprovalActionType::Submit,
1418 0,
1419 )
1420 .with_timestamp(submit_time);
1421
1422 workflow.actions.push(submit_action);
1423 workflow.status = ApprovalStatus::Pending;
1424 workflow.submitted_at = Some(submit_time);
1425
1426 let mut current_time = submit_time;
1428 for level in 1..=required_levels {
1429 let delay_hours = self.rng.gen_range(1..4);
1431 current_time += chrono::Duration::hours(delay_hours);
1432
1433 while current_time.weekday() == chrono::Weekday::Sat
1435 || current_time.weekday() == chrono::Weekday::Sun
1436 {
1437 current_time += chrono::Duration::days(1);
1438 }
1439
1440 let (approver_id, approver_role) = self.select_approver(level);
1442
1443 let approve_action = ApprovalAction::new(
1444 approver_id.clone(),
1445 format!("{:?}", approver_role),
1446 approver_role,
1447 ApprovalActionType::Approve,
1448 level,
1449 )
1450 .with_timestamp(current_time);
1451
1452 workflow.actions.push(approve_action);
1453 workflow.current_level = level;
1454 }
1455
1456 workflow.status = ApprovalStatus::Approved;
1458 workflow.approved_at = Some(current_time);
1459
1460 entry.header.approval_workflow = Some(workflow);
1461 }
1462
1463 fn select_approver(&mut self, level: u8) -> (String, UserPersona) {
1465 let persona = match level {
1466 1 => UserPersona::Manager,
1467 2 => UserPersona::Controller,
1468 _ => UserPersona::Executive,
1469 };
1470
1471 if let Some(ref pool) = self.user_pool {
1473 if let Some(user) = pool.get_random_user(persona, &mut self.rng) {
1474 return (user.user_id.clone(), persona);
1475 }
1476 }
1477
1478 let approver_id = match persona {
1480 UserPersona::Manager => format!("MGR{:04}", self.rng.gen_range(1..100)),
1481 UserPersona::Controller => format!("CTRL{:04}", self.rng.gen_range(1..20)),
1482 UserPersona::Executive => format!("EXEC{:04}", self.rng.gen_range(1..10)),
1483 _ => format!("USR{:04}", self.rng.gen_range(1..1000)),
1484 };
1485
1486 (approver_id, persona)
1487 }
1488
1489 fn parse_persona(&self, persona_str: &str) -> UserPersona {
1491 match persona_str.to_lowercase().as_str() {
1492 s if s.contains("junior") => UserPersona::JuniorAccountant,
1493 s if s.contains("senior") => UserPersona::SeniorAccountant,
1494 s if s.contains("controller") => UserPersona::Controller,
1495 s if s.contains("manager") => UserPersona::Manager,
1496 s if s.contains("executive") => UserPersona::Executive,
1497 s if s.contains("automated") || s.contains("system") => UserPersona::AutomatedSystem,
1498 _ => UserPersona::JuniorAccountant, }
1500 }
1501
1502 pub fn with_approval(mut self, enabled: bool) -> Self {
1504 self.approval_enabled = enabled;
1505 self
1506 }
1507
1508 pub fn with_approval_threshold(mut self, threshold: rust_decimal::Decimal) -> Self {
1510 self.approval_threshold = threshold;
1511 self
1512 }
1513
1514 pub fn with_drift_controller(mut self, controller: DriftController) -> Self {
1520 self.drift_controller = Some(controller);
1521 self
1522 }
1523
1524 pub fn with_drift_config(mut self, config: DriftConfig, seed: u64) -> Self {
1529 if config.enabled {
1530 let total_periods = self.calculate_total_periods();
1531 self.drift_controller = Some(DriftController::new(config, seed, total_periods));
1532 }
1533 self
1534 }
1535
1536 fn calculate_total_periods(&self) -> u32 {
1538 let start_year = self.start_date.year();
1539 let start_month = self.start_date.month();
1540 let end_year = self.end_date.year();
1541 let end_month = self.end_date.month();
1542
1543 ((end_year - start_year) * 12 + (end_month as i32 - start_month as i32) + 1).max(1) as u32
1544 }
1545
1546 fn date_to_period(&self, date: NaiveDate) -> u32 {
1548 let start_year = self.start_date.year();
1549 let start_month = self.start_date.month() as i32;
1550 let date_year = date.year();
1551 let date_month = date.month() as i32;
1552
1553 ((date_year - start_year) * 12 + (date_month - start_month)).max(0) as u32
1554 }
1555
1556 fn get_drift_adjustments(&self, date: NaiveDate) -> DriftAdjustments {
1558 if let Some(ref controller) = self.drift_controller {
1559 let period = self.date_to_period(date);
1560 controller.compute_adjustments(period)
1561 } else {
1562 DriftAdjustments::none()
1563 }
1564 }
1565
1566 fn select_user(&mut self, is_automated: bool) -> (String, String) {
1568 if let Some(ref pool) = self.user_pool {
1569 let persona = if is_automated {
1570 UserPersona::AutomatedSystem
1571 } else {
1572 let roll: f64 = self.rng.gen();
1574 if roll < 0.4 {
1575 UserPersona::JuniorAccountant
1576 } else if roll < 0.7 {
1577 UserPersona::SeniorAccountant
1578 } else if roll < 0.85 {
1579 UserPersona::Controller
1580 } else {
1581 UserPersona::Manager
1582 }
1583 };
1584
1585 if let Some(user) = pool.get_random_user(persona, &mut self.rng) {
1586 return (
1587 user.user_id.clone(),
1588 format!("{:?}", user.persona).to_lowercase(),
1589 );
1590 }
1591 }
1592
1593 if is_automated {
1595 (
1596 format!("BATCH{:04}", self.rng.gen_range(1..=20)),
1597 "automated_system".to_string(),
1598 )
1599 } else {
1600 (
1601 format!("USER{:04}", self.rng.gen_range(1..=40)),
1602 "senior_accountant".to_string(),
1603 )
1604 }
1605 }
1606
1607 fn select_source(&mut self) -> TransactionSource {
1609 let roll: f64 = self.rng.gen();
1610 let dist = &self.config.source_distribution;
1611
1612 if roll < dist.manual {
1613 TransactionSource::Manual
1614 } else if roll < dist.manual + dist.automated {
1615 TransactionSource::Automated
1616 } else if roll < dist.manual + dist.automated + dist.recurring {
1617 TransactionSource::Recurring
1618 } else {
1619 TransactionSource::Adjustment
1620 }
1621 }
1622
1623 fn select_business_process(&mut self) -> BusinessProcess {
1625 let roll: f64 = self.rng.gen();
1626
1627 if roll < 0.35 {
1629 BusinessProcess::O2C
1630 } else if roll < 0.65 {
1631 BusinessProcess::P2P
1632 } else if roll < 0.85 {
1633 BusinessProcess::R2R
1634 } else if roll < 0.95 {
1635 BusinessProcess::H2R
1636 } else {
1637 BusinessProcess::A2R
1638 }
1639 }
1640
1641 fn select_debit_account(&mut self) -> &GLAccount {
1642 let accounts = self.coa.get_accounts_by_type(AccountType::Asset);
1643 let expense_accounts = self.coa.get_accounts_by_type(AccountType::Expense);
1644
1645 let all: Vec<_> = if self.rng.gen::<f64>() < 0.6 {
1647 accounts
1648 } else {
1649 expense_accounts
1650 };
1651
1652 all.choose(&mut self.rng)
1653 .copied()
1654 .unwrap_or_else(|| &self.coa.accounts[0])
1655 }
1656
1657 fn select_credit_account(&mut self) -> &GLAccount {
1658 let liability_accounts = self.coa.get_accounts_by_type(AccountType::Liability);
1659 let revenue_accounts = self.coa.get_accounts_by_type(AccountType::Revenue);
1660
1661 let all: Vec<_> = if self.rng.gen::<f64>() < 0.6 {
1663 liability_accounts
1664 } else {
1665 revenue_accounts
1666 };
1667
1668 all.choose(&mut self.rng)
1669 .copied()
1670 .unwrap_or_else(|| &self.coa.accounts[0])
1671 }
1672}
1673
1674impl Generator for JournalEntryGenerator {
1675 type Item = JournalEntry;
1676 type Config = (
1677 TransactionConfig,
1678 Arc<ChartOfAccounts>,
1679 Vec<String>,
1680 NaiveDate,
1681 NaiveDate,
1682 );
1683
1684 fn new(config: Self::Config, seed: u64) -> Self {
1685 Self::new_with_params(config.0, config.1, config.2, config.3, config.4, seed)
1686 }
1687
1688 fn generate_one(&mut self) -> Self::Item {
1689 self.generate()
1690 }
1691
1692 fn reset(&mut self) {
1693 self.rng = ChaCha8Rng::seed_from_u64(self.seed);
1694 self.line_sampler.reset(self.seed + 1);
1695 self.amount_sampler.reset(self.seed + 2);
1696 self.temporal_sampler.reset(self.seed + 3);
1697 self.count = 0;
1698 self.uuid_factory.reset();
1699
1700 let mut ref_gen = ReferenceGenerator::new(
1702 self.start_date.year(),
1703 self.companies.first().map(|s| s.as_str()).unwrap_or("1000"),
1704 );
1705 ref_gen.set_prefix(
1706 ReferenceType::Invoice,
1707 &self.template_config.references.invoice_prefix,
1708 );
1709 ref_gen.set_prefix(
1710 ReferenceType::PurchaseOrder,
1711 &self.template_config.references.po_prefix,
1712 );
1713 ref_gen.set_prefix(
1714 ReferenceType::SalesOrder,
1715 &self.template_config.references.so_prefix,
1716 );
1717 self.reference_generator = ref_gen;
1718 }
1719
1720 fn count(&self) -> u64 {
1721 self.count
1722 }
1723
1724 fn seed(&self) -> u64 {
1725 self.seed
1726 }
1727}
1728
#[cfg(test)]
#[allow(clippy::unwrap_used)]
mod tests {
    use super::*;
    use crate::ChartOfAccountsGenerator;

    /// Small manufacturing chart of accounts (seed 42) used by every test.
    fn small_coa() -> Arc<ChartOfAccounts> {
        let mut coa_gen =
            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
        Arc::new(coa_gen.generate())
    }

    /// Shorthand for a date that is known to be valid.
    fn ymd(year: i32, month: u32, day: u32) -> NaiveDate {
        NaiveDate::from_ymd_opt(year, month, day).unwrap()
    }

    /// Default-config generator for company "1000", running from 2024-01-01
    /// to `end_date`.
    fn generator_until(
        coa: Arc<ChartOfAccounts>,
        end_date: NaiveDate,
        seed: u64,
    ) -> JournalEntryGenerator {
        JournalEntryGenerator::new_with_params(
            TransactionConfig::default(),
            coa,
            vec!["1000".to_string()],
            ymd(2024, 1, 1),
            end_date,
            seed,
        )
    }

    /// Default-config generator for company "1000" covering all of 2024.
    fn generator_2024(coa: Arc<ChartOfAccounts>, seed: u64) -> JournalEntryGenerator {
        generator_until(coa, ymd(2024, 12, 31), seed)
    }

    /// Entries without an injected human error must balance, and most
    /// entries must be free of such errors.
    #[test]
    fn test_generate_balanced_entries() {
        let mut je_gen = generator_2024(small_coa(), 42);

        let mut balanced_count = 0;
        for _ in 0..100 {
            let entry = je_gen.generate();

            let has_human_error = entry
                .header
                .header_text
                .as_deref()
                .is_some_and(|text| text.contains("[HUMAN_ERROR:"));

            if !has_human_error {
                assert!(
                    entry.is_balanced(),
                    "Entry {:?} is not balanced",
                    entry.header.document_id
                );
                balanced_count += 1;
            }
            assert!(entry.line_count() >= 2, "Entry has fewer than 2 lines");
        }

        assert!(
            balanced_count >= 80,
            "Expected at least 80 balanced entries, got {}",
            balanced_count
        );
    }

    /// Two generators built from identical inputs produce identical output.
    #[test]
    fn test_deterministic_generation() {
        let coa = small_coa();
        let mut gen1 = generator_2024(Arc::clone(&coa), 42);
        let mut gen2 = generator_2024(coa, 42);

        for _ in 0..50 {
            let e1 = gen1.generate();
            let e2 = gen2.generate();
            assert_eq!(e1.header.document_id, e2.header.document_id);
            assert_eq!(e1.total_debit(), e2.total_debit());
        }
    }

    /// With all template features switched on, header text, reference,
    /// business process and line texts are populated.
    #[test]
    fn test_templates_generate_descriptions() {
        let coa = small_coa();

        let template_config = TemplateConfig {
            names: datasynth_config::schema::NameTemplateConfig {
                generate_realistic_names: true,
                email_domain: "test.com".to_string(),
                culture_distribution: datasynth_config::schema::CultureDistribution::default(),
            },
            descriptions: datasynth_config::schema::DescriptionTemplateConfig {
                generate_header_text: true,
                generate_line_text: true,
            },
            references: datasynth_config::schema::ReferenceTemplateConfig {
                generate_references: true,
                invoice_prefix: "TEST-INV".to_string(),
                po_prefix: "TEST-PO".to_string(),
                so_prefix: "TEST-SO".to_string(),
            },
        };

        let mut je_gen = JournalEntryGenerator::new_with_full_config(
            TransactionConfig::default(),
            coa,
            vec!["1000".to_string()],
            ymd(2024, 1, 1),
            ymd(2024, 12, 31),
            42,
            template_config,
            None,
        )
        .with_persona_errors(false);

        for _ in 0..10 {
            let entry = je_gen.generate();
            let header = &entry.header;

            assert!(
                header.header_text.is_some(),
                "Header text should be populated"
            );

            assert!(header.reference.is_some(), "Reference should be populated");

            assert!(
                header.business_process.is_some(),
                "Business process should be set"
            );

            for line in &entry.lines {
                assert!(line.line_text.is_some(), "Line text should be populated");
            }

            assert!(entry.is_balanced());
        }
    }

    /// Entries generated with a user pool always carry a creator id.
    #[test]
    fn test_user_pool_integration() {
        let coa = small_coa();
        let companies = vec!["1000".to_string()];

        let mut user_gen = crate::UserGenerator::new(42);
        let user_pool = user_gen.generate_standard(&companies);

        let mut je_gen = JournalEntryGenerator::new_with_full_config(
            TransactionConfig::default(),
            coa,
            companies,
            ymd(2024, 1, 1),
            ymd(2024, 12, 31),
            42,
            TemplateConfig::default(),
            Some(user_pool),
        );

        for _ in 0..20 {
            let entry = je_gen.generate();

            assert!(!entry.header.created_by.is_empty());
        }
    }

    /// Supplying vendors, customers and materials flips the
    /// "using real master data" flag.
    #[test]
    fn test_master_data_connection() {
        let coa = small_coa();

        let vendors = vec![
            Vendor::new("V-TEST-001", "Test Vendor Alpha", VendorType::Supplier),
            Vendor::new("V-TEST-002", "Test Vendor Beta", VendorType::Technology),
        ];

        let customers = vec![
            Customer::new("C-TEST-001", "Test Customer One", CustomerType::Corporate),
            Customer::new(
                "C-TEST-002",
                "Test Customer Two",
                CustomerType::SmallBusiness,
            ),
        ];

        let materials = vec![Material::new(
            "MAT-TEST-001",
            "Test Material A",
            MaterialType::RawMaterial,
        )];

        let generator = generator_2024(coa, 42);

        assert!(!generator.is_using_real_master_data());

        let generator_with_data = generator
            .with_vendors(&vendors)
            .with_customers(&customers)
            .with_materials(&materials);

        assert!(generator_with_data.is_using_real_master_data());
    }

    /// `with_master_data` sets the "using real master data" flag in one call.
    #[test]
    fn test_with_master_data_convenience_method() {
        let coa = small_coa();

        let vendors = vec![Vendor::new("V-001", "Vendor One", VendorType::Supplier)];
        let customers = vec![Customer::new(
            "C-001",
            "Customer One",
            CustomerType::Corporate,
        )];
        let materials = vec![Material::new(
            "MAT-001",
            "Material One",
            MaterialType::RawMaterial,
        )];

        let generator =
            generator_2024(coa, 42).with_master_data(&vendors, &customers, &materials);

        assert!(generator.is_using_real_master_data());
    }

    /// Stress factors raise the error rate on selected dates relative to a
    /// regular day.
    #[test]
    fn test_stress_factors_increase_error_rate() {
        let generator = generator_2024(small_coa(), 42);

        let base_rate = 0.1;
        let rate_on =
            |year, month, day| generator.apply_stress_factors(base_rate, ymd(year, month, day));

        let regular_rate = rate_on(2024, 6, 15);
        assert!(
            (regular_rate - base_rate).abs() < 0.01,
            "Regular day should have minimal stress factor adjustment"
        );

        let month_end_rate = rate_on(2024, 6, 29);
        assert!(
            month_end_rate > regular_rate,
            "Month end should have higher error rate than regular day"
        );

        let year_end_rate = rate_on(2024, 12, 30);
        assert!(
            year_end_rate > month_end_rate,
            "Year end should have highest error rate"
        );

        let friday_rate = rate_on(2024, 6, 14);
        assert!(
            friday_rate > regular_rate,
            "Friday should have higher error rate than mid-week"
        );

        let monday_rate = rate_on(2024, 6, 17);
        assert!(
            monday_rate > regular_rate,
            "Monday should have higher error rate than mid-week"
        );
    }

    /// All generated entries balance and at least one posting date is
    /// shared by several entries.
    #[test]
    fn test_batching_produces_similar_entries() {
        let mut je_gen = generator_2024(small_coa(), 123).with_persona_errors(false);

        let entries: Vec<JournalEntry> = std::iter::repeat_with(|| je_gen.generate())
            .take(200)
            .collect();

        for entry in &entries {
            assert!(
                entry.is_balanced(),
                "All entries including batched should be balanced"
            );
        }

        // Tally how many entries were posted on each date.
        let mut date_counts: std::collections::HashMap<NaiveDate, usize> =
            std::collections::HashMap::new();
        for entry in &entries {
            *date_counts.entry(entry.header.posting_date).or_default() += 1;
        }

        let any_shared_date = date_counts.values().any(|&count| count > 1);
        assert!(
            any_shared_date,
            "With batching, should see some dates with multiple entries"
        );
    }

    /// With business-day patterns enabled, no entry is posted on a weekend.
    #[test]
    fn test_temporal_patterns_business_days() {
        use datasynth_config::schema::{
            BusinessDaySchemaConfig, CalendarSchemaConfig, TemporalPatternsConfig,
        };

        let coa = small_coa();

        let temporal_config = TemporalPatternsConfig {
            enabled: true,
            business_days: BusinessDaySchemaConfig {
                enabled: true,
                ..Default::default()
            },
            calendars: CalendarSchemaConfig {
                regions: vec!["US".to_string()],
                custom_holidays: vec![],
            },
            ..Default::default()
        };

        let mut je_gen = generator_until(coa, ymd(2024, 3, 31), 42)
            .with_temporal_patterns(temporal_config, 42)
            .with_persona_errors(false);

        let entries: Vec<JournalEntry> = std::iter::repeat_with(|| je_gen.generate())
            .take(100)
            .collect();

        for entry in &entries {
            let is_weekend = matches!(
                entry.header.posting_date.weekday(),
                chrono::Weekday::Sat | chrono::Weekday::Sun
            );
            assert!(
                !is_weekend,
                "Posting date {:?} should not be a weekend",
                entry.header.posting_date
            );
        }
    }
}