1use chrono::{Datelike, NaiveDate};
4use rand::prelude::*;
5use rand_chacha::ChaCha8Rng;
6use rust_decimal::prelude::*;
7use rust_decimal::Decimal;
8use std::sync::Arc;
9
10use datasynth_config::schema::{
11 FraudConfig, GeneratorConfig, TemplateConfig, TemporalPatternsConfig, TransactionConfig,
12};
13use datasynth_core::distributions::{
14 BusinessDayCalculator, CrossDayConfig, DriftAdjustments, DriftConfig, DriftController,
15 EventType, LagDistribution, PeriodEndConfig, PeriodEndDynamics, PeriodEndModel,
16 ProcessingLagCalculator, ProcessingLagConfig, *,
17};
18use datasynth_core::models::*;
19use datasynth_core::templates::{
20 descriptions::DescriptionContext, DescriptionGenerator, ReferenceGenerator, ReferenceType,
21};
22use datasynth_core::traits::Generator;
23use datasynth_core::uuid_factory::{DeterministicUuidFactory, GeneratorType};
24
25use crate::company_selector::WeightedCompanySelector;
26use crate::user_generator::{UserGenerator, UserGeneratorConfig};
27
28pub struct JournalEntryGenerator {
30 rng: ChaCha8Rng,
31 seed: u64,
32 config: TransactionConfig,
33 coa: Arc<ChartOfAccounts>,
34 companies: Vec<String>,
35 company_selector: WeightedCompanySelector,
36 line_sampler: LineItemSampler,
37 amount_sampler: AmountSampler,
38 temporal_sampler: TemporalSampler,
39 start_date: NaiveDate,
40 end_date: NaiveDate,
41 count: u64,
42 uuid_factory: DeterministicUuidFactory,
43 user_pool: Option<UserPool>,
45 description_generator: DescriptionGenerator,
46 reference_generator: ReferenceGenerator,
47 template_config: TemplateConfig,
48 vendor_pool: VendorPool,
49 customer_pool: CustomerPool,
50 material_pool: Option<MaterialPool>,
52 using_real_master_data: bool,
54 fraud_config: FraudConfig,
56 persona_errors_enabled: bool,
58 approval_enabled: bool,
60 approval_threshold: rust_decimal::Decimal,
61 batch_state: Option<BatchState>,
63 drift_controller: Option<DriftController>,
65 business_day_calculator: Option<BusinessDayCalculator>,
67 processing_lag_calculator: Option<ProcessingLagCalculator>,
68 temporal_patterns_config: Option<TemporalPatternsConfig>,
69}
70
71#[derive(Clone)]
76struct BatchState {
77 base_vendor: Option<String>,
79 base_customer: Option<String>,
80 base_account_number: String,
81 base_amount: rust_decimal::Decimal,
82 base_business_process: Option<BusinessProcess>,
83 base_posting_date: NaiveDate,
84 remaining: u8,
86}
87
88impl JournalEntryGenerator {
89 pub fn new_with_params(
91 config: TransactionConfig,
92 coa: Arc<ChartOfAccounts>,
93 companies: Vec<String>,
94 start_date: NaiveDate,
95 end_date: NaiveDate,
96 seed: u64,
97 ) -> Self {
98 Self::new_with_full_config(
99 config,
100 coa,
101 companies,
102 start_date,
103 end_date,
104 seed,
105 TemplateConfig::default(),
106 None,
107 )
108 }
109
110 #[allow(clippy::too_many_arguments)]
112 pub fn new_with_full_config(
113 config: TransactionConfig,
114 coa: Arc<ChartOfAccounts>,
115 companies: Vec<String>,
116 start_date: NaiveDate,
117 end_date: NaiveDate,
118 seed: u64,
119 template_config: TemplateConfig,
120 user_pool: Option<UserPool>,
121 ) -> Self {
122 let user_pool = user_pool.or_else(|| {
124 if template_config.names.generate_realistic_names {
125 let user_gen_config = UserGeneratorConfig {
126 culture_distribution: vec![
127 (
128 datasynth_core::templates::NameCulture::WesternUs,
129 template_config.names.culture_distribution.western_us,
130 ),
131 (
132 datasynth_core::templates::NameCulture::Hispanic,
133 template_config.names.culture_distribution.hispanic,
134 ),
135 (
136 datasynth_core::templates::NameCulture::German,
137 template_config.names.culture_distribution.german,
138 ),
139 (
140 datasynth_core::templates::NameCulture::French,
141 template_config.names.culture_distribution.french,
142 ),
143 (
144 datasynth_core::templates::NameCulture::Chinese,
145 template_config.names.culture_distribution.chinese,
146 ),
147 (
148 datasynth_core::templates::NameCulture::Japanese,
149 template_config.names.culture_distribution.japanese,
150 ),
151 (
152 datasynth_core::templates::NameCulture::Indian,
153 template_config.names.culture_distribution.indian,
154 ),
155 ],
156 email_domain: template_config.names.email_domain.clone(),
157 generate_realistic_names: true,
158 };
159 let mut user_gen = UserGenerator::with_config(seed + 100, user_gen_config);
160 Some(user_gen.generate_standard(&companies))
161 } else {
162 None
163 }
164 });
165
166 let mut ref_gen = ReferenceGenerator::new(
168 start_date.year(),
169 companies.first().map(|s| s.as_str()).unwrap_or("1000"),
170 );
171 ref_gen.set_prefix(
172 ReferenceType::Invoice,
173 &template_config.references.invoice_prefix,
174 );
175 ref_gen.set_prefix(
176 ReferenceType::PurchaseOrder,
177 &template_config.references.po_prefix,
178 );
179 ref_gen.set_prefix(
180 ReferenceType::SalesOrder,
181 &template_config.references.so_prefix,
182 );
183
184 let company_selector = WeightedCompanySelector::uniform(companies.clone());
186
187 Self {
188 rng: ChaCha8Rng::seed_from_u64(seed),
189 seed,
190 config: config.clone(),
191 coa,
192 companies,
193 company_selector,
194 line_sampler: LineItemSampler::with_config(
195 seed + 1,
196 config.line_item_distribution.clone(),
197 config.even_odd_distribution.clone(),
198 config.debit_credit_distribution.clone(),
199 ),
200 amount_sampler: AmountSampler::with_config(seed + 2, config.amounts.clone()),
201 temporal_sampler: TemporalSampler::with_config(
202 seed + 3,
203 config.seasonality.clone(),
204 WorkingHoursConfig::default(),
205 Vec::new(),
206 ),
207 start_date,
208 end_date,
209 count: 0,
210 uuid_factory: DeterministicUuidFactory::new(seed, GeneratorType::JournalEntry),
211 user_pool,
212 description_generator: DescriptionGenerator::new(),
213 reference_generator: ref_gen,
214 template_config,
215 vendor_pool: VendorPool::standard(),
216 customer_pool: CustomerPool::standard(),
217 material_pool: None,
218 using_real_master_data: false,
219 fraud_config: FraudConfig::default(),
220 persona_errors_enabled: true, approval_enabled: true, approval_threshold: rust_decimal::Decimal::new(10000, 0), batch_state: None,
224 drift_controller: None,
225 business_day_calculator: None,
226 processing_lag_calculator: None,
227 temporal_patterns_config: None,
228 }
229 }
230
231 pub fn from_generator_config(
236 full_config: &GeneratorConfig,
237 coa: Arc<ChartOfAccounts>,
238 start_date: NaiveDate,
239 end_date: NaiveDate,
240 seed: u64,
241 ) -> Self {
242 let companies: Vec<String> = full_config
243 .companies
244 .iter()
245 .map(|c| c.code.clone())
246 .collect();
247
248 let company_selector = WeightedCompanySelector::from_configs(&full_config.companies);
250
251 let mut generator = Self::new_with_full_config(
252 full_config.transactions.clone(),
253 coa,
254 companies,
255 start_date,
256 end_date,
257 seed,
258 full_config.templates.clone(),
259 None,
260 );
261
262 generator.company_selector = company_selector;
264
265 generator.fraud_config = full_config.fraud.clone();
267
268 let temporal_config = &full_config.temporal_patterns;
270 if temporal_config.enabled {
271 generator = generator.with_temporal_patterns(temporal_config.clone(), seed);
272 }
273
274 generator
275 }
276
277 pub fn with_temporal_patterns(mut self, config: TemporalPatternsConfig, seed: u64) -> Self {
284 if config.business_days.enabled {
286 let region = config
287 .calendars
288 .regions
289 .first()
290 .map(|r| Self::parse_region(r))
291 .unwrap_or(Region::US);
292
293 let calendar = HolidayCalendar::new(region, self.start_date.year());
294 self.business_day_calculator = Some(BusinessDayCalculator::new(calendar));
295 }
296
297 if config.processing_lags.enabled {
299 let lag_config = Self::convert_processing_lag_config(&config.processing_lags);
300 self.processing_lag_calculator =
301 Some(ProcessingLagCalculator::with_config(seed, lag_config));
302 }
303
304 let model = config.period_end.model.as_deref().unwrap_or("flat");
306 if model != "flat"
307 || config
308 .period_end
309 .month_end
310 .as_ref()
311 .is_some_and(|m| m.peak_multiplier.unwrap_or(1.0) != 1.0)
312 {
313 let dynamics = Self::convert_period_end_config(&config.period_end);
314 self.temporal_sampler.set_period_end_dynamics(dynamics);
315 }
316
317 self.temporal_patterns_config = Some(config);
318 self
319 }
320
321 fn convert_processing_lag_config(
323 schema: &datasynth_config::schema::ProcessingLagSchemaConfig,
324 ) -> ProcessingLagConfig {
325 let mut config = ProcessingLagConfig {
326 enabled: schema.enabled,
327 ..Default::default()
328 };
329
330 let convert_lag = |lag: &datasynth_config::schema::LagDistributionSchemaConfig| {
332 let mut dist = LagDistribution::log_normal(lag.mu, lag.sigma);
333 if let Some(min) = lag.min_hours {
334 dist.min_lag_hours = min;
335 }
336 if let Some(max) = lag.max_hours {
337 dist.max_lag_hours = max;
338 }
339 dist
340 };
341
342 if let Some(ref lag) = schema.sales_order_lag {
344 config
345 .event_lags
346 .insert(EventType::SalesOrder, convert_lag(lag));
347 }
348 if let Some(ref lag) = schema.purchase_order_lag {
349 config
350 .event_lags
351 .insert(EventType::PurchaseOrder, convert_lag(lag));
352 }
353 if let Some(ref lag) = schema.goods_receipt_lag {
354 config
355 .event_lags
356 .insert(EventType::GoodsReceipt, convert_lag(lag));
357 }
358 if let Some(ref lag) = schema.invoice_receipt_lag {
359 config
360 .event_lags
361 .insert(EventType::InvoiceReceipt, convert_lag(lag));
362 }
363 if let Some(ref lag) = schema.invoice_issue_lag {
364 config
365 .event_lags
366 .insert(EventType::InvoiceIssue, convert_lag(lag));
367 }
368 if let Some(ref lag) = schema.payment_lag {
369 config
370 .event_lags
371 .insert(EventType::Payment, convert_lag(lag));
372 }
373 if let Some(ref lag) = schema.journal_entry_lag {
374 config
375 .event_lags
376 .insert(EventType::JournalEntry, convert_lag(lag));
377 }
378
379 if let Some(ref cross_day) = schema.cross_day_posting {
381 config.cross_day = CrossDayConfig {
382 enabled: cross_day.enabled,
383 probability_by_hour: cross_day.probability_by_hour.clone(),
384 ..Default::default()
385 };
386 }
387
388 config
389 }
390
391 fn convert_period_end_config(
393 schema: &datasynth_config::schema::PeriodEndSchemaConfig,
394 ) -> PeriodEndDynamics {
395 let model_type = schema.model.as_deref().unwrap_or("exponential");
396
397 let convert_period =
399 |period: Option<&datasynth_config::schema::PeriodEndModelSchemaConfig>,
400 default_peak: f64|
401 -> PeriodEndConfig {
402 if let Some(p) = period {
403 let model = match model_type {
404 "flat" => PeriodEndModel::FlatMultiplier {
405 multiplier: p.peak_multiplier.unwrap_or(default_peak),
406 },
407 "extended_crunch" => PeriodEndModel::ExtendedCrunch {
408 start_day: p.start_day.unwrap_or(-10),
409 sustained_high_days: p.sustained_high_days.unwrap_or(3),
410 peak_multiplier: p.peak_multiplier.unwrap_or(default_peak),
411 ramp_up_days: 3, },
413 _ => PeriodEndModel::ExponentialAcceleration {
414 start_day: p.start_day.unwrap_or(-10),
415 base_multiplier: p.base_multiplier.unwrap_or(1.0),
416 peak_multiplier: p.peak_multiplier.unwrap_or(default_peak),
417 decay_rate: p.decay_rate.unwrap_or(0.3),
418 },
419 };
420 PeriodEndConfig {
421 enabled: true,
422 model,
423 additional_multiplier: p.additional_multiplier.unwrap_or(1.0),
424 }
425 } else {
426 PeriodEndConfig {
427 enabled: true,
428 model: PeriodEndModel::ExponentialAcceleration {
429 start_day: -10,
430 base_multiplier: 1.0,
431 peak_multiplier: default_peak,
432 decay_rate: 0.3,
433 },
434 additional_multiplier: 1.0,
435 }
436 }
437 };
438
439 PeriodEndDynamics::new(
440 convert_period(schema.month_end.as_ref(), 2.0),
441 convert_period(schema.quarter_end.as_ref(), 3.5),
442 convert_period(schema.year_end.as_ref(), 5.0),
443 )
444 }
445
446 fn parse_region(region_str: &str) -> Region {
448 match region_str.to_uppercase().as_str() {
449 "US" => Region::US,
450 "DE" => Region::DE,
451 "GB" => Region::GB,
452 "CN" => Region::CN,
453 "JP" => Region::JP,
454 "IN" => Region::IN,
455 "BR" => Region::BR,
456 "MX" => Region::MX,
457 "AU" => Region::AU,
458 "SG" => Region::SG,
459 "KR" => Region::KR,
460 _ => Region::US,
461 }
462 }
463
    /// Replaces the selector used to pick a company code for each entry.
    pub fn set_company_selector(&mut self, selector: WeightedCompanySelector) {
        self.company_selector = selector;
    }
468
    /// Returns the selector currently used to pick company codes.
    pub fn company_selector(&self) -> &WeightedCompanySelector {
        &self.company_selector
    }
473
    /// Replaces the fraud injection settings in place.
    pub fn set_fraud_config(&mut self, config: FraudConfig) {
        self.fraud_config = config;
    }
478
479 pub fn with_vendors(mut self, vendors: &[Vendor]) -> Self {
484 if !vendors.is_empty() {
485 self.vendor_pool = VendorPool::from_vendors(vendors.to_vec());
486 self.using_real_master_data = true;
487 }
488 self
489 }
490
491 pub fn with_customers(mut self, customers: &[Customer]) -> Self {
496 if !customers.is_empty() {
497 self.customer_pool = CustomerPool::from_customers(customers.to_vec());
498 self.using_real_master_data = true;
499 }
500 self
501 }
502
503 pub fn with_materials(mut self, materials: &[Material]) -> Self {
507 if !materials.is_empty() {
508 self.material_pool = Some(MaterialPool::from_materials(materials.to_vec()));
509 self.using_real_master_data = true;
510 }
511 self
512 }
513
514 pub fn with_master_data(
519 self,
520 vendors: &[Vendor],
521 customers: &[Customer],
522 materials: &[Material],
523 ) -> Self {
524 self.with_vendors(vendors)
525 .with_customers(customers)
526 .with_materials(materials)
527 }
528
    /// Returns `true` once any non-empty vendor, customer or material slice
    /// has been installed through the `with_*` master-data methods.
    pub fn is_using_real_master_data(&self) -> bool {
        self.using_real_master_data
    }
533
534 fn determine_fraud(&mut self) -> Option<FraudType> {
536 if !self.fraud_config.enabled {
537 return None;
538 }
539
540 if self.rng.gen::<f64>() >= self.fraud_config.fraud_rate {
542 return None;
543 }
544
545 Some(self.select_fraud_type())
547 }
548
549 fn select_fraud_type(&mut self) -> FraudType {
551 let dist = &self.fraud_config.fraud_type_distribution;
552 let roll: f64 = self.rng.gen();
553
554 let mut cumulative = 0.0;
555
556 cumulative += dist.suspense_account_abuse;
557 if roll < cumulative {
558 return FraudType::SuspenseAccountAbuse;
559 }
560
561 cumulative += dist.fictitious_transaction;
562 if roll < cumulative {
563 return FraudType::FictitiousTransaction;
564 }
565
566 cumulative += dist.revenue_manipulation;
567 if roll < cumulative {
568 return FraudType::RevenueManipulation;
569 }
570
571 cumulative += dist.expense_capitalization;
572 if roll < cumulative {
573 return FraudType::ExpenseCapitalization;
574 }
575
576 cumulative += dist.split_transaction;
577 if roll < cumulative {
578 return FraudType::SplitTransaction;
579 }
580
581 cumulative += dist.timing_anomaly;
582 if roll < cumulative {
583 return FraudType::TimingAnomaly;
584 }
585
586 cumulative += dist.unauthorized_access;
587 if roll < cumulative {
588 return FraudType::UnauthorizedAccess;
589 }
590
591 FraudType::DuplicatePayment
593 }
594
595 fn fraud_type_to_amount_pattern(&self, fraud_type: FraudType) -> FraudAmountPattern {
597 match fraud_type {
598 FraudType::SplitTransaction | FraudType::JustBelowThreshold => {
599 FraudAmountPattern::ThresholdAdjacent
600 }
601 FraudType::FictitiousTransaction
602 | FraudType::FictitiousEntry
603 | FraudType::SuspenseAccountAbuse
604 | FraudType::RoundDollarManipulation => FraudAmountPattern::ObviousRoundNumbers,
605 FraudType::RevenueManipulation
606 | FraudType::ExpenseCapitalization
607 | FraudType::ImproperCapitalization
608 | FraudType::ReserveManipulation
609 | FraudType::UnauthorizedAccess
610 | FraudType::PrematureRevenue
611 | FraudType::UnderstatedLiabilities
612 | FraudType::OverstatedAssets
613 | FraudType::ChannelStuffing => FraudAmountPattern::StatisticallyImprobable,
614 FraudType::DuplicatePayment
615 | FraudType::TimingAnomaly
616 | FraudType::SelfApproval
617 | FraudType::ExceededApprovalLimit
618 | FraudType::SegregationOfDutiesViolation
619 | FraudType::UnauthorizedApproval
620 | FraudType::CollusiveApproval
621 | FraudType::FictitiousVendor
622 | FraudType::ShellCompanyPayment
623 | FraudType::Kickback
624 | FraudType::KickbackScheme
625 | FraudType::InvoiceManipulation
626 | FraudType::AssetMisappropriation
627 | FraudType::InventoryTheft
628 | FraudType::GhostEmployee => FraudAmountPattern::Normal,
629 FraudType::ImproperRevenueRecognition
631 | FraudType::ImproperPoAllocation
632 | FraudType::VariableConsiderationManipulation
633 | FraudType::ContractModificationMisstatement => {
634 FraudAmountPattern::StatisticallyImprobable
635 }
636 FraudType::LeaseClassificationManipulation
638 | FraudType::OffBalanceSheetLease
639 | FraudType::LeaseLiabilityUnderstatement
640 | FraudType::RouAssetMisstatement => FraudAmountPattern::StatisticallyImprobable,
641 FraudType::FairValueHierarchyManipulation
643 | FraudType::Level3InputManipulation
644 | FraudType::ValuationTechniqueManipulation => {
645 FraudAmountPattern::StatisticallyImprobable
646 }
647 FraudType::DelayedImpairment
649 | FraudType::ImpairmentTestAvoidance
650 | FraudType::CashFlowProjectionManipulation
651 | FraudType::ImproperImpairmentReversal => FraudAmountPattern::StatisticallyImprobable,
652 }
653 }
654
    /// Returns the next document id from the generator's UUID factory.
    fn generate_deterministic_uuid(&self) -> uuid::Uuid {
        self.uuid_factory.next()
    }
659
    /// Generates the next journal entry.
    ///
    /// If a batch is in progress (`batch_state` with `remaining > 0`) the call
    /// is delegated to `generate_batched_entry`. Otherwise a fresh entry is
    /// built: posting date, company, line layout, source, business process,
    /// fraud decision, creation time and user are sampled in that order, then
    /// the header, the debit lines and the credit lines are assembled. Finally
    /// persona errors, the approval workflow and batch start-up are applied.
    ///
    /// The statement order determines the sequence of random draws, so
    /// reordering statements changes the generated data for a given seed.
    pub fn generate(&mut self) -> JournalEntry {
        // Continue an in-progress batch before starting anything new.
        if let Some(ref state) = self.batch_state {
            if state.remaining > 0 {
                return self.generate_batched_entry();
            }
        }

        self.count += 1;

        let document_id = self.generate_deterministic_uuid();

        // Sample a posting date from the configured date range.
        let mut posting_date = self
            .temporal_sampler
            .sample_date(self.start_date, self.end_date);

        // With a business-day calculator installed, move non-business days to
        // the next business day; if that overshoots `end_date`, fall back to
        // a business day obtained from `prev_business_day(end_date, true)`.
        // NOTE(review): the boolean arguments look like "inclusive" flags - confirm.
        if let Some(ref calc) = self.business_day_calculator {
            if !calc.is_business_day(posting_date) {
                posting_date = calc.next_business_day(posting_date, false);
                if posting_date > self.end_date {
                    posting_date = calc.prev_business_day(self.end_date, true);
                }
            }
        }

        let company_code = self.company_selector.select(&mut self.rng).to_string();

        // Number of debit and credit lines for this entry.
        let line_spec = self.line_sampler.sample();

        // Automated and recurring sources both count as "automated" below.
        let source = self.select_source();
        let is_automated = matches!(
            source,
            TransactionSource::Automated | TransactionSource::Recurring
        );

        let business_process = self.select_business_process();

        let fraud_type = self.determine_fraud();
        let is_fraud = fraud_type.is_some();

        // The time sampler is told whether the entry was created by a human.
        let time = self.temporal_sampler.sample_time(!is_automated);
        let created_at = posting_date.and_time(time).and_utc();

        let (created_by, user_persona) = self.select_user(is_automated);

        let mut header =
            JournalEntryHeader::with_deterministic_id(company_code, posting_date, document_id);
        header.created_at = created_at;
        header.source = source;
        header.created_by = created_by;
        header.user_persona = user_persona;
        header.business_process = Some(business_process);
        header.is_fraud = is_fraud;
        header.fraud_type = fraud_type;

        // Context for description templates: posting period plus, for P2P and
        // O2C, a randomly chosen vendor or customer name.
        let mut context =
            DescriptionContext::with_period(posting_date.month(), posting_date.year());

        match business_process {
            BusinessProcess::P2P => {
                if let Some(vendor) = self.vendor_pool.random_vendor(&mut self.rng) {
                    context.vendor_name = Some(vendor.name.clone());
                }
            }
            BusinessProcess::O2C => {
                if let Some(customer) = self.customer_pool.random_customer(&mut self.rng) {
                    context.customer_name = Some(customer.name.clone());
                }
            }
            _ => {}
        }

        if self.template_config.descriptions.generate_header_text {
            header.header_text = Some(self.description_generator.generate_header_text(
                business_process,
                &context,
                &mut self.rng,
            ));
        }

        if self.template_config.references.generate_references {
            header.reference = Some(
                self.reference_generator
                    .generate_for_process_year(business_process, posting_date.year()),
            );
        }

        let mut entry = JournalEntry::new(header);

        // Fraudulent entries sample their amount from a fraud-specific pattern.
        let base_amount = if let Some(ft) = fraud_type {
            let pattern = self.fraud_type_to_amount_pattern(ft);
            self.amount_sampler.sample_fraud(pattern)
        } else {
            self.amount_sampler.sample()
        };

        // Apply drift only when the mean multiplier differs from 1.0.
        // NOTE(review): `seasonal_factor` is therefore ignored whenever the
        // mean multiplier is exactly 1.0, and the adjusted value is not
        // rounded - confirm both are intended.
        let drift_adjusted_amount = {
            let drift = self.get_drift_adjustments(posting_date);
            if drift.amount_mean_multiplier != 1.0 {
                let multiplier = drift.amount_mean_multiplier * drift.seasonal_factor;
                let adjusted = base_amount.to_f64().unwrap_or(1.0) * multiplier;
                Decimal::from_f64_retain(adjusted).unwrap_or(base_amount)
            } else {
                base_amount
            }
        };

        // Human-entered amounts get an extra variation pass.
        let total_amount = if is_automated {
            drift_adjusted_amount
        } else {
            self.apply_human_variation(drift_adjusted_amount)
        };

        // Debit side: split the total across `debit_count` lines.
        let debit_amounts = self
            .amount_sampler
            .sample_summing_to(line_spec.debit_count, total_amount);
        for (i, amount) in debit_amounts.into_iter().enumerate() {
            let account_number = self.select_debit_account().account_number.clone();
            let mut line = JournalEntryLine::debit(
                entry.header.document_id,
                (i + 1) as u32,
                account_number.clone(),
                amount,
            );

            if self.template_config.descriptions.generate_line_text {
                line.line_text = Some(self.description_generator.generate_line_text(
                    &account_number,
                    &context,
                    &mut self.rng,
                ));
            }

            entry.add_line(line);
        }

        // Credit side: same total, line numbers continue after the debits.
        let credit_amounts = self
            .amount_sampler
            .sample_summing_to(line_spec.credit_count, total_amount);
        for (i, amount) in credit_amounts.into_iter().enumerate() {
            let account_number = self.select_credit_account().account_number.clone();
            let mut line = JournalEntryLine::credit(
                entry.header.document_id,
                (line_spec.debit_count + i + 1) as u32,
                account_number.clone(),
                amount,
            );

            if self.template_config.descriptions.generate_line_text {
                line.line_text = Some(self.description_generator.generate_line_text(
                    &account_number,
                    &context,
                    &mut self.rng,
                ));
            }

            entry.add_line(line);
        }

        // Persona errors are only considered for human-entered entries.
        if self.persona_errors_enabled && !is_automated {
            self.maybe_inject_persona_error(&mut entry);
        }

        if self.approval_enabled {
            self.maybe_apply_approval_workflow(&mut entry, posting_date);
        }

        // Possibly use this entry as the template for a follow-up batch.
        self.maybe_start_batch(&entry);

        entry
    }
861
    /// Builder-style switch for persona-based human error injection.
    pub fn with_persona_errors(mut self, enabled: bool) -> Self {
        self.persona_errors_enabled = enabled;
        self
    }
870
    /// Builder-style replacement of the fraud injection settings.
    pub fn with_fraud_config(mut self, config: FraudConfig) -> Self {
        self.fraud_config = config;
        self
    }
879
    /// Returns whether persona-based human error injection is switched on.
    pub fn persona_errors_enabled(&self) -> bool {
        self.persona_errors_enabled
    }
884
    /// Builder-style batching switch.
    ///
    /// Passing `false` clears any batch currently in progress; passing `true`
    /// does nothing.
    ///
    /// NOTE(review): no persistent flag is stored, and `maybe_start_batch`
    /// does not check one, so later `generate` calls can still start new
    /// batches after `with_batching(false)` - confirm whether that is intended.
    pub fn with_batching(mut self, enabled: bool) -> Self {
        if !enabled {
            self.batch_state = None;
        }
        self
    }
895
    /// Reports whether batching is enabled; currently always `true`.
    ///
    /// NOTE(review): the result does not reflect `with_batching(false)`.
    pub fn batching_enabled(&self) -> bool {
        true
    }
901
902 fn maybe_start_batch(&mut self, entry: &JournalEntry) {
907 if entry.header.source == TransactionSource::Automated || entry.header.is_fraud {
909 return;
910 }
911
912 if self.rng.gen::<f64>() > 0.15 {
914 return;
915 }
916
917 let base_account = entry
919 .lines
920 .first()
921 .map(|l| l.gl_account.clone())
922 .unwrap_or_default();
923
924 let base_amount = entry.total_debit();
925
926 self.batch_state = Some(BatchState {
927 base_vendor: None, base_customer: None,
929 base_account_number: base_account,
930 base_amount,
931 base_business_process: entry.header.business_process,
932 base_posting_date: entry.header.posting_date,
933 remaining: self.rng.gen_range(2..7), });
935 }
936
937 fn generate_batched_entry(&mut self) -> JournalEntry {
945 use rust_decimal::Decimal;
946
947 if let Some(ref mut state) = self.batch_state {
949 state.remaining = state.remaining.saturating_sub(1);
950 }
951
952 let batch = self
953 .batch_state
954 .clone()
955 .expect("batch_state set before calling generate_batched_entry");
956
957 let posting_date = batch.base_posting_date;
959
960 self.count += 1;
961 let document_id = self.generate_deterministic_uuid();
962
963 let company_code = self.company_selector.select(&mut self.rng).to_string();
965
966 let _line_spec = LineItemSpec {
968 total_count: 2,
969 debit_count: 1,
970 credit_count: 1,
971 split_type: DebitCreditSplit::Equal,
972 };
973
974 let source = TransactionSource::Manual;
976
977 let business_process = batch.base_business_process.unwrap_or(BusinessProcess::R2R);
979
980 let time = self.temporal_sampler.sample_time(true);
982 let created_at = posting_date.and_time(time).and_utc();
983
984 let (created_by, user_persona) = self.select_user(false);
986
987 let mut header =
989 JournalEntryHeader::with_deterministic_id(company_code, posting_date, document_id);
990 header.created_at = created_at;
991 header.source = source;
992 header.created_by = created_by;
993 header.user_persona = user_persona;
994 header.business_process = Some(business_process);
995
996 let variation = self.rng.gen_range(-0.15..0.15);
998 let varied_amount =
999 batch.base_amount * (Decimal::ONE + Decimal::try_from(variation).unwrap_or_default());
1000 let total_amount = varied_amount.round_dp(2).max(Decimal::from(1));
1001
1002 let mut entry = JournalEntry::new(header);
1004
1005 let debit_line = JournalEntryLine::debit(
1007 entry.header.document_id,
1008 1,
1009 batch.base_account_number.clone(),
1010 total_amount,
1011 );
1012 entry.add_line(debit_line);
1013
1014 let credit_account = self.select_credit_account().account_number.clone();
1016 let credit_line =
1017 JournalEntryLine::credit(entry.header.document_id, 2, credit_account, total_amount);
1018 entry.add_line(credit_line);
1019
1020 if self.persona_errors_enabled {
1022 self.maybe_inject_persona_error(&mut entry);
1023 }
1024
1025 if self.approval_enabled {
1027 self.maybe_apply_approval_workflow(&mut entry, posting_date);
1028 }
1029
1030 if batch.remaining <= 1 {
1032 self.batch_state = None;
1033 }
1034
1035 entry
1036 }
1037
1038 fn maybe_inject_persona_error(&mut self, entry: &mut JournalEntry) {
1040 let persona_str = &entry.header.user_persona;
1042 let persona = match persona_str.to_lowercase().as_str() {
1043 s if s.contains("junior") => UserPersona::JuniorAccountant,
1044 s if s.contains("senior") => UserPersona::SeniorAccountant,
1045 s if s.contains("controller") => UserPersona::Controller,
1046 s if s.contains("manager") => UserPersona::Manager,
1047 s if s.contains("executive") => UserPersona::Executive,
1048 _ => return, };
1050
1051 let base_error_rate = persona.error_rate();
1053
1054 let adjusted_rate = self.apply_stress_factors(base_error_rate, entry.header.posting_date);
1056
1057 if self.rng.gen::<f64>() >= adjusted_rate {
1059 return; }
1061
1062 self.inject_human_error(entry, persona);
1064 }
1065
1066 fn apply_stress_factors(&self, base_rate: f64, posting_date: chrono::NaiveDate) -> f64 {
1075 use chrono::Datelike;
1076
1077 let mut rate = base_rate;
1078 let day = posting_date.day();
1079 let month = posting_date.month();
1080
1081 if month == 12 && day >= 28 {
1083 rate *= 2.0;
1084 return rate.min(0.5); }
1086
1087 if matches!(month, 3 | 6 | 9 | 12) && day >= 28 {
1089 rate *= 1.75; return rate.min(0.4);
1091 }
1092
1093 if day >= 28 {
1095 rate *= 1.5; }
1097
1098 let weekday = posting_date.weekday();
1100 match weekday {
1101 chrono::Weekday::Mon => {
1102 rate *= 1.2;
1104 }
1105 chrono::Weekday::Fri => {
1106 rate *= 1.3;
1108 }
1109 _ => {}
1110 }
1111
1112 rate.min(0.4)
1114 }
1115
1116 fn apply_human_variation(&mut self, amount: rust_decimal::Decimal) -> rust_decimal::Decimal {
1125 use rust_decimal::Decimal;
1126
1127 if amount < Decimal::from(10) {
1129 return amount;
1130 }
1131
1132 if self.rng.gen::<f64>() > 0.70 {
1134 return amount;
1135 }
1136
1137 let variation_type: u8 = self.rng.gen_range(0..4);
1139
1140 match variation_type {
1141 0 => {
1142 let variation_pct = self.rng.gen_range(-0.02..0.02);
1144 let variation = amount * Decimal::try_from(variation_pct).unwrap_or_default();
1145 (amount + variation).round_dp(2)
1146 }
1147 1 => {
1148 let ten = Decimal::from(10);
1150 (amount / ten).round() * ten
1151 }
1152 2 => {
1153 if amount >= Decimal::from(500) {
1155 let hundred = Decimal::from(100);
1156 (amount / hundred).round() * hundred
1157 } else {
1158 amount
1159 }
1160 }
1161 3 => {
1162 let cents = Decimal::new(self.rng.gen_range(-100..100), 2);
1164 (amount + cents).max(Decimal::ZERO).round_dp(2)
1165 }
1166 _ => amount,
1167 }
1168 }
1169
1170 fn rebalance_entry(entry: &mut JournalEntry, modified_was_debit: bool, impact: Decimal) {
1176 let balancing_idx = entry.lines.iter().position(|l| {
1178 if modified_was_debit {
1179 l.credit_amount > Decimal::ZERO
1180 } else {
1181 l.debit_amount > Decimal::ZERO
1182 }
1183 });
1184
1185 if let Some(idx) = balancing_idx {
1186 if modified_was_debit {
1187 entry.lines[idx].credit_amount += impact;
1188 } else {
1189 entry.lines[idx].debit_amount += impact;
1190 }
1191 }
1192 }
1193
1194 fn inject_human_error(&mut self, entry: &mut JournalEntry, persona: UserPersona) {
1199 use rust_decimal::Decimal;
1200
1201 let error_type: u8 = match persona {
1203 UserPersona::JuniorAccountant => {
1204 self.rng.gen_range(0..5)
1206 }
1207 UserPersona::SeniorAccountant => {
1208 self.rng.gen_range(0..3)
1210 }
1211 UserPersona::Controller | UserPersona::Manager => {
1212 self.rng.gen_range(3..5)
1214 }
1215 _ => return,
1216 };
1217
1218 match error_type {
1219 0 => {
1220 if let Some(line) = entry.lines.get_mut(0) {
1222 let is_debit = line.debit_amount > Decimal::ZERO;
1223 let original_amount = if is_debit {
1224 line.debit_amount
1225 } else {
1226 line.credit_amount
1227 };
1228
1229 let s = original_amount.to_string();
1231 if s.len() >= 2 {
1232 let chars: Vec<char> = s.chars().collect();
1233 let pos = self.rng.gen_range(0..chars.len().saturating_sub(1));
1234 if chars[pos].is_ascii_digit()
1235 && chars.get(pos + 1).is_some_and(|c| c.is_ascii_digit())
1236 {
1237 let mut new_chars = chars;
1238 new_chars.swap(pos, pos + 1);
1239 if let Ok(new_amount) =
1240 new_chars.into_iter().collect::<String>().parse::<Decimal>()
1241 {
1242 let impact = new_amount - original_amount;
1243
1244 if is_debit {
1246 entry.lines[0].debit_amount = new_amount;
1247 } else {
1248 entry.lines[0].credit_amount = new_amount;
1249 }
1250
1251 Self::rebalance_entry(entry, is_debit, impact);
1253
1254 entry.header.header_text = Some(
1255 entry.header.header_text.clone().unwrap_or_default()
1256 + " [HUMAN_ERROR:TRANSPOSITION]",
1257 );
1258 }
1259 }
1260 }
1261 }
1262 }
1263 1 => {
1264 if let Some(line) = entry.lines.get_mut(0) {
1266 let is_debit = line.debit_amount > Decimal::ZERO;
1267 let original_amount = if is_debit {
1268 line.debit_amount
1269 } else {
1270 line.credit_amount
1271 };
1272
1273 let new_amount = original_amount * Decimal::new(10, 0);
1274 let impact = new_amount - original_amount;
1275
1276 if is_debit {
1278 entry.lines[0].debit_amount = new_amount;
1279 } else {
1280 entry.lines[0].credit_amount = new_amount;
1281 }
1282
1283 Self::rebalance_entry(entry, is_debit, impact);
1285
1286 entry.header.header_text = Some(
1287 entry.header.header_text.clone().unwrap_or_default()
1288 + " [HUMAN_ERROR:DECIMAL_SHIFT]",
1289 );
1290 }
1291 }
1292 2 => {
1293 if let Some(ref mut text) = entry.header.header_text {
1295 let typos = ["teh", "adn", "wiht", "taht", "recieve"];
1296 let correct = ["the", "and", "with", "that", "receive"];
1297 let idx = self.rng.gen_range(0..typos.len());
1298 if text.to_lowercase().contains(correct[idx]) {
1299 *text = text.replace(correct[idx], typos[idx]);
1300 *text = format!("{} [HUMAN_ERROR:TYPO]", text);
1301 }
1302 }
1303 }
1304 3 => {
1305 if let Some(line) = entry.lines.get_mut(0) {
1307 let is_debit = line.debit_amount > Decimal::ZERO;
1308 let original_amount = if is_debit {
1309 line.debit_amount
1310 } else {
1311 line.credit_amount
1312 };
1313
1314 let new_amount =
1315 (original_amount / Decimal::new(100, 0)).round() * Decimal::new(100, 0);
1316 let impact = new_amount - original_amount;
1317
1318 if is_debit {
1320 entry.lines[0].debit_amount = new_amount;
1321 } else {
1322 entry.lines[0].credit_amount = new_amount;
1323 }
1324
1325 Self::rebalance_entry(entry, is_debit, impact);
1327
1328 entry.header.header_text = Some(
1329 entry.header.header_text.clone().unwrap_or_default()
1330 + " [HUMAN_ERROR:ROUNDED]",
1331 );
1332 }
1333 }
1334 4 => {
1335 if entry.header.document_date == entry.header.posting_date {
1338 let days_late = self.rng.gen_range(5..15);
1339 entry.header.document_date =
1340 entry.header.posting_date - chrono::Duration::days(days_late);
1341 entry.header.header_text = Some(
1342 entry.header.header_text.clone().unwrap_or_default()
1343 + " [HUMAN_ERROR:LATE_POSTING]",
1344 );
1345 }
1346 }
1347 _ => {}
1348 }
1349 }
1350
1351 fn maybe_apply_approval_workflow(
1356 &mut self,
1357 entry: &mut JournalEntry,
1358 _posting_date: NaiveDate,
1359 ) {
1360 use rust_decimal::Decimal;
1361
1362 let amount = entry.total_debit();
1363
1364 if amount <= self.approval_threshold {
1366 let workflow = ApprovalWorkflow::auto_approved(
1368 entry.header.created_by.clone(),
1369 entry.header.user_persona.clone(),
1370 amount,
1371 entry.header.created_at,
1372 );
1373 entry.header.approval_workflow = Some(workflow);
1374 return;
1375 }
1376
1377 entry.header.sox_relevant = true;
1379
1380 let required_levels = if amount > Decimal::new(100000, 0) {
1382 3 } else if amount > Decimal::new(50000, 0) {
1384 2 } else {
1386 1 };
1388
1389 let mut workflow = ApprovalWorkflow::new(
1391 entry.header.created_by.clone(),
1392 entry.header.user_persona.clone(),
1393 amount,
1394 );
1395 workflow.required_levels = required_levels;
1396
1397 let submit_time = entry.header.created_at;
1399 let submit_action = ApprovalAction::new(
1400 entry.header.created_by.clone(),
1401 entry.header.user_persona.clone(),
1402 self.parse_persona(&entry.header.user_persona),
1403 ApprovalActionType::Submit,
1404 0,
1405 )
1406 .with_timestamp(submit_time);
1407
1408 workflow.actions.push(submit_action);
1409 workflow.status = ApprovalStatus::Pending;
1410 workflow.submitted_at = Some(submit_time);
1411
1412 let mut current_time = submit_time;
1414 for level in 1..=required_levels {
1415 let delay_hours = self.rng.gen_range(1..4);
1417 current_time += chrono::Duration::hours(delay_hours);
1418
1419 while current_time.weekday() == chrono::Weekday::Sat
1421 || current_time.weekday() == chrono::Weekday::Sun
1422 {
1423 current_time += chrono::Duration::days(1);
1424 }
1425
1426 let (approver_id, approver_role) = self.select_approver(level);
1428
1429 let approve_action = ApprovalAction::new(
1430 approver_id.clone(),
1431 format!("{:?}", approver_role),
1432 approver_role,
1433 ApprovalActionType::Approve,
1434 level,
1435 )
1436 .with_timestamp(current_time);
1437
1438 workflow.actions.push(approve_action);
1439 workflow.current_level = level;
1440 }
1441
1442 workflow.status = ApprovalStatus::Approved;
1444 workflow.approved_at = Some(current_time);
1445
1446 entry.header.approval_workflow = Some(workflow);
1447 }
1448
1449 fn select_approver(&mut self, level: u8) -> (String, UserPersona) {
1451 let persona = match level {
1452 1 => UserPersona::Manager,
1453 2 => UserPersona::Controller,
1454 _ => UserPersona::Executive,
1455 };
1456
1457 if let Some(ref pool) = self.user_pool {
1459 if let Some(user) = pool.get_random_user(persona, &mut self.rng) {
1460 return (user.user_id.clone(), persona);
1461 }
1462 }
1463
1464 let approver_id = match persona {
1466 UserPersona::Manager => format!("MGR{:04}", self.rng.gen_range(1..100)),
1467 UserPersona::Controller => format!("CTRL{:04}", self.rng.gen_range(1..20)),
1468 UserPersona::Executive => format!("EXEC{:04}", self.rng.gen_range(1..10)),
1469 _ => format!("USR{:04}", self.rng.gen_range(1..1000)),
1470 };
1471
1472 (approver_id, persona)
1473 }
1474
1475 fn parse_persona(&self, persona_str: &str) -> UserPersona {
1477 match persona_str.to_lowercase().as_str() {
1478 s if s.contains("junior") => UserPersona::JuniorAccountant,
1479 s if s.contains("senior") => UserPersona::SeniorAccountant,
1480 s if s.contains("controller") => UserPersona::Controller,
1481 s if s.contains("manager") => UserPersona::Manager,
1482 s if s.contains("executive") => UserPersona::Executive,
1483 s if s.contains("automated") || s.contains("system") => UserPersona::AutomatedSystem,
1484 _ => UserPersona::JuniorAccountant, }
1486 }
1487
1488 pub fn with_approval(mut self, enabled: bool) -> Self {
1490 self.approval_enabled = enabled;
1491 self
1492 }
1493
1494 pub fn with_approval_threshold(mut self, threshold: rust_decimal::Decimal) -> Self {
1496 self.approval_threshold = threshold;
1497 self
1498 }
1499
1500 pub fn with_drift_controller(mut self, controller: DriftController) -> Self {
1506 self.drift_controller = Some(controller);
1507 self
1508 }
1509
1510 pub fn with_drift_config(mut self, config: DriftConfig, seed: u64) -> Self {
1515 if config.enabled {
1516 let total_periods = self.calculate_total_periods();
1517 self.drift_controller = Some(DriftController::new(config, seed, total_periods));
1518 }
1519 self
1520 }
1521
1522 fn calculate_total_periods(&self) -> u32 {
1524 let start_year = self.start_date.year();
1525 let start_month = self.start_date.month();
1526 let end_year = self.end_date.year();
1527 let end_month = self.end_date.month();
1528
1529 ((end_year - start_year) * 12 + (end_month as i32 - start_month as i32) + 1).max(1) as u32
1530 }
1531
1532 fn date_to_period(&self, date: NaiveDate) -> u32 {
1534 let start_year = self.start_date.year();
1535 let start_month = self.start_date.month() as i32;
1536 let date_year = date.year();
1537 let date_month = date.month() as i32;
1538
1539 ((date_year - start_year) * 12 + (date_month - start_month)).max(0) as u32
1540 }
1541
1542 fn get_drift_adjustments(&self, date: NaiveDate) -> DriftAdjustments {
1544 if let Some(ref controller) = self.drift_controller {
1545 let period = self.date_to_period(date);
1546 controller.compute_adjustments(period)
1547 } else {
1548 DriftAdjustments::none()
1549 }
1550 }
1551
1552 fn select_user(&mut self, is_automated: bool) -> (String, String) {
1554 if let Some(ref pool) = self.user_pool {
1555 let persona = if is_automated {
1556 UserPersona::AutomatedSystem
1557 } else {
1558 let roll: f64 = self.rng.gen();
1560 if roll < 0.4 {
1561 UserPersona::JuniorAccountant
1562 } else if roll < 0.7 {
1563 UserPersona::SeniorAccountant
1564 } else if roll < 0.85 {
1565 UserPersona::Controller
1566 } else {
1567 UserPersona::Manager
1568 }
1569 };
1570
1571 if let Some(user) = pool.get_random_user(persona, &mut self.rng) {
1572 return (
1573 user.user_id.clone(),
1574 format!("{:?}", user.persona).to_lowercase(),
1575 );
1576 }
1577 }
1578
1579 if is_automated {
1581 (
1582 format!("BATCH{:04}", self.rng.gen_range(1..=20)),
1583 "automated_system".to_string(),
1584 )
1585 } else {
1586 (
1587 format!("USER{:04}", self.rng.gen_range(1..=40)),
1588 "senior_accountant".to_string(),
1589 )
1590 }
1591 }
1592
1593 fn select_source(&mut self) -> TransactionSource {
1595 let roll: f64 = self.rng.gen();
1596 let dist = &self.config.source_distribution;
1597
1598 if roll < dist.manual {
1599 TransactionSource::Manual
1600 } else if roll < dist.manual + dist.automated {
1601 TransactionSource::Automated
1602 } else if roll < dist.manual + dist.automated + dist.recurring {
1603 TransactionSource::Recurring
1604 } else {
1605 TransactionSource::Adjustment
1606 }
1607 }
1608
1609 fn select_business_process(&mut self) -> BusinessProcess {
1611 let roll: f64 = self.rng.gen();
1612
1613 if roll < 0.35 {
1615 BusinessProcess::O2C
1616 } else if roll < 0.65 {
1617 BusinessProcess::P2P
1618 } else if roll < 0.85 {
1619 BusinessProcess::R2R
1620 } else if roll < 0.95 {
1621 BusinessProcess::H2R
1622 } else {
1623 BusinessProcess::A2R
1624 }
1625 }
1626
1627 fn select_debit_account(&mut self) -> &GLAccount {
1628 let accounts = self.coa.get_accounts_by_type(AccountType::Asset);
1629 let expense_accounts = self.coa.get_accounts_by_type(AccountType::Expense);
1630
1631 let all: Vec<_> = if self.rng.gen::<f64>() < 0.6 {
1633 accounts
1634 } else {
1635 expense_accounts
1636 };
1637
1638 all.choose(&mut self.rng)
1639 .copied()
1640 .unwrap_or_else(|| &self.coa.accounts[0])
1641 }
1642
1643 fn select_credit_account(&mut self) -> &GLAccount {
1644 let liability_accounts = self.coa.get_accounts_by_type(AccountType::Liability);
1645 let revenue_accounts = self.coa.get_accounts_by_type(AccountType::Revenue);
1646
1647 let all: Vec<_> = if self.rng.gen::<f64>() < 0.6 {
1649 liability_accounts
1650 } else {
1651 revenue_accounts
1652 };
1653
1654 all.choose(&mut self.rng)
1655 .copied()
1656 .unwrap_or_else(|| &self.coa.accounts[0])
1657 }
1658}
1659
1660impl Generator for JournalEntryGenerator {
1661 type Item = JournalEntry;
1662 type Config = (
1663 TransactionConfig,
1664 Arc<ChartOfAccounts>,
1665 Vec<String>,
1666 NaiveDate,
1667 NaiveDate,
1668 );
1669
1670 fn new(config: Self::Config, seed: u64) -> Self {
1671 Self::new_with_params(config.0, config.1, config.2, config.3, config.4, seed)
1672 }
1673
1674 fn generate_one(&mut self) -> Self::Item {
1675 self.generate()
1676 }
1677
1678 fn reset(&mut self) {
1679 self.rng = ChaCha8Rng::seed_from_u64(self.seed);
1680 self.line_sampler.reset(self.seed + 1);
1681 self.amount_sampler.reset(self.seed + 2);
1682 self.temporal_sampler.reset(self.seed + 3);
1683 self.count = 0;
1684 self.uuid_factory.reset();
1685
1686 let mut ref_gen = ReferenceGenerator::new(
1688 self.start_date.year(),
1689 self.companies.first().map(|s| s.as_str()).unwrap_or("1000"),
1690 );
1691 ref_gen.set_prefix(
1692 ReferenceType::Invoice,
1693 &self.template_config.references.invoice_prefix,
1694 );
1695 ref_gen.set_prefix(
1696 ReferenceType::PurchaseOrder,
1697 &self.template_config.references.po_prefix,
1698 );
1699 ref_gen.set_prefix(
1700 ReferenceType::SalesOrder,
1701 &self.template_config.references.so_prefix,
1702 );
1703 self.reference_generator = ref_gen;
1704 }
1705
1706 fn count(&self) -> u64 {
1707 self.count
1708 }
1709
1710 fn seed(&self) -> u64 {
1711 self.seed
1712 }
1713}
1714
#[cfg(test)]
#[allow(clippy::unwrap_used)]
mod tests {
    use super::*;
    use crate::ChartOfAccountsGenerator;

    /// Builds the small manufacturing chart of accounts (seed 42) used by
    /// every test in this module.
    fn small_manufacturing_coa() -> Arc<ChartOfAccounts> {
        let mut coa_gen =
            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
        Arc::new(coa_gen.generate())
    }

    /// Shorthand for a date that is known to be valid.
    fn ymd(year: i32, month: u32, day: u32) -> NaiveDate {
        NaiveDate::from_ymd_opt(year, month, day).unwrap()
    }

    /// Generator with the default transaction config, company "1000" and the
    /// calendar year 2024 as its date range.
    fn generator_for_2024(coa: Arc<ChartOfAccounts>, seed: u64) -> JournalEntryGenerator {
        JournalEntryGenerator::new_with_params(
            TransactionConfig::default(),
            coa,
            vec!["1000".to_string()],
            ymd(2024, 1, 1),
            ymd(2024, 12, 31),
            seed,
        )
    }

    /// Entries without a human-error marker must balance, every entry needs at
    /// least two lines, and at least 80 of 100 entries must be marker-free.
    #[test]
    fn test_generate_balanced_entries() {
        let mut je_gen = generator_for_2024(small_manufacturing_coa(), 42);

        let mut balanced_count = 0;
        for _ in 0..100 {
            let entry = je_gen.generate();

            // Entries tagged as human errors are excluded from the balance check.
            let has_human_error = matches!(
                entry.header.header_text.as_deref(),
                Some(text) if text.contains("[HUMAN_ERROR:")
            );

            if !has_human_error {
                assert!(
                    entry.is_balanced(),
                    "Entry {:?} is not balanced",
                    entry.header.document_id
                );
                balanced_count += 1;
            }
            assert!(entry.line_count() >= 2, "Entry has fewer than 2 lines");
        }

        assert!(
            balanced_count >= 80,
            "Expected at least 80 balanced entries, got {}",
            balanced_count
        );
    }

    /// Two generators built from identical inputs must emit identical
    /// document ids and debit totals.
    #[test]
    fn test_deterministic_generation() {
        let coa = small_manufacturing_coa();

        let mut gen1 = generator_for_2024(Arc::clone(&coa), 42);
        let mut gen2 = generator_for_2024(coa, 42);

        for _ in 0..50 {
            let e1 = gen1.generate();
            let e2 = gen2.generate();
            assert_eq!(e1.header.document_id, e2.header.document_id);
            assert_eq!(e1.total_debit(), e2.total_debit());
        }
    }

    /// With all template options switched on, header text, reference,
    /// business process and line text are populated on every entry.
    #[test]
    fn test_templates_generate_descriptions() {
        let template_config = TemplateConfig {
            names: datasynth_config::schema::NameTemplateConfig {
                generate_realistic_names: true,
                email_domain: "test.com".to_string(),
                culture_distribution: datasynth_config::schema::CultureDistribution::default(),
            },
            descriptions: datasynth_config::schema::DescriptionTemplateConfig {
                generate_header_text: true,
                generate_line_text: true,
            },
            references: datasynth_config::schema::ReferenceTemplateConfig {
                generate_references: true,
                invoice_prefix: "TEST-INV".to_string(),
                po_prefix: "TEST-PO".to_string(),
                so_prefix: "TEST-SO".to_string(),
            },
        };

        let mut je_gen = JournalEntryGenerator::new_with_full_config(
            TransactionConfig::default(),
            small_manufacturing_coa(),
            vec!["1000".to_string()],
            ymd(2024, 1, 1),
            ymd(2024, 12, 31),
            42,
            template_config,
            None,
        )
        .with_persona_errors(false);

        for _ in 0..10 {
            let entry = je_gen.generate();

            assert!(
                entry.header.header_text.is_some(),
                "Header text should be populated"
            );

            assert!(
                entry.header.reference.is_some(),
                "Reference should be populated"
            );

            assert!(
                entry.header.business_process.is_some(),
                "Business process should be set"
            );

            for line in &entry.lines {
                assert!(line.line_text.is_some(), "Line text should be populated");
            }

            assert!(entry.is_balanced());
        }
    }

    /// A generator wired to a generated user pool always fills `created_by`.
    #[test]
    fn test_user_pool_integration() {
        let companies = vec!["1000".to_string()];

        let mut user_gen = crate::UserGenerator::new(42);
        let user_pool = user_gen.generate_standard(&companies);

        let mut je_gen = JournalEntryGenerator::new_with_full_config(
            TransactionConfig::default(),
            small_manufacturing_coa(),
            companies,
            ymd(2024, 1, 1),
            ymd(2024, 12, 31),
            42,
            TemplateConfig::default(),
            Some(user_pool),
        );

        for _ in 0..20 {
            let entry = je_gen.generate();

            assert!(!entry.header.created_by.is_empty());
        }
    }

    /// `is_using_real_master_data` is false on a plain generator and true once
    /// vendors, customers and materials have been attached one by one.
    #[test]
    fn test_master_data_connection() {
        let vendors = vec![
            Vendor::new("V-TEST-001", "Test Vendor Alpha", VendorType::Supplier),
            Vendor::new("V-TEST-002", "Test Vendor Beta", VendorType::Technology),
        ];

        let customers = vec![
            Customer::new("C-TEST-001", "Test Customer One", CustomerType::Corporate),
            Customer::new(
                "C-TEST-002",
                "Test Customer Two",
                CustomerType::SmallBusiness,
            ),
        ];

        let materials = vec![Material::new(
            "MAT-TEST-001",
            "Test Material A",
            MaterialType::RawMaterial,
        )];

        let generator = generator_for_2024(small_manufacturing_coa(), 42);

        assert!(!generator.is_using_real_master_data());

        let generator_with_data = generator
            .with_vendors(&vendors)
            .with_customers(&customers)
            .with_materials(&materials);

        assert!(generator_with_data.is_using_real_master_data());
    }

    /// `with_master_data` attaches all three master-data sets in one call.
    #[test]
    fn test_with_master_data_convenience_method() {
        let vendors = vec![Vendor::new("V-001", "Vendor One", VendorType::Supplier)];
        let customers = vec![Customer::new(
            "C-001",
            "Customer One",
            CustomerType::Corporate,
        )];
        let materials = vec![Material::new(
            "MAT-001",
            "Material One",
            MaterialType::RawMaterial,
        )];

        let generator = generator_for_2024(small_manufacturing_coa(), 42)
            .with_master_data(&vendors, &customers, &materials);

        assert!(generator.is_using_real_master_data());
    }

    /// Stress factors should raise the error rate at month end, year end, on
    /// Fridays and on Mondays relative to the baseline date.
    ///
    /// NOTE(review): the baseline date 2024-06-15 falls on a Saturday, while
    /// the assertion messages below describe the baseline as "mid-week".
    #[test]
    fn test_stress_factors_increase_error_rate() {
        let generator = generator_for_2024(small_manufacturing_coa(), 42);

        let base_rate = 0.1;

        // Baseline date (calendar: Saturday).
        let regular_day = ymd(2024, 6, 15);
        let regular_rate = generator.apply_stress_factors(base_rate, regular_day);
        assert!(
            (regular_rate - base_rate).abs() < 0.01,
            "Regular day should have minimal stress factor adjustment"
        );

        // Second-to-last day of June (calendar: Saturday).
        let month_end = ymd(2024, 6, 29);
        let month_end_rate = generator.apply_stress_factors(base_rate, month_end);
        assert!(
            month_end_rate > regular_rate,
            "Month end should have higher error rate than regular day"
        );

        // Second-to-last day of the year (calendar: Monday).
        let year_end = ymd(2024, 12, 30);
        let year_end_rate = generator.apply_stress_factors(base_rate, year_end);
        assert!(
            year_end_rate > month_end_rate,
            "Year end should have highest error rate"
        );

        // A Friday in the middle of the month.
        let friday = ymd(2024, 6, 14);
        let friday_rate = generator.apply_stress_factors(base_rate, friday);
        assert!(
            friday_rate > regular_rate,
            "Friday should have higher error rate than mid-week"
        );

        // A Monday in the middle of the month.
        let monday = ymd(2024, 6, 17);
        let monday_rate = generator.apply_stress_factors(base_rate, monday);
        assert!(
            monday_rate > regular_rate,
            "Monday should have higher error rate than mid-week"
        );
    }

    /// Over 200 entries (persona errors off) every entry balances and at least
    /// one posting date is shared by several entries.
    #[test]
    fn test_batching_produces_similar_entries() {
        let mut je_gen =
            generator_for_2024(small_manufacturing_coa(), 123).with_persona_errors(false);

        let entries: Vec<JournalEntry> = (0..200).map(|_| je_gen.generate()).collect();

        for entry in &entries {
            assert!(
                entry.is_balanced(),
                "All entries including batched should be balanced"
            );
        }

        // Tally how many entries landed on each posting date.
        let mut entries_per_date = std::collections::HashMap::<NaiveDate, usize>::new();
        for entry in &entries {
            *entries_per_date
                .entry(entry.header.posting_date)
                .or_default() += 1;
        }

        let dates_with_multiple = entries_per_date
            .values()
            .filter(|count| **count > 1)
            .count();
        assert!(
            dates_with_multiple > 0,
            "With batching, should see some dates with multiple entries"
        );
    }

    /// With business-day temporal patterns enabled for the US calendar, no
    /// posting date may fall on a Saturday or Sunday.
    #[test]
    fn test_temporal_patterns_business_days() {
        use datasynth_config::schema::{
            BusinessDaySchemaConfig, CalendarSchemaConfig, TemporalPatternsConfig,
        };

        let temporal_config = TemporalPatternsConfig {
            enabled: true,
            business_days: BusinessDaySchemaConfig {
                enabled: true,
                ..Default::default()
            },
            calendars: CalendarSchemaConfig {
                regions: vec!["US".to_string()],
                custom_holidays: vec![],
            },
            ..Default::default()
        };

        // First quarter of 2024 only.
        let mut je_gen = JournalEntryGenerator::new_with_params(
            TransactionConfig::default(),
            small_manufacturing_coa(),
            vec!["1000".to_string()],
            ymd(2024, 1, 1),
            ymd(2024, 3, 31),
            42,
        )
        .with_temporal_patterns(temporal_config, 42)
        .with_persona_errors(false);

        let entries: Vec<JournalEntry> = (0..100).map(|_| je_gen.generate()).collect();

        for entry in &entries {
            let weekday = entry.header.posting_date.weekday();
            assert!(
                weekday != chrono::Weekday::Sat && weekday != chrono::Weekday::Sun,
                "Posting date {:?} should not be a weekend",
                entry.header.posting_date
            );
        }
    }
}