1use chrono::{Datelike, NaiveDate, Timelike};
4use datasynth_core::utils::seeded_rng;
5use rand::prelude::*;
6use rand_chacha::ChaCha8Rng;
7use rust_decimal::prelude::*;
8use rust_decimal::Decimal;
9use std::sync::Arc;
10
11use tracing::debug;
12
13use datasynth_config::schema::{
14 FraudConfig, GeneratorConfig, TemplateConfig, TemporalPatternsConfig, TransactionConfig,
15};
16use datasynth_core::distributions::{
17 BusinessDayCalculator, CrossDayConfig, DriftAdjustments, DriftConfig, DriftController,
18 EventType, LagDistribution, PeriodEndConfig, PeriodEndDynamics, PeriodEndModel,
19 ProcessingLagCalculator, ProcessingLagConfig, *,
20};
21use datasynth_core::models::*;
22use datasynth_core::templates::{
23 descriptions::DescriptionContext, DescriptionGenerator, ReferenceGenerator, ReferenceType,
24};
25use datasynth_core::traits::Generator;
26use datasynth_core::uuid_factory::{DeterministicUuidFactory, GeneratorType};
27use datasynth_core::CountryPack;
28
29use crate::company_selector::WeightedCompanySelector;
30use crate::user_generator::{UserGenerator, UserGeneratorConfig};
31
32pub struct JournalEntryGenerator {
34 rng: ChaCha8Rng,
35 seed: u64,
36 config: TransactionConfig,
37 coa: Arc<ChartOfAccounts>,
38 companies: Vec<String>,
39 company_selector: WeightedCompanySelector,
40 line_sampler: LineItemSampler,
41 amount_sampler: AmountSampler,
42 temporal_sampler: TemporalSampler,
43 start_date: NaiveDate,
44 end_date: NaiveDate,
45 count: u64,
46 uuid_factory: DeterministicUuidFactory,
47 user_pool: Option<UserPool>,
49 description_generator: DescriptionGenerator,
50 reference_generator: ReferenceGenerator,
51 template_config: TemplateConfig,
52 vendor_pool: VendorPool,
53 customer_pool: CustomerPool,
54 material_pool: Option<MaterialPool>,
56 using_real_master_data: bool,
58 fraud_config: FraudConfig,
60 persona_errors_enabled: bool,
62 approval_enabled: bool,
64 approval_threshold: rust_decimal::Decimal,
65 sod_violation_rate: f64,
67 batch_state: Option<BatchState>,
69 drift_controller: Option<DriftController>,
71 business_day_calculator: Option<BusinessDayCalculator>,
73 processing_lag_calculator: Option<ProcessingLagCalculator>,
74 temporal_patterns_config: Option<TemporalPatternsConfig>,
75}
76
77#[derive(Clone)]
82struct BatchState {
83 base_account_number: String,
85 base_amount: rust_decimal::Decimal,
86 base_business_process: Option<BusinessProcess>,
87 base_posting_date: NaiveDate,
88 remaining: u8,
90}
91
92impl JournalEntryGenerator {
93 pub fn new_with_params(
95 config: TransactionConfig,
96 coa: Arc<ChartOfAccounts>,
97 companies: Vec<String>,
98 start_date: NaiveDate,
99 end_date: NaiveDate,
100 seed: u64,
101 ) -> Self {
102 Self::new_with_full_config(
103 config,
104 coa,
105 companies,
106 start_date,
107 end_date,
108 seed,
109 TemplateConfig::default(),
110 None,
111 )
112 }
113
114 #[allow(clippy::too_many_arguments)]
116 pub fn new_with_full_config(
117 config: TransactionConfig,
118 coa: Arc<ChartOfAccounts>,
119 companies: Vec<String>,
120 start_date: NaiveDate,
121 end_date: NaiveDate,
122 seed: u64,
123 template_config: TemplateConfig,
124 user_pool: Option<UserPool>,
125 ) -> Self {
126 let user_pool = user_pool.or_else(|| {
128 if template_config.names.generate_realistic_names {
129 let user_gen_config = UserGeneratorConfig {
130 culture_distribution: vec![
131 (
132 datasynth_core::templates::NameCulture::WesternUs,
133 template_config.names.culture_distribution.western_us,
134 ),
135 (
136 datasynth_core::templates::NameCulture::Hispanic,
137 template_config.names.culture_distribution.hispanic,
138 ),
139 (
140 datasynth_core::templates::NameCulture::German,
141 template_config.names.culture_distribution.german,
142 ),
143 (
144 datasynth_core::templates::NameCulture::French,
145 template_config.names.culture_distribution.french,
146 ),
147 (
148 datasynth_core::templates::NameCulture::Chinese,
149 template_config.names.culture_distribution.chinese,
150 ),
151 (
152 datasynth_core::templates::NameCulture::Japanese,
153 template_config.names.culture_distribution.japanese,
154 ),
155 (
156 datasynth_core::templates::NameCulture::Indian,
157 template_config.names.culture_distribution.indian,
158 ),
159 ],
160 email_domain: template_config.names.email_domain.clone(),
161 generate_realistic_names: true,
162 };
163 let mut user_gen = UserGenerator::with_config(seed + 100, user_gen_config);
164 Some(user_gen.generate_standard(&companies))
165 } else {
166 None
167 }
168 });
169
170 let mut ref_gen = ReferenceGenerator::new(
172 start_date.year(),
173 companies
174 .first()
175 .map(std::string::String::as_str)
176 .unwrap_or("1000"),
177 );
178 ref_gen.set_prefix(
179 ReferenceType::Invoice,
180 &template_config.references.invoice_prefix,
181 );
182 ref_gen.set_prefix(
183 ReferenceType::PurchaseOrder,
184 &template_config.references.po_prefix,
185 );
186 ref_gen.set_prefix(
187 ReferenceType::SalesOrder,
188 &template_config.references.so_prefix,
189 );
190
191 let company_selector = WeightedCompanySelector::uniform(companies.clone());
193
194 Self {
195 rng: seeded_rng(seed, 0),
196 seed,
197 config: config.clone(),
198 coa,
199 companies,
200 company_selector,
201 line_sampler: LineItemSampler::with_config(
202 seed + 1,
203 config.line_item_distribution.clone(),
204 config.even_odd_distribution.clone(),
205 config.debit_credit_distribution.clone(),
206 ),
207 amount_sampler: AmountSampler::with_config(seed + 2, config.amounts.clone()),
208 temporal_sampler: TemporalSampler::with_config(
209 seed + 3,
210 config.seasonality.clone(),
211 WorkingHoursConfig::default(),
212 Vec::new(),
213 ),
214 start_date,
215 end_date,
216 count: 0,
217 uuid_factory: DeterministicUuidFactory::new(seed, GeneratorType::JournalEntry),
218 user_pool,
219 description_generator: DescriptionGenerator::new(),
220 reference_generator: ref_gen,
221 template_config,
222 vendor_pool: VendorPool::standard(),
223 customer_pool: CustomerPool::standard(),
224 material_pool: None,
225 using_real_master_data: false,
226 fraud_config: FraudConfig::default(),
227 persona_errors_enabled: true, approval_enabled: true, approval_threshold: rust_decimal::Decimal::new(10000, 0), sod_violation_rate: 0.10, batch_state: None,
232 drift_controller: None,
233 business_day_calculator: Some(BusinessDayCalculator::new(HolidayCalendar::new(
236 Region::US,
237 start_date.year(),
238 ))),
239 processing_lag_calculator: None,
240 temporal_patterns_config: None,
241 }
242 }
243
244 pub fn from_generator_config(
249 full_config: &GeneratorConfig,
250 coa: Arc<ChartOfAccounts>,
251 start_date: NaiveDate,
252 end_date: NaiveDate,
253 seed: u64,
254 ) -> Self {
255 let companies: Vec<String> = full_config
256 .companies
257 .iter()
258 .map(|c| c.code.clone())
259 .collect();
260
261 let company_selector = WeightedCompanySelector::from_configs(&full_config.companies);
263
264 let mut generator = Self::new_with_full_config(
265 full_config.transactions.clone(),
266 coa,
267 companies,
268 start_date,
269 end_date,
270 seed,
271 full_config.templates.clone(),
272 None,
273 );
274
275 generator.company_selector = company_selector;
277
278 generator.fraud_config = full_config.fraud.clone();
280
281 let temporal_config = &full_config.temporal_patterns;
283 if temporal_config.enabled {
284 generator = generator.with_temporal_patterns(temporal_config.clone(), seed);
285 }
286
287 generator
288 }
289
290 pub fn with_temporal_patterns(mut self, config: TemporalPatternsConfig, seed: u64) -> Self {
297 if config.business_days.enabled {
299 let region = config
300 .calendars
301 .regions
302 .first()
303 .map(|r| Self::parse_region(r))
304 .unwrap_or(Region::US);
305
306 let calendar = HolidayCalendar::new(region, self.start_date.year());
307 self.business_day_calculator = Some(BusinessDayCalculator::new(calendar));
308 }
309
310 if config.processing_lags.enabled {
312 let lag_config = Self::convert_processing_lag_config(&config.processing_lags);
313 self.processing_lag_calculator =
314 Some(ProcessingLagCalculator::with_config(seed, lag_config));
315 }
316
317 let model = config.period_end.model.as_deref().unwrap_or("flat");
319 if model != "flat"
320 || config
321 .period_end
322 .month_end
323 .as_ref()
324 .is_some_and(|m| m.peak_multiplier.unwrap_or(1.0) != 1.0)
325 {
326 let dynamics = Self::convert_period_end_config(&config.period_end);
327 self.temporal_sampler.set_period_end_dynamics(dynamics);
328 }
329
330 self.temporal_patterns_config = Some(config);
331 self
332 }
333
334 pub fn with_country_pack_temporal(
342 mut self,
343 config: TemporalPatternsConfig,
344 seed: u64,
345 pack: &CountryPack,
346 ) -> Self {
347 if config.business_days.enabled {
349 let calendar = HolidayCalendar::from_country_pack(pack, self.start_date.year());
350 self.business_day_calculator = Some(BusinessDayCalculator::new(calendar));
351 }
352
353 if config.processing_lags.enabled {
355 let lag_config = Self::convert_processing_lag_config(&config.processing_lags);
356 self.processing_lag_calculator =
357 Some(ProcessingLagCalculator::with_config(seed, lag_config));
358 }
359
360 let model = config.period_end.model.as_deref().unwrap_or("flat");
362 if model != "flat"
363 || config
364 .period_end
365 .month_end
366 .as_ref()
367 .is_some_and(|m| m.peak_multiplier.unwrap_or(1.0) != 1.0)
368 {
369 let dynamics = Self::convert_period_end_config(&config.period_end);
370 self.temporal_sampler.set_period_end_dynamics(dynamics);
371 }
372
373 self.temporal_patterns_config = Some(config);
374 self
375 }
376
377 fn convert_processing_lag_config(
379 schema: &datasynth_config::schema::ProcessingLagSchemaConfig,
380 ) -> ProcessingLagConfig {
381 let mut config = ProcessingLagConfig {
382 enabled: schema.enabled,
383 ..Default::default()
384 };
385
386 let convert_lag = |lag: &datasynth_config::schema::LagDistributionSchemaConfig| {
388 let mut dist = LagDistribution::log_normal(lag.mu, lag.sigma);
389 if let Some(min) = lag.min_hours {
390 dist.min_lag_hours = min;
391 }
392 if let Some(max) = lag.max_hours {
393 dist.max_lag_hours = max;
394 }
395 dist
396 };
397
398 if let Some(ref lag) = schema.sales_order_lag {
400 config
401 .event_lags
402 .insert(EventType::SalesOrder, convert_lag(lag));
403 }
404 if let Some(ref lag) = schema.purchase_order_lag {
405 config
406 .event_lags
407 .insert(EventType::PurchaseOrder, convert_lag(lag));
408 }
409 if let Some(ref lag) = schema.goods_receipt_lag {
410 config
411 .event_lags
412 .insert(EventType::GoodsReceipt, convert_lag(lag));
413 }
414 if let Some(ref lag) = schema.invoice_receipt_lag {
415 config
416 .event_lags
417 .insert(EventType::InvoiceReceipt, convert_lag(lag));
418 }
419 if let Some(ref lag) = schema.invoice_issue_lag {
420 config
421 .event_lags
422 .insert(EventType::InvoiceIssue, convert_lag(lag));
423 }
424 if let Some(ref lag) = schema.payment_lag {
425 config
426 .event_lags
427 .insert(EventType::Payment, convert_lag(lag));
428 }
429 if let Some(ref lag) = schema.journal_entry_lag {
430 config
431 .event_lags
432 .insert(EventType::JournalEntry, convert_lag(lag));
433 }
434
435 if let Some(ref cross_day) = schema.cross_day_posting {
437 config.cross_day = CrossDayConfig {
438 enabled: cross_day.enabled,
439 probability_by_hour: cross_day.probability_by_hour.clone(),
440 ..Default::default()
441 };
442 }
443
444 config
445 }
446
447 fn convert_period_end_config(
449 schema: &datasynth_config::schema::PeriodEndSchemaConfig,
450 ) -> PeriodEndDynamics {
451 let model_type = schema.model.as_deref().unwrap_or("exponential");
452
453 let convert_period =
455 |period: Option<&datasynth_config::schema::PeriodEndModelSchemaConfig>,
456 default_peak: f64|
457 -> PeriodEndConfig {
458 if let Some(p) = period {
459 let model = match model_type {
460 "flat" => PeriodEndModel::FlatMultiplier {
461 multiplier: p.peak_multiplier.unwrap_or(default_peak),
462 },
463 "extended_crunch" => PeriodEndModel::ExtendedCrunch {
464 start_day: p.start_day.unwrap_or(-10),
465 sustained_high_days: p.sustained_high_days.unwrap_or(3),
466 peak_multiplier: p.peak_multiplier.unwrap_or(default_peak),
467 ramp_up_days: 3, },
469 _ => PeriodEndModel::ExponentialAcceleration {
470 start_day: p.start_day.unwrap_or(-10),
471 base_multiplier: p.base_multiplier.unwrap_or(1.0),
472 peak_multiplier: p.peak_multiplier.unwrap_or(default_peak),
473 decay_rate: p.decay_rate.unwrap_or(0.3),
474 },
475 };
476 PeriodEndConfig {
477 enabled: true,
478 model,
479 additional_multiplier: p.additional_multiplier.unwrap_or(1.0),
480 }
481 } else {
482 PeriodEndConfig {
483 enabled: true,
484 model: PeriodEndModel::ExponentialAcceleration {
485 start_day: -10,
486 base_multiplier: 1.0,
487 peak_multiplier: default_peak,
488 decay_rate: 0.3,
489 },
490 additional_multiplier: 1.0,
491 }
492 }
493 };
494
495 PeriodEndDynamics::new(
496 convert_period(schema.month_end.as_ref(), 2.0),
497 convert_period(schema.quarter_end.as_ref(), 3.5),
498 convert_period(schema.year_end.as_ref(), 5.0),
499 )
500 }
501
502 fn parse_region(region_str: &str) -> Region {
504 match region_str.to_uppercase().as_str() {
505 "US" => Region::US,
506 "DE" => Region::DE,
507 "GB" => Region::GB,
508 "CN" => Region::CN,
509 "JP" => Region::JP,
510 "IN" => Region::IN,
511 "BR" => Region::BR,
512 "MX" => Region::MX,
513 "AU" => Region::AU,
514 "SG" => Region::SG,
515 "KR" => Region::KR,
516 "FR" => Region::FR,
517 "IT" => Region::IT,
518 "ES" => Region::ES,
519 "CA" => Region::CA,
520 _ => Region::US,
521 }
522 }
523
524 pub fn set_company_selector(&mut self, selector: WeightedCompanySelector) {
526 self.company_selector = selector;
527 }
528
529 pub fn company_selector(&self) -> &WeightedCompanySelector {
531 &self.company_selector
532 }
533
534 pub fn set_fraud_config(&mut self, config: FraudConfig) {
536 self.fraud_config = config;
537 }
538
539 pub fn with_vendors(mut self, vendors: &[Vendor]) -> Self {
544 if !vendors.is_empty() {
545 self.vendor_pool = VendorPool::from_vendors(vendors.to_vec());
546 self.using_real_master_data = true;
547 }
548 self
549 }
550
551 pub fn with_customers(mut self, customers: &[Customer]) -> Self {
556 if !customers.is_empty() {
557 self.customer_pool = CustomerPool::from_customers(customers.to_vec());
558 self.using_real_master_data = true;
559 }
560 self
561 }
562
563 pub fn with_materials(mut self, materials: &[Material]) -> Self {
567 if !materials.is_empty() {
568 self.material_pool = Some(MaterialPool::from_materials(materials.to_vec()));
569 self.using_real_master_data = true;
570 }
571 self
572 }
573
574 pub fn with_master_data(
579 self,
580 vendors: &[Vendor],
581 customers: &[Customer],
582 materials: &[Material],
583 ) -> Self {
584 self.with_vendors(vendors)
585 .with_customers(customers)
586 .with_materials(materials)
587 }
588
589 pub fn with_country_pack_names(mut self, pack: &CountryPack) -> Self {
596 let name_gen =
597 datasynth_core::templates::MultiCultureNameGenerator::from_country_pack(pack);
598 let config = UserGeneratorConfig {
599 culture_distribution: Vec::new(),
602 email_domain: name_gen.email_domain().to_string(),
603 generate_realistic_names: true,
604 };
605 let mut user_gen = UserGenerator::with_name_generator(self.seed + 100, config, name_gen);
606 self.user_pool = Some(user_gen.generate_standard(&self.companies));
607 self
608 }
609
610 pub fn is_using_real_master_data(&self) -> bool {
612 self.using_real_master_data
613 }
614
615 fn determine_fraud(&mut self) -> Option<FraudType> {
617 if !self.fraud_config.enabled {
618 return None;
619 }
620
621 if self.rng.random::<f64>() >= self.fraud_config.fraud_rate {
623 return None;
624 }
625
626 Some(self.select_fraud_type())
628 }
629
630 fn select_fraud_type(&mut self) -> FraudType {
632 let dist = &self.fraud_config.fraud_type_distribution;
633 let roll: f64 = self.rng.random();
634
635 let mut cumulative = 0.0;
636
637 cumulative += dist.suspense_account_abuse;
638 if roll < cumulative {
639 return FraudType::SuspenseAccountAbuse;
640 }
641
642 cumulative += dist.fictitious_transaction;
643 if roll < cumulative {
644 return FraudType::FictitiousTransaction;
645 }
646
647 cumulative += dist.revenue_manipulation;
648 if roll < cumulative {
649 return FraudType::RevenueManipulation;
650 }
651
652 cumulative += dist.expense_capitalization;
653 if roll < cumulative {
654 return FraudType::ExpenseCapitalization;
655 }
656
657 cumulative += dist.split_transaction;
658 if roll < cumulative {
659 return FraudType::SplitTransaction;
660 }
661
662 cumulative += dist.timing_anomaly;
663 if roll < cumulative {
664 return FraudType::TimingAnomaly;
665 }
666
667 cumulative += dist.unauthorized_access;
668 if roll < cumulative {
669 return FraudType::UnauthorizedAccess;
670 }
671
672 FraudType::DuplicatePayment
674 }
675
676 fn fraud_type_to_amount_pattern(&self, fraud_type: FraudType) -> FraudAmountPattern {
678 match fraud_type {
679 FraudType::SplitTransaction | FraudType::JustBelowThreshold => {
680 FraudAmountPattern::ThresholdAdjacent
681 }
682 FraudType::FictitiousTransaction
683 | FraudType::FictitiousEntry
684 | FraudType::SuspenseAccountAbuse
685 | FraudType::RoundDollarManipulation => FraudAmountPattern::ObviousRoundNumbers,
686 FraudType::RevenueManipulation
687 | FraudType::ExpenseCapitalization
688 | FraudType::ImproperCapitalization
689 | FraudType::ReserveManipulation
690 | FraudType::UnauthorizedAccess
691 | FraudType::PrematureRevenue
692 | FraudType::UnderstatedLiabilities
693 | FraudType::OverstatedAssets
694 | FraudType::ChannelStuffing => FraudAmountPattern::StatisticallyImprobable,
695 FraudType::DuplicatePayment
696 | FraudType::TimingAnomaly
697 | FraudType::SelfApproval
698 | FraudType::ExceededApprovalLimit
699 | FraudType::SegregationOfDutiesViolation
700 | FraudType::UnauthorizedApproval
701 | FraudType::CollusiveApproval
702 | FraudType::FictitiousVendor
703 | FraudType::ShellCompanyPayment
704 | FraudType::Kickback
705 | FraudType::KickbackScheme
706 | FraudType::InvoiceManipulation
707 | FraudType::AssetMisappropriation
708 | FraudType::InventoryTheft
709 | FraudType::GhostEmployee => FraudAmountPattern::Normal,
710 FraudType::ImproperRevenueRecognition
712 | FraudType::ImproperPoAllocation
713 | FraudType::VariableConsiderationManipulation
714 | FraudType::ContractModificationMisstatement => {
715 FraudAmountPattern::StatisticallyImprobable
716 }
717 FraudType::LeaseClassificationManipulation
719 | FraudType::OffBalanceSheetLease
720 | FraudType::LeaseLiabilityUnderstatement
721 | FraudType::RouAssetMisstatement => FraudAmountPattern::StatisticallyImprobable,
722 FraudType::FairValueHierarchyManipulation
724 | FraudType::Level3InputManipulation
725 | FraudType::ValuationTechniqueManipulation => {
726 FraudAmountPattern::StatisticallyImprobable
727 }
728 FraudType::DelayedImpairment
730 | FraudType::ImpairmentTestAvoidance
731 | FraudType::CashFlowProjectionManipulation
732 | FraudType::ImproperImpairmentReversal => FraudAmountPattern::StatisticallyImprobable,
733 FraudType::BidRigging
735 | FraudType::PhantomVendorContract
736 | FraudType::ConflictOfInterestSourcing => FraudAmountPattern::Normal,
737 FraudType::SplitContractThreshold => FraudAmountPattern::ThresholdAdjacent,
738 FraudType::GhostEmployeePayroll
740 | FraudType::PayrollInflation
741 | FraudType::DuplicateExpenseReport
742 | FraudType::FictitiousExpense => FraudAmountPattern::Normal,
743 FraudType::SplitExpenseToAvoidApproval => FraudAmountPattern::ThresholdAdjacent,
744 FraudType::RevenueTimingManipulation => FraudAmountPattern::StatisticallyImprobable,
746 FraudType::QuotePriceOverride => FraudAmountPattern::Normal,
747 }
748 }
749
750 #[inline]
752 fn generate_deterministic_uuid(&self) -> uuid::Uuid {
753 self.uuid_factory.next()
754 }
755
756 const COST_CENTER_POOL: &'static [&'static str] =
758 &["CC1000", "CC2000", "CC3000", "CC4000", "CC5000"];
759
760 fn enrich_line_items(&self, entry: &mut JournalEntry) {
766 let posting_date = entry.header.posting_date;
767 let company_code = &entry.header.company_code;
768 let header_text = entry.header.header_text.clone();
769 let business_process = entry.header.business_process;
770
771 let doc_id_bytes = entry.header.document_id.as_bytes();
773 let mut cc_seed: usize = 0;
774 for &b in doc_id_bytes {
775 cc_seed = cc_seed.wrapping_add(b as usize);
776 }
777
778 for (i, line) in entry.lines.iter_mut().enumerate() {
779 if line.account_description.is_none() {
781 line.account_description = self
782 .coa
783 .get_account(&line.gl_account)
784 .map(|a| a.short_description.clone());
785 }
786
787 if line.cost_center.is_none() {
789 let first_char = line.gl_account.chars().next().unwrap_or('0');
790 if first_char == '5' || first_char == '6' {
791 let idx = cc_seed.wrapping_add(i) % Self::COST_CENTER_POOL.len();
792 line.cost_center = Some(Self::COST_CENTER_POOL[idx].to_string());
793 }
794 }
795
796 if line.profit_center.is_none() {
798 let suffix = match business_process {
799 Some(BusinessProcess::P2P) => "-P2P",
800 Some(BusinessProcess::O2C) => "-O2C",
801 Some(BusinessProcess::R2R) => "-R2R",
802 Some(BusinessProcess::H2R) => "-H2R",
803 _ => "",
804 };
805 line.profit_center = Some(format!("PC-{company_code}{suffix}"));
806 }
807
808 if line.line_text.is_none() {
810 line.line_text = header_text.clone();
811 }
812
813 if line.value_date.is_none()
815 && (line.gl_account.starts_with("1100") || line.gl_account.starts_with("2000"))
816 {
817 line.value_date = Some(posting_date);
818 }
819
820 if line.assignment.is_none() {
822 if line.gl_account.starts_with("2000") {
823 if let Some(ref ht) = header_text {
825 if let Some(vendor_part) = ht.rsplit(" - ").next() {
827 if vendor_part.starts_with("V-")
828 || vendor_part.starts_with("VENDOR")
829 || vendor_part.starts_with("Vendor")
830 {
831 line.assignment = Some(vendor_part.to_string());
832 }
833 }
834 }
835 } else if line.gl_account.starts_with("1100") {
836 if let Some(ref ht) = header_text {
838 if let Some(customer_part) = ht.rsplit(" - ").next() {
839 if customer_part.starts_with("C-")
840 || customer_part.starts_with("CUST")
841 || customer_part.starts_with("Customer")
842 {
843 line.assignment = Some(customer_part.to_string());
844 }
845 }
846 }
847 }
848 }
849 }
850 }
851
852 pub fn generate(&mut self) -> JournalEntry {
854 debug!(
855 count = self.count,
856 companies = self.companies.len(),
857 start_date = %self.start_date,
858 end_date = %self.end_date,
859 "Generating journal entry"
860 );
861
862 if let Some(ref state) = self.batch_state {
864 if state.remaining > 0 {
865 return self.generate_batched_entry();
866 }
867 }
868
869 self.count += 1;
870
871 let document_id = self.generate_deterministic_uuid();
873
874 let mut posting_date = self
876 .temporal_sampler
877 .sample_date(self.start_date, self.end_date);
878
879 if let Some(ref calc) = self.business_day_calculator {
881 if !calc.is_business_day(posting_date) {
882 posting_date = calc.next_business_day(posting_date, false);
884 if posting_date > self.end_date {
886 posting_date = calc.prev_business_day(self.end_date, true);
887 }
888 }
889 }
890
891 let company_code = self.company_selector.select(&mut self.rng).to_string();
893
894 let line_spec = self.line_sampler.sample();
896
897 let source = self.select_source();
899 let is_automated = matches!(
900 source,
901 TransactionSource::Automated | TransactionSource::Recurring
902 );
903
904 let business_process = self.select_business_process();
906
907 let fraud_type = self.determine_fraud();
909 let is_fraud = fraud_type.is_some();
910
911 let time = self.temporal_sampler.sample_time(!is_automated);
913 let created_at = posting_date.and_time(time).and_utc();
914
915 let (created_by, user_persona) = self.select_user(is_automated);
917
918 let mut header =
920 JournalEntryHeader::with_deterministic_id(company_code, posting_date, document_id);
921 header.created_at = created_at;
922 header.source = source;
923 header.created_by = created_by;
924 header.user_persona = user_persona;
925 header.business_process = Some(business_process);
926 header.document_type = Self::document_type_for_process(business_process).to_string();
927 header.is_fraud = is_fraud;
928 header.fraud_type = fraud_type;
929
930 let is_manual = matches!(source, TransactionSource::Manual);
932 header.is_manual = is_manual;
933
934 header.source_system = if is_manual {
936 if self.rng.random::<f64>() < 0.70 {
937 "manual".to_string()
938 } else {
939 "spreadsheet".to_string()
940 }
941 } else {
942 let roll: f64 = self.rng.random();
943 if roll < 0.40 {
944 "SAP-FI".to_string()
945 } else if roll < 0.60 {
946 "SAP-MM".to_string()
947 } else if roll < 0.80 {
948 "SAP-SD".to_string()
949 } else if roll < 0.95 {
950 "interface".to_string()
951 } else {
952 "SAP-HR".to_string()
953 }
954 };
955
956 let is_post_close = posting_date.month() == self.end_date.month()
959 && posting_date.year() == self.end_date.year()
960 && posting_date.day() > 25;
961 header.is_post_close = is_post_close;
962
963 let created_date = if is_manual {
966 posting_date.and_hms_opt(time.hour().min(23), time.minute(), time.second())
967 } else {
968 let lag_days = self.rng.random_range(0i64..=3);
969 let created_naive_date = posting_date
970 .checked_sub_signed(chrono::Duration::days(lag_days))
971 .unwrap_or(posting_date);
972 created_naive_date.and_hms_opt(
973 self.rng.random_range(8u32..=17),
974 self.rng.random_range(0u32..=59),
975 self.rng.random_range(0u32..=59),
976 )
977 };
978 header.created_date = created_date;
979
980 let mut context =
982 DescriptionContext::with_period(posting_date.month(), posting_date.year());
983
984 match business_process {
986 BusinessProcess::P2P => {
987 if let Some(vendor) = self.vendor_pool.random_vendor(&mut self.rng) {
988 context.vendor_name = Some(vendor.name.clone());
989 }
990 }
991 BusinessProcess::O2C => {
992 if let Some(customer) = self.customer_pool.random_customer(&mut self.rng) {
993 context.customer_name = Some(customer.name.clone());
994 }
995 }
996 _ => {}
997 }
998
999 if self.template_config.descriptions.generate_header_text {
1001 header.header_text = Some(self.description_generator.generate_header_text(
1002 business_process,
1003 &context,
1004 &mut self.rng,
1005 ));
1006 }
1007
1008 if self.template_config.references.generate_references {
1010 header.reference = Some(
1011 self.reference_generator
1012 .generate_for_process_year(business_process, posting_date.year()),
1013 );
1014 }
1015
1016 header.source_document = header
1018 .reference
1019 .as_deref()
1020 .and_then(DocumentRef::parse)
1021 .or_else(|| {
1022 if header.source == TransactionSource::Manual {
1023 Some(DocumentRef::Manual)
1024 } else {
1025 None
1026 }
1027 });
1028
1029 let mut entry = JournalEntry::new(header);
1031
1032 let base_amount = if let Some(ft) = fraud_type {
1034 let pattern = self.fraud_type_to_amount_pattern(ft);
1035 self.amount_sampler.sample_fraud(pattern)
1036 } else {
1037 self.amount_sampler.sample()
1038 };
1039
1040 let drift_adjusted_amount = {
1042 let drift = self.get_drift_adjustments(posting_date);
1043 if drift.amount_mean_multiplier != 1.0 {
1044 let multiplier = drift.amount_mean_multiplier * drift.seasonal_factor;
1046 let adjusted = base_amount.to_f64().unwrap_or(1.0) * multiplier;
1047 Decimal::from_f64_retain(adjusted).unwrap_or(base_amount)
1048 } else {
1049 base_amount
1050 }
1051 };
1052
1053 let total_amount = if is_automated {
1055 drift_adjusted_amount } else {
1057 self.apply_human_variation(drift_adjusted_amount)
1058 };
1059
1060 let debit_amounts = self
1062 .amount_sampler
1063 .sample_summing_to(line_spec.debit_count, total_amount);
1064 for (i, amount) in debit_amounts.into_iter().enumerate() {
1065 let account_number = self.select_debit_account().account_number.clone();
1066 let mut line = JournalEntryLine::debit(
1067 entry.header.document_id,
1068 (i + 1) as u32,
1069 account_number.clone(),
1070 amount,
1071 );
1072
1073 if self.template_config.descriptions.generate_line_text {
1075 line.line_text = Some(self.description_generator.generate_line_text(
1076 &account_number,
1077 &context,
1078 &mut self.rng,
1079 ));
1080 }
1081
1082 entry.add_line(line);
1083 }
1084
1085 let credit_amounts = self
1087 .amount_sampler
1088 .sample_summing_to(line_spec.credit_count, total_amount);
1089 for (i, amount) in credit_amounts.into_iter().enumerate() {
1090 let account_number = self.select_credit_account().account_number.clone();
1091 let mut line = JournalEntryLine::credit(
1092 entry.header.document_id,
1093 (line_spec.debit_count + i + 1) as u32,
1094 account_number.clone(),
1095 amount,
1096 );
1097
1098 if self.template_config.descriptions.generate_line_text {
1100 line.line_text = Some(self.description_generator.generate_line_text(
1101 &account_number,
1102 &context,
1103 &mut self.rng,
1104 ));
1105 }
1106
1107 entry.add_line(line);
1108 }
1109
1110 self.enrich_line_items(&mut entry);
1112
1113 if self.persona_errors_enabled && !is_automated {
1115 self.maybe_inject_persona_error(&mut entry);
1116 }
1117
1118 if self.approval_enabled {
1120 self.maybe_apply_approval_workflow(&mut entry, posting_date);
1121 }
1122
1123 self.populate_approval_fields(&mut entry, posting_date);
1125
1126 self.maybe_start_batch(&entry);
1128
1129 entry
1130 }
1131
1132 pub fn with_persona_errors(mut self, enabled: bool) -> Self {
1137 self.persona_errors_enabled = enabled;
1138 self
1139 }
1140
1141 pub fn with_fraud_config(mut self, config: FraudConfig) -> Self {
1146 self.fraud_config = config;
1147 self
1148 }
1149
1150 pub fn persona_errors_enabled(&self) -> bool {
1152 self.persona_errors_enabled
1153 }
1154
1155 pub fn with_batching(mut self, enabled: bool) -> Self {
1160 if !enabled {
1161 self.batch_state = None;
1162 }
1163 self
1164 }
1165
1166 pub fn batching_enabled(&self) -> bool {
1168 true
1170 }
1171
1172 fn maybe_start_batch(&mut self, entry: &JournalEntry) {
1177 if entry.header.source == TransactionSource::Automated || entry.header.is_fraud {
1179 return;
1180 }
1181
1182 if self.rng.random::<f64>() > 0.15 {
1184 return;
1185 }
1186
1187 let base_account = entry
1189 .lines
1190 .first()
1191 .map(|l| l.gl_account.clone())
1192 .unwrap_or_default();
1193
1194 let base_amount = entry.total_debit();
1195
1196 self.batch_state = Some(BatchState {
1197 base_account_number: base_account,
1198 base_amount,
1199 base_business_process: entry.header.business_process,
1200 base_posting_date: entry.header.posting_date,
1201 remaining: self.rng.random_range(2..7), });
1203 }
1204
1205 fn generate_batched_entry(&mut self) -> JournalEntry {
1213 use rust_decimal::Decimal;
1214
1215 if let Some(ref mut state) = self.batch_state {
1217 state.remaining = state.remaining.saturating_sub(1);
1218 }
1219
1220 let Some(batch) = self.batch_state.clone() else {
1221 tracing::warn!(
1224 "generate_batched_entry called without batch_state; generating standard entry"
1225 );
1226 self.batch_state = None;
1227 return self.generate();
1228 };
1229
1230 let posting_date = batch.base_posting_date;
1232
1233 self.count += 1;
1234 let document_id = self.generate_deterministic_uuid();
1235
1236 let company_code = self.company_selector.select(&mut self.rng).to_string();
1238
1239 let _line_spec = LineItemSpec {
1241 total_count: 2,
1242 debit_count: 1,
1243 credit_count: 1,
1244 split_type: DebitCreditSplit::Equal,
1245 };
1246
1247 let source = TransactionSource::Manual;
1249
1250 let business_process = batch.base_business_process.unwrap_or(BusinessProcess::R2R);
1252
1253 let time = self.temporal_sampler.sample_time(true);
1255 let created_at = posting_date.and_time(time).and_utc();
1256
1257 let (created_by, user_persona) = self.select_user(false);
1259
1260 let mut header =
1262 JournalEntryHeader::with_deterministic_id(company_code, posting_date, document_id);
1263 header.created_at = created_at;
1264 header.source = source;
1265 header.created_by = created_by;
1266 header.user_persona = user_persona;
1267 header.business_process = Some(business_process);
1268 header.document_type = Self::document_type_for_process(business_process).to_string();
1269
1270 header.source_document = Some(DocumentRef::Manual);
1272
1273 header.is_manual = true;
1275 header.source_system = if self.rng.random::<f64>() < 0.70 {
1276 "manual".to_string()
1277 } else {
1278 "spreadsheet".to_string()
1279 };
1280 header.is_post_close = posting_date.month() == self.end_date.month()
1281 && posting_date.year() == self.end_date.year()
1282 && posting_date.day() > 25;
1283 header.created_date =
1284 posting_date.and_hms_opt(time.hour().min(23), time.minute(), time.second());
1285
1286 let variation = self.rng.random_range(-0.15..0.15);
1288 let varied_amount =
1289 batch.base_amount * (Decimal::ONE + Decimal::try_from(variation).unwrap_or_default());
1290 let total_amount = varied_amount.round_dp(2).max(Decimal::from(1));
1291
1292 let mut entry = JournalEntry::new(header);
1294
1295 let debit_line = JournalEntryLine::debit(
1297 entry.header.document_id,
1298 1,
1299 batch.base_account_number.clone(),
1300 total_amount,
1301 );
1302 entry.add_line(debit_line);
1303
1304 let credit_account = self.select_credit_account().account_number.clone();
1306 let credit_line =
1307 JournalEntryLine::credit(entry.header.document_id, 2, credit_account, total_amount);
1308 entry.add_line(credit_line);
1309
1310 self.enrich_line_items(&mut entry);
1312
1313 if self.persona_errors_enabled {
1315 self.maybe_inject_persona_error(&mut entry);
1316 }
1317
1318 if self.approval_enabled {
1320 self.maybe_apply_approval_workflow(&mut entry, posting_date);
1321 }
1322
1323 self.populate_approval_fields(&mut entry, posting_date);
1325
1326 if batch.remaining <= 1 {
1328 self.batch_state = None;
1329 }
1330
1331 entry
1332 }
1333
1334 fn maybe_inject_persona_error(&mut self, entry: &mut JournalEntry) {
1336 let persona_str = &entry.header.user_persona;
1338 let persona = match persona_str.to_lowercase().as_str() {
1339 s if s.contains("junior") => UserPersona::JuniorAccountant,
1340 s if s.contains("senior") => UserPersona::SeniorAccountant,
1341 s if s.contains("controller") => UserPersona::Controller,
1342 s if s.contains("manager") => UserPersona::Manager,
1343 s if s.contains("executive") => UserPersona::Executive,
1344 _ => return, };
1346
1347 let base_error_rate = persona.error_rate();
1349
1350 let adjusted_rate = self.apply_stress_factors(base_error_rate, entry.header.posting_date);
1352
1353 if self.rng.random::<f64>() >= adjusted_rate {
1355 return; }
1357
1358 self.inject_human_error(entry, persona);
1360 }
1361
1362 fn apply_stress_factors(&self, base_rate: f64, posting_date: chrono::NaiveDate) -> f64 {
1371 use chrono::Datelike;
1372
1373 let mut rate = base_rate;
1374 let day = posting_date.day();
1375 let month = posting_date.month();
1376
1377 if month == 12 && day >= 28 {
1379 rate *= 2.0;
1380 return rate.min(0.5); }
1382
1383 if matches!(month, 3 | 6 | 9 | 12) && day >= 28 {
1385 rate *= 1.75; return rate.min(0.4);
1387 }
1388
1389 if day >= 28 {
1391 rate *= 1.5; }
1393
1394 let weekday = posting_date.weekday();
1396 match weekday {
1397 chrono::Weekday::Mon => {
1398 rate *= 1.2;
1400 }
1401 chrono::Weekday::Fri => {
1402 rate *= 1.3;
1404 }
1405 _ => {}
1406 }
1407
1408 rate.min(0.4)
1410 }
1411
1412 fn apply_human_variation(&mut self, amount: rust_decimal::Decimal) -> rust_decimal::Decimal {
1421 use rust_decimal::Decimal;
1422
1423 if amount < Decimal::from(10) {
1425 return amount;
1426 }
1427
1428 if self.rng.random::<f64>() > 0.70 {
1430 return amount;
1431 }
1432
1433 let variation_type: u8 = self.rng.random_range(0..4);
1435
1436 match variation_type {
1437 0 => {
1438 let variation_pct = self.rng.random_range(-0.02..0.02);
1440 let variation = amount * Decimal::try_from(variation_pct).unwrap_or_default();
1441 (amount + variation).round_dp(2)
1442 }
1443 1 => {
1444 let ten = Decimal::from(10);
1446 (amount / ten).round() * ten
1447 }
1448 2 => {
1449 if amount >= Decimal::from(500) {
1451 let hundred = Decimal::from(100);
1452 (amount / hundred).round() * hundred
1453 } else {
1454 amount
1455 }
1456 }
1457 3 => {
1458 let cents = Decimal::new(self.rng.random_range(-100..100), 2);
1460 (amount + cents).max(Decimal::ZERO).round_dp(2)
1461 }
1462 _ => amount,
1463 }
1464 }
1465
1466 fn rebalance_entry(entry: &mut JournalEntry, modified_was_debit: bool, impact: Decimal) {
1472 let balancing_idx = entry.lines.iter().position(|l| {
1474 if modified_was_debit {
1475 l.credit_amount > Decimal::ZERO
1476 } else {
1477 l.debit_amount > Decimal::ZERO
1478 }
1479 });
1480
1481 if let Some(idx) = balancing_idx {
1482 if modified_was_debit {
1483 entry.lines[idx].credit_amount += impact;
1484 } else {
1485 entry.lines[idx].debit_amount += impact;
1486 }
1487 }
1488 }
1489
1490 fn inject_human_error(&mut self, entry: &mut JournalEntry, persona: UserPersona) {
1495 use rust_decimal::Decimal;
1496
1497 let error_type: u8 = match persona {
1499 UserPersona::JuniorAccountant => {
1500 self.rng.random_range(0..5)
1502 }
1503 UserPersona::SeniorAccountant => {
1504 self.rng.random_range(0..3)
1506 }
1507 UserPersona::Controller | UserPersona::Manager => {
1508 self.rng.random_range(3..5)
1510 }
1511 _ => return,
1512 };
1513
1514 match error_type {
1515 0 => {
1516 if let Some(line) = entry.lines.get_mut(0) {
1518 let is_debit = line.debit_amount > Decimal::ZERO;
1519 let original_amount = if is_debit {
1520 line.debit_amount
1521 } else {
1522 line.credit_amount
1523 };
1524
1525 let s = original_amount.to_string();
1527 if s.len() >= 2 {
1528 let chars: Vec<char> = s.chars().collect();
1529 let pos = self.rng.random_range(0..chars.len().saturating_sub(1));
1530 if chars[pos].is_ascii_digit()
1531 && chars.get(pos + 1).is_some_and(char::is_ascii_digit)
1532 {
1533 let mut new_chars = chars;
1534 new_chars.swap(pos, pos + 1);
1535 if let Ok(new_amount) =
1536 new_chars.into_iter().collect::<String>().parse::<Decimal>()
1537 {
1538 let impact = new_amount - original_amount;
1539
1540 if is_debit {
1542 entry.lines[0].debit_amount = new_amount;
1543 } else {
1544 entry.lines[0].credit_amount = new_amount;
1545 }
1546
1547 Self::rebalance_entry(entry, is_debit, impact);
1549
1550 entry.header.header_text = Some(
1551 entry.header.header_text.clone().unwrap_or_default()
1552 + " [HUMAN_ERROR:TRANSPOSITION]",
1553 );
1554 }
1555 }
1556 }
1557 }
1558 }
1559 1 => {
1560 if let Some(line) = entry.lines.get_mut(0) {
1562 let is_debit = line.debit_amount > Decimal::ZERO;
1563 let original_amount = if is_debit {
1564 line.debit_amount
1565 } else {
1566 line.credit_amount
1567 };
1568
1569 let new_amount = original_amount * Decimal::new(10, 0);
1570 let impact = new_amount - original_amount;
1571
1572 if is_debit {
1574 entry.lines[0].debit_amount = new_amount;
1575 } else {
1576 entry.lines[0].credit_amount = new_amount;
1577 }
1578
1579 Self::rebalance_entry(entry, is_debit, impact);
1581
1582 entry.header.header_text = Some(
1583 entry.header.header_text.clone().unwrap_or_default()
1584 + " [HUMAN_ERROR:DECIMAL_SHIFT]",
1585 );
1586 }
1587 }
1588 2 => {
1589 if let Some(ref mut text) = entry.header.header_text {
1591 let typos = ["teh", "adn", "wiht", "taht", "recieve"];
1592 let correct = ["the", "and", "with", "that", "receive"];
1593 let idx = self.rng.random_range(0..typos.len());
1594 if text.to_lowercase().contains(correct[idx]) {
1595 *text = text.replace(correct[idx], typos[idx]);
1596 *text = format!("{text} [HUMAN_ERROR:TYPO]");
1597 }
1598 }
1599 }
1600 3 => {
1601 if let Some(line) = entry.lines.get_mut(0) {
1603 let is_debit = line.debit_amount > Decimal::ZERO;
1604 let original_amount = if is_debit {
1605 line.debit_amount
1606 } else {
1607 line.credit_amount
1608 };
1609
1610 let new_amount =
1611 (original_amount / Decimal::new(100, 0)).round() * Decimal::new(100, 0);
1612 let impact = new_amount - original_amount;
1613
1614 if is_debit {
1616 entry.lines[0].debit_amount = new_amount;
1617 } else {
1618 entry.lines[0].credit_amount = new_amount;
1619 }
1620
1621 Self::rebalance_entry(entry, is_debit, impact);
1623
1624 entry.header.header_text = Some(
1625 entry.header.header_text.clone().unwrap_or_default()
1626 + " [HUMAN_ERROR:ROUNDED]",
1627 );
1628 }
1629 }
1630 4 => {
1631 if entry.header.document_date == entry.header.posting_date {
1634 let days_late = self.rng.random_range(5..15);
1635 entry.header.document_date =
1636 entry.header.posting_date - chrono::Duration::days(days_late);
1637 entry.header.header_text = Some(
1638 entry.header.header_text.clone().unwrap_or_default()
1639 + " [HUMAN_ERROR:LATE_POSTING]",
1640 );
1641 }
1642 }
1643 _ => {}
1644 }
1645 }
1646
1647 fn maybe_apply_approval_workflow(
1652 &mut self,
1653 entry: &mut JournalEntry,
1654 _posting_date: NaiveDate,
1655 ) {
1656 use rust_decimal::Decimal;
1657
1658 let amount = entry.total_debit();
1659
1660 if amount <= self.approval_threshold {
1662 let workflow = ApprovalWorkflow::auto_approved(
1664 entry.header.created_by.clone(),
1665 entry.header.user_persona.clone(),
1666 amount,
1667 entry.header.created_at,
1668 );
1669 entry.header.approval_workflow = Some(workflow);
1670 return;
1671 }
1672
1673 entry.header.sox_relevant = true;
1675
1676 let required_levels = if amount > Decimal::new(100000, 0) {
1678 3 } else if amount > Decimal::new(50000, 0) {
1680 2 } else {
1682 1 };
1684
1685 let mut workflow = ApprovalWorkflow::new(
1687 entry.header.created_by.clone(),
1688 entry.header.user_persona.clone(),
1689 amount,
1690 );
1691 workflow.required_levels = required_levels;
1692
1693 let submit_time = entry.header.created_at;
1695 let submit_action = ApprovalAction::new(
1696 entry.header.created_by.clone(),
1697 entry.header.user_persona.clone(),
1698 self.parse_persona(&entry.header.user_persona),
1699 ApprovalActionType::Submit,
1700 0,
1701 )
1702 .with_timestamp(submit_time);
1703
1704 workflow.actions.push(submit_action);
1705 workflow.status = ApprovalStatus::Pending;
1706 workflow.submitted_at = Some(submit_time);
1707
1708 let mut current_time = submit_time;
1710 for level in 1..=required_levels {
1711 let delay_hours = self.rng.random_range(1..4);
1713 current_time += chrono::Duration::hours(delay_hours);
1714
1715 while current_time.weekday() == chrono::Weekday::Sat
1717 || current_time.weekday() == chrono::Weekday::Sun
1718 {
1719 current_time += chrono::Duration::days(1);
1720 }
1721
1722 let (approver_id, approver_role) = self.select_approver(level);
1724
1725 let approve_action = ApprovalAction::new(
1726 approver_id.clone(),
1727 approver_role.to_string(),
1728 approver_role,
1729 ApprovalActionType::Approve,
1730 level,
1731 )
1732 .with_timestamp(current_time);
1733
1734 workflow.actions.push(approve_action);
1735 workflow.current_level = level;
1736 }
1737
1738 workflow.status = ApprovalStatus::Approved;
1740 workflow.approved_at = Some(current_time);
1741
1742 entry.header.approval_workflow = Some(workflow);
1743 }
1744
1745 fn select_approver(&mut self, level: u8) -> (String, UserPersona) {
1747 let persona = match level {
1748 1 => UserPersona::Manager,
1749 2 => UserPersona::Controller,
1750 _ => UserPersona::Executive,
1751 };
1752
1753 if let Some(ref pool) = self.user_pool {
1755 if let Some(user) = pool.get_random_user(persona, &mut self.rng) {
1756 return (user.user_id.clone(), persona);
1757 }
1758 }
1759
1760 let approver_id = match persona {
1762 UserPersona::Manager => format!("MGR{:04}", self.rng.random_range(1..100)),
1763 UserPersona::Controller => format!("CTRL{:04}", self.rng.random_range(1..20)),
1764 UserPersona::Executive => format!("EXEC{:04}", self.rng.random_range(1..10)),
1765 _ => format!("USR{:04}", self.rng.random_range(1..1000)),
1766 };
1767
1768 (approver_id, persona)
1769 }
1770
1771 fn parse_persona(&self, persona_str: &str) -> UserPersona {
1773 match persona_str.to_lowercase().as_str() {
1774 s if s.contains("junior") => UserPersona::JuniorAccountant,
1775 s if s.contains("senior") => UserPersona::SeniorAccountant,
1776 s if s.contains("controller") => UserPersona::Controller,
1777 s if s.contains("manager") => UserPersona::Manager,
1778 s if s.contains("executive") => UserPersona::Executive,
1779 s if s.contains("automated") || s.contains("system") => UserPersona::AutomatedSystem,
1780 _ => UserPersona::JuniorAccountant, }
1782 }
1783
1784 pub fn with_approval(mut self, enabled: bool) -> Self {
1786 self.approval_enabled = enabled;
1787 self
1788 }
1789
1790 pub fn with_approval_threshold(mut self, threshold: rust_decimal::Decimal) -> Self {
1792 self.approval_threshold = threshold;
1793 self
1794 }
1795
1796 pub fn with_sod_violation_rate(mut self, rate: f64) -> Self {
1802 self.sod_violation_rate = rate;
1803 self
1804 }
1805
1806 fn populate_approval_fields(&mut self, entry: &mut JournalEntry, posting_date: NaiveDate) {
1809 if let Some(ref workflow) = entry.header.approval_workflow {
1810 let last_approver = workflow
1812 .actions
1813 .iter()
1814 .rev()
1815 .find(|a| matches!(a.action, ApprovalActionType::Approve));
1816
1817 if let Some(approver_action) = last_approver {
1818 entry.header.approved_by = Some(approver_action.actor_id.clone());
1819 entry.header.approval_date = Some(approver_action.action_timestamp.date_naive());
1820 } else {
1821 entry.header.approved_by = Some(workflow.preparer_id.clone());
1823 entry.header.approval_date = Some(posting_date);
1824 }
1825
1826 if self.rng.random::<f64>() < self.sod_violation_rate {
1828 let creator = entry.header.created_by.clone();
1829 entry.header.approved_by = Some(creator);
1830 entry.header.sod_violation = true;
1831 entry.header.sod_conflict_type = Some(SodConflictType::PreparerApprover);
1832 }
1833 }
1834 }
1835
1836 pub fn with_drift_controller(mut self, controller: DriftController) -> Self {
1842 self.drift_controller = Some(controller);
1843 self
1844 }
1845
1846 pub fn with_drift_config(mut self, config: DriftConfig, seed: u64) -> Self {
1851 if config.enabled {
1852 let total_periods = self.calculate_total_periods();
1853 self.drift_controller = Some(DriftController::new(config, seed, total_periods));
1854 }
1855 self
1856 }
1857
1858 fn calculate_total_periods(&self) -> u32 {
1860 let start_year = self.start_date.year();
1861 let start_month = self.start_date.month();
1862 let end_year = self.end_date.year();
1863 let end_month = self.end_date.month();
1864
1865 ((end_year - start_year) * 12 + (end_month as i32 - start_month as i32) + 1).max(1) as u32
1866 }
1867
1868 fn date_to_period(&self, date: NaiveDate) -> u32 {
1870 let start_year = self.start_date.year();
1871 let start_month = self.start_date.month() as i32;
1872 let date_year = date.year();
1873 let date_month = date.month() as i32;
1874
1875 ((date_year - start_year) * 12 + (date_month - start_month)).max(0) as u32
1876 }
1877
1878 fn get_drift_adjustments(&self, date: NaiveDate) -> DriftAdjustments {
1880 if let Some(ref controller) = self.drift_controller {
1881 let period = self.date_to_period(date);
1882 controller.compute_adjustments(period)
1883 } else {
1884 DriftAdjustments::none()
1885 }
1886 }
1887
1888 #[inline]
1890 fn select_user(&mut self, is_automated: bool) -> (String, String) {
1891 if let Some(ref pool) = self.user_pool {
1892 let persona = if is_automated {
1893 UserPersona::AutomatedSystem
1894 } else {
1895 let roll: f64 = self.rng.random();
1897 if roll < 0.4 {
1898 UserPersona::JuniorAccountant
1899 } else if roll < 0.7 {
1900 UserPersona::SeniorAccountant
1901 } else if roll < 0.85 {
1902 UserPersona::Controller
1903 } else {
1904 UserPersona::Manager
1905 }
1906 };
1907
1908 if let Some(user) = pool.get_random_user(persona, &mut self.rng) {
1909 return (user.user_id.clone(), user.persona.to_string());
1910 }
1911 }
1912
1913 if is_automated {
1915 (
1916 format!("BATCH{:04}", self.rng.random_range(1..=20)),
1917 "automated_system".to_string(),
1918 )
1919 } else {
1920 (
1921 format!("USER{:04}", self.rng.random_range(1..=40)),
1922 "senior_accountant".to_string(),
1923 )
1924 }
1925 }
1926
1927 #[inline]
1929 fn select_source(&mut self) -> TransactionSource {
1930 let roll: f64 = self.rng.random();
1931 let dist = &self.config.source_distribution;
1932
1933 if roll < dist.manual {
1934 TransactionSource::Manual
1935 } else if roll < dist.manual + dist.automated {
1936 TransactionSource::Automated
1937 } else if roll < dist.manual + dist.automated + dist.recurring {
1938 TransactionSource::Recurring
1939 } else {
1940 TransactionSource::Adjustment
1941 }
1942 }
1943
1944 #[inline]
1946 fn document_type_for_process(process: BusinessProcess) -> &'static str {
1955 match process {
1956 BusinessProcess::P2P => "KR",
1957 BusinessProcess::O2C => "DR",
1958 BusinessProcess::R2R => "SA",
1959 BusinessProcess::H2R => "HR",
1960 BusinessProcess::A2R => "AA",
1961 _ => "SA",
1962 }
1963 }
1964
1965 fn select_business_process(&mut self) -> BusinessProcess {
1966 let roll: f64 = self.rng.random();
1967
1968 if roll < 0.35 {
1970 BusinessProcess::O2C
1971 } else if roll < 0.65 {
1972 BusinessProcess::P2P
1973 } else if roll < 0.85 {
1974 BusinessProcess::R2R
1975 } else if roll < 0.95 {
1976 BusinessProcess::H2R
1977 } else {
1978 BusinessProcess::A2R
1979 }
1980 }
1981
1982 #[inline]
1983 fn select_debit_account(&mut self) -> &GLAccount {
1984 let accounts = self.coa.get_accounts_by_type(AccountType::Asset);
1985 let expense_accounts = self.coa.get_accounts_by_type(AccountType::Expense);
1986
1987 let all: Vec<_> = if self.rng.random::<f64>() < 0.6 {
1989 accounts
1990 } else {
1991 expense_accounts
1992 };
1993
1994 all.choose(&mut self.rng).copied().unwrap_or_else(|| {
1995 tracing::warn!(
1996 "Account selection returned empty list, falling back to first COA account"
1997 );
1998 &self.coa.accounts[0]
1999 })
2000 }
2001
2002 #[inline]
2003 fn select_credit_account(&mut self) -> &GLAccount {
2004 let liability_accounts = self.coa.get_accounts_by_type(AccountType::Liability);
2005 let revenue_accounts = self.coa.get_accounts_by_type(AccountType::Revenue);
2006
2007 let all: Vec<_> = if self.rng.random::<f64>() < 0.6 {
2009 liability_accounts
2010 } else {
2011 revenue_accounts
2012 };
2013
2014 all.choose(&mut self.rng).copied().unwrap_or_else(|| {
2015 tracing::warn!(
2016 "Account selection returned empty list, falling back to first COA account"
2017 );
2018 &self.coa.accounts[0]
2019 })
2020 }
2021}
2022
2023impl Generator for JournalEntryGenerator {
2024 type Item = JournalEntry;
2025 type Config = (
2026 TransactionConfig,
2027 Arc<ChartOfAccounts>,
2028 Vec<String>,
2029 NaiveDate,
2030 NaiveDate,
2031 );
2032
2033 fn new(config: Self::Config, seed: u64) -> Self {
2034 Self::new_with_params(config.0, config.1, config.2, config.3, config.4, seed)
2035 }
2036
2037 fn generate_one(&mut self) -> Self::Item {
2038 self.generate()
2039 }
2040
2041 fn reset(&mut self) {
2042 self.rng = seeded_rng(self.seed, 0);
2043 self.line_sampler.reset(self.seed + 1);
2044 self.amount_sampler.reset(self.seed + 2);
2045 self.temporal_sampler.reset(self.seed + 3);
2046 self.count = 0;
2047 self.uuid_factory.reset();
2048
2049 let mut ref_gen = ReferenceGenerator::new(
2051 self.start_date.year(),
2052 self.companies
2053 .first()
2054 .map(std::string::String::as_str)
2055 .unwrap_or("1000"),
2056 );
2057 ref_gen.set_prefix(
2058 ReferenceType::Invoice,
2059 &self.template_config.references.invoice_prefix,
2060 );
2061 ref_gen.set_prefix(
2062 ReferenceType::PurchaseOrder,
2063 &self.template_config.references.po_prefix,
2064 );
2065 ref_gen.set_prefix(
2066 ReferenceType::SalesOrder,
2067 &self.template_config.references.so_prefix,
2068 );
2069 self.reference_generator = ref_gen;
2070 }
2071
2072 fn count(&self) -> u64 {
2073 self.count
2074 }
2075
2076 fn seed(&self) -> u64 {
2077 self.seed
2078 }
2079}
2080
2081use datasynth_core::traits::ParallelGenerator;
2082
2083impl ParallelGenerator for JournalEntryGenerator {
2084 fn split(self, parts: usize) -> Vec<Self> {
2090 let parts = parts.max(1);
2091 (0..parts)
2092 .map(|i| {
2093 let sub_seed = self
2095 .seed
2096 .wrapping_add((i as u64).wrapping_mul(0x9E3779B97F4A7C15));
2097
2098 let mut gen = JournalEntryGenerator::new_with_full_config(
2099 self.config.clone(),
2100 Arc::clone(&self.coa),
2101 self.companies.clone(),
2102 self.start_date,
2103 self.end_date,
2104 sub_seed,
2105 self.template_config.clone(),
2106 self.user_pool.clone(),
2107 );
2108
2109 gen.company_selector = self.company_selector.clone();
2111 gen.vendor_pool = self.vendor_pool.clone();
2112 gen.customer_pool = self.customer_pool.clone();
2113 gen.material_pool = self.material_pool.clone();
2114 gen.using_real_master_data = self.using_real_master_data;
2115 gen.fraud_config = self.fraud_config.clone();
2116 gen.persona_errors_enabled = self.persona_errors_enabled;
2117 gen.approval_enabled = self.approval_enabled;
2118 gen.approval_threshold = self.approval_threshold;
2119 gen.sod_violation_rate = self.sod_violation_rate;
2120
2121 gen.uuid_factory = DeterministicUuidFactory::for_partition(
2123 sub_seed,
2124 GeneratorType::JournalEntry,
2125 i as u8,
2126 );
2127
2128 if let Some(ref config) = self.temporal_patterns_config {
2130 gen.temporal_patterns_config = Some(config.clone());
2131 if config.business_days.enabled {
2133 if let Some(ref bdc) = self.business_day_calculator {
2134 gen.business_day_calculator = Some(bdc.clone());
2135 }
2136 }
2137 if config.processing_lags.enabled {
2139 let lag_config =
2140 Self::convert_processing_lag_config(&config.processing_lags);
2141 gen.processing_lag_calculator =
2142 Some(ProcessingLagCalculator::with_config(sub_seed, lag_config));
2143 }
2144 }
2145
2146 if let Some(ref dc) = self.drift_controller {
2148 gen.drift_controller = Some(dc.clone());
2149 }
2150
2151 gen
2152 })
2153 .collect()
2154 }
2155}
2156
2157#[cfg(test)]
2158#[allow(clippy::unwrap_used)]
2159mod tests {
2160 use super::*;
2161 use crate::ChartOfAccountsGenerator;
2162
2163 #[test]
2164 fn test_generate_balanced_entries() {
2165 let mut coa_gen =
2166 ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2167 let coa = Arc::new(coa_gen.generate());
2168
2169 let mut je_gen = JournalEntryGenerator::new_with_params(
2170 TransactionConfig::default(),
2171 coa,
2172 vec!["1000".to_string()],
2173 NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2174 NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2175 42,
2176 );
2177
2178 let mut balanced_count = 0;
2179 for _ in 0..100 {
2180 let entry = je_gen.generate();
2181
2182 let has_human_error = entry
2184 .header
2185 .header_text
2186 .as_ref()
2187 .map(|t| t.contains("[HUMAN_ERROR:"))
2188 .unwrap_or(false);
2189
2190 if !has_human_error {
2191 assert!(
2192 entry.is_balanced(),
2193 "Entry {:?} is not balanced",
2194 entry.header.document_id
2195 );
2196 balanced_count += 1;
2197 }
2198 assert!(entry.line_count() >= 2, "Entry has fewer than 2 lines");
2199 }
2200
2201 assert!(
2203 balanced_count >= 80,
2204 "Expected at least 80 balanced entries, got {}",
2205 balanced_count
2206 );
2207 }
2208
2209 #[test]
2210 fn test_deterministic_generation() {
2211 let mut coa_gen =
2212 ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2213 let coa = Arc::new(coa_gen.generate());
2214
2215 let mut gen1 = JournalEntryGenerator::new_with_params(
2216 TransactionConfig::default(),
2217 Arc::clone(&coa),
2218 vec!["1000".to_string()],
2219 NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2220 NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2221 42,
2222 );
2223
2224 let mut gen2 = JournalEntryGenerator::new_with_params(
2225 TransactionConfig::default(),
2226 coa,
2227 vec!["1000".to_string()],
2228 NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2229 NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2230 42,
2231 );
2232
2233 for _ in 0..50 {
2234 let e1 = gen1.generate();
2235 let e2 = gen2.generate();
2236 assert_eq!(e1.header.document_id, e2.header.document_id);
2237 assert_eq!(e1.total_debit(), e2.total_debit());
2238 }
2239 }
2240
2241 #[test]
2242 fn test_templates_generate_descriptions() {
2243 let mut coa_gen =
2244 ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2245 let coa = Arc::new(coa_gen.generate());
2246
2247 let template_config = TemplateConfig {
2249 names: datasynth_config::schema::NameTemplateConfig {
2250 generate_realistic_names: true,
2251 email_domain: "test.com".to_string(),
2252 culture_distribution: datasynth_config::schema::CultureDistribution::default(),
2253 },
2254 descriptions: datasynth_config::schema::DescriptionTemplateConfig {
2255 generate_header_text: true,
2256 generate_line_text: true,
2257 },
2258 references: datasynth_config::schema::ReferenceTemplateConfig {
2259 generate_references: true,
2260 invoice_prefix: "TEST-INV".to_string(),
2261 po_prefix: "TEST-PO".to_string(),
2262 so_prefix: "TEST-SO".to_string(),
2263 },
2264 };
2265
2266 let mut je_gen = JournalEntryGenerator::new_with_full_config(
2267 TransactionConfig::default(),
2268 coa,
2269 vec!["1000".to_string()],
2270 NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2271 NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2272 42,
2273 template_config,
2274 None,
2275 )
2276 .with_persona_errors(false); for _ in 0..10 {
2279 let entry = je_gen.generate();
2280
2281 assert!(
2283 entry.header.header_text.is_some(),
2284 "Header text should be populated"
2285 );
2286
2287 assert!(
2289 entry.header.reference.is_some(),
2290 "Reference should be populated"
2291 );
2292
2293 assert!(
2295 entry.header.business_process.is_some(),
2296 "Business process should be set"
2297 );
2298
2299 for line in &entry.lines {
2301 assert!(line.line_text.is_some(), "Line text should be populated");
2302 }
2303
2304 assert!(entry.is_balanced());
2306 }
2307 }
2308
2309 #[test]
2310 fn test_user_pool_integration() {
2311 let mut coa_gen =
2312 ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2313 let coa = Arc::new(coa_gen.generate());
2314
2315 let companies = vec!["1000".to_string()];
2316
2317 let mut user_gen = crate::UserGenerator::new(42);
2319 let user_pool = user_gen.generate_standard(&companies);
2320
2321 let mut je_gen = JournalEntryGenerator::new_with_full_config(
2322 TransactionConfig::default(),
2323 coa,
2324 companies,
2325 NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2326 NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2327 42,
2328 TemplateConfig::default(),
2329 Some(user_pool),
2330 );
2331
2332 for _ in 0..20 {
2334 let entry = je_gen.generate();
2335
2336 assert!(!entry.header.created_by.is_empty());
2339 }
2340 }
2341
2342 #[test]
2343 fn test_master_data_connection() {
2344 let mut coa_gen =
2345 ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2346 let coa = Arc::new(coa_gen.generate());
2347
2348 let vendors = vec![
2350 Vendor::new("V-TEST-001", "Test Vendor Alpha", VendorType::Supplier),
2351 Vendor::new("V-TEST-002", "Test Vendor Beta", VendorType::Technology),
2352 ];
2353
2354 let customers = vec![
2356 Customer::new("C-TEST-001", "Test Customer One", CustomerType::Corporate),
2357 Customer::new(
2358 "C-TEST-002",
2359 "Test Customer Two",
2360 CustomerType::SmallBusiness,
2361 ),
2362 ];
2363
2364 let materials = vec![Material::new(
2366 "MAT-TEST-001",
2367 "Test Material A",
2368 MaterialType::RawMaterial,
2369 )];
2370
2371 let generator = JournalEntryGenerator::new_with_params(
2373 TransactionConfig::default(),
2374 coa,
2375 vec!["1000".to_string()],
2376 NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2377 NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2378 42,
2379 );
2380
2381 assert!(!generator.is_using_real_master_data());
2383
2384 let generator_with_data = generator
2386 .with_vendors(&vendors)
2387 .with_customers(&customers)
2388 .with_materials(&materials);
2389
2390 assert!(generator_with_data.is_using_real_master_data());
2392 }
2393
2394 #[test]
2395 fn test_with_master_data_convenience_method() {
2396 let mut coa_gen =
2397 ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2398 let coa = Arc::new(coa_gen.generate());
2399
2400 let vendors = vec![Vendor::new("V-001", "Vendor One", VendorType::Supplier)];
2401 let customers = vec![Customer::new(
2402 "C-001",
2403 "Customer One",
2404 CustomerType::Corporate,
2405 )];
2406 let materials = vec![Material::new(
2407 "MAT-001",
2408 "Material One",
2409 MaterialType::RawMaterial,
2410 )];
2411
2412 let generator = JournalEntryGenerator::new_with_params(
2413 TransactionConfig::default(),
2414 coa,
2415 vec!["1000".to_string()],
2416 NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2417 NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2418 42,
2419 )
2420 .with_master_data(&vendors, &customers, &materials);
2421
2422 assert!(generator.is_using_real_master_data());
2423 }
2424
2425 #[test]
2426 fn test_stress_factors_increase_error_rate() {
2427 let mut coa_gen =
2428 ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2429 let coa = Arc::new(coa_gen.generate());
2430
2431 let generator = JournalEntryGenerator::new_with_params(
2432 TransactionConfig::default(),
2433 coa,
2434 vec!["1000".to_string()],
2435 NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2436 NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2437 42,
2438 );
2439
2440 let base_rate = 0.1;
2441
2442 let regular_day = NaiveDate::from_ymd_opt(2024, 6, 15).unwrap(); let regular_rate = generator.apply_stress_factors(base_rate, regular_day);
2445 assert!(
2446 (regular_rate - base_rate).abs() < 0.01,
2447 "Regular day should have minimal stress factor adjustment"
2448 );
2449
2450 let month_end = NaiveDate::from_ymd_opt(2024, 6, 29).unwrap(); let month_end_rate = generator.apply_stress_factors(base_rate, month_end);
2453 assert!(
2454 month_end_rate > regular_rate,
2455 "Month end should have higher error rate than regular day"
2456 );
2457
2458 let year_end = NaiveDate::from_ymd_opt(2024, 12, 30).unwrap(); let year_end_rate = generator.apply_stress_factors(base_rate, year_end);
2461 assert!(
2462 year_end_rate > month_end_rate,
2463 "Year end should have highest error rate"
2464 );
2465
2466 let friday = NaiveDate::from_ymd_opt(2024, 6, 14).unwrap(); let friday_rate = generator.apply_stress_factors(base_rate, friday);
2469 assert!(
2470 friday_rate > regular_rate,
2471 "Friday should have higher error rate than mid-week"
2472 );
2473
2474 let monday = NaiveDate::from_ymd_opt(2024, 6, 17).unwrap(); let monday_rate = generator.apply_stress_factors(base_rate, monday);
2477 assert!(
2478 monday_rate > regular_rate,
2479 "Monday should have higher error rate than mid-week"
2480 );
2481 }
2482
2483 #[test]
2484 fn test_batching_produces_similar_entries() {
2485 let mut coa_gen =
2486 ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2487 let coa = Arc::new(coa_gen.generate());
2488
2489 let mut je_gen = JournalEntryGenerator::new_with_params(
2491 TransactionConfig::default(),
2492 coa,
2493 vec!["1000".to_string()],
2494 NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2495 NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2496 123,
2497 )
2498 .with_persona_errors(false); let entries: Vec<JournalEntry> = (0..200).map(|_| je_gen.generate()).collect();
2502
2503 for entry in &entries {
2505 assert!(
2506 entry.is_balanced(),
2507 "All entries including batched should be balanced"
2508 );
2509 }
2510
2511 let mut date_counts: std::collections::HashMap<NaiveDate, usize> =
2513 std::collections::HashMap::new();
2514 for entry in &entries {
2515 *date_counts.entry(entry.header.posting_date).or_insert(0) += 1;
2516 }
2517
2518 let dates_with_multiple = date_counts.values().filter(|&&c| c > 1).count();
2520 assert!(
2521 dates_with_multiple > 0,
2522 "With batching, should see some dates with multiple entries"
2523 );
2524 }
2525
/// With temporal patterns and business-day handling switched on for the "US"
/// calendar region, none of 100 generated entries may carry a Saturday or
/// Sunday posting date.
#[test]
fn test_temporal_patterns_business_days() {
    use datasynth_config::schema::{
        BusinessDaySchemaConfig, CalendarSchemaConfig, TemporalPatternsConfig,
    };

    let chart = Arc::new(
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42)
            .generate(),
    );

    // Assemble the temporal configuration piece by piece.
    let business_days = BusinessDaySchemaConfig {
        enabled: true,
        ..Default::default()
    };
    let calendars = CalendarSchemaConfig {
        regions: vec!["US".to_string()],
        custom_holidays: Vec::new(),
    };
    let patterns = TemporalPatternsConfig {
        enabled: true,
        business_days,
        calendars,
        ..Default::default()
    };

    let mut generator = JournalEntryGenerator::new_with_params(
        TransactionConfig::default(),
        chart,
        vec!["1000".to_string()],
        NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
        NaiveDate::from_ymd_opt(2024, 3, 31).unwrap(),
        42,
    )
    .with_temporal_patterns(patterns, 42)
    .with_persona_errors(false);

    let entries: Vec<JournalEntry> = std::iter::repeat_with(|| generator.generate())
        .take(100)
        .collect();

    for entry in &entries {
        let posted = entry.header.posting_date;
        let on_weekend = matches!(
            posted.weekday(),
            chrono::Weekday::Sat | chrono::Weekday::Sun
        );
        assert!(
            !on_weekend,
            "Posting date {:?} should not be a weekend",
            posted
        );
    }
}
2573
/// Without enabling temporal patterns, fewer than 5% of 500 generated entries
/// may have a posting date that falls on a Saturday or Sunday.
#[test]
fn test_default_generation_filters_weekends() {
    let chart = Arc::new(
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42)
            .generate(),
    );

    let mut generator = JournalEntryGenerator::new_with_params(
        TransactionConfig::default(),
        chart,
        vec!["1000".to_string()],
        NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
        NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
        42,
    )
    .with_persona_errors(false);

    let total = 500;
    let mut entries: Vec<JournalEntry> = Vec::new();
    for _ in 0..total {
        entries.push(generator.generate());
    }

    // Count entries whose posting weekday is Saturday or Sunday.
    let weekend_count = entries
        .iter()
        .map(|entry| entry.header.posting_date.weekday())
        .filter(|day| matches!(day, chrono::Weekday::Sat | chrono::Weekday::Sun))
        .count();

    let weekend_pct = weekend_count as f64 / total as f64;
    assert!(
        weekend_pct < 0.05,
        "Expected weekend entries <5% of total without temporal_patterns enabled, \
         but got {:.1}% ({}/{})",
        weekend_pct * 100.0,
        weekend_count,
        total
    );
}
2614
/// Generates 200 entries (seed 99, persona errors and batching disabled) and
/// checks that more than three distinct document types appear and that the
/// "SA" document type accounts for less than half of them.
#[test]
fn test_document_type_derived_from_business_process() {
    use std::collections::HashSet;

    let chart = Arc::new(
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42)
            .generate(),
    );

    let mut generator = JournalEntryGenerator::new_with_params(
        TransactionConfig::default(),
        chart,
        vec!["1000".to_string()],
        NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
        NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
        99,
    )
    .with_persona_errors(false)
    .with_batching(false);

    let total = 200;
    let mut entries = Vec::new();
    for _ in 0..total {
        entries.push(generator.generate());
    }

    // Distinct document types seen across the whole sample.
    let doc_types: HashSet<_> = entries
        .iter()
        .map(|entry| entry.header.document_type.clone())
        .collect();

    // Number of entries carrying the "SA" document type.
    let sa_count: usize = entries
        .iter()
        .filter(|entry| {
            let dt = &entry.header.document_type;
            dt == "SA"
        })
        .count();

    assert!(
        doc_types.len() > 3,
        "Expected >3 distinct document types, got {} ({:?})",
        doc_types.len(),
        doc_types,
    );

    let sa_pct = sa_count as f64 / total as f64;
    assert!(
        sa_pct < 0.50,
        "Expected SA <50%, got {:.1}% ({}/{})",
        sa_pct * 100.0,
        sa_count,
        total,
    );
}
2663
/// Across 200 generated entries, more than 95% of all line items must have
/// `account_description` set.
#[test]
fn test_enrich_line_items_account_description() {
    let chart = Arc::new(
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42)
            .generate(),
    );

    let mut generator = JournalEntryGenerator::new_with_params(
        TransactionConfig::default(),
        chart,
        vec!["1000".to_string()],
        NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
        NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
        42,
    )
    .with_persona_errors(false);

    let entries: Vec<JournalEntry> = std::iter::repeat_with(|| generator.generate())
        .take(200)
        .collect();

    // Walk every line once, tracking the total and the described subset.
    let mut total_lines = 0_usize;
    let mut lines_with_desc = 0_usize;
    for entry in &entries {
        total_lines += entry.lines.len();
        for line in &entry.lines {
            if line.account_description.is_some() {
                lines_with_desc += 1;
            }
        }
    }

    let desc_pct = lines_with_desc as f64 / total_lines as f64;
    assert!(
        desc_pct > 0.95,
        "Expected >95% of lines to have account_description, got {:.1}% ({}/{})",
        desc_pct * 100.0,
        lines_with_desc,
        total_lines,
    );
}
2700
/// Among lines whose `gl_account` starts with '5' or '6', more than 80% must
/// have `cost_center` set. If the 300-entry sample contains no such line the
/// test makes no assertion.
#[test]
fn test_enrich_line_items_cost_center_for_expense_accounts() {
    let chart = Arc::new(
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42)
            .generate(),
    );

    let mut generator = JournalEntryGenerator::new_with_params(
        TransactionConfig::default(),
        chart,
        vec!["1000".to_string()],
        NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
        NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
        42,
    )
    .with_persona_errors(false);

    let entries: Vec<JournalEntry> = std::iter::repeat_with(|| generator.generate())
        .take(300)
        .collect();

    let mut expense_total = 0_usize;
    let mut with_cc = 0_usize;
    for line in entries.iter().flat_map(|entry| &entry.lines) {
        // Only accounts whose number begins with 5 or 6 are considered here.
        let leading = line.gl_account.chars().next();
        if !matches!(leading, Some('5') | Some('6')) {
            continue;
        }
        expense_total += 1;
        if line.cost_center.is_some() {
            with_cc += 1;
        }
    }

    // Nothing to check when the sample has no matching lines.
    if expense_total == 0 {
        return;
    }

    let cc_pct = with_cc as f64 / expense_total as f64;
    assert!(
        cc_pct > 0.80,
        "Expected >80% of expense lines to have cost_center, got {:.1}% ({}/{})",
        cc_pct * 100.0,
        with_cc,
        expense_total,
    );
}
2745
/// Across 100 generated entries, more than 95% of all line items must have
/// `profit_center` set, and more than 95% must have `line_text` set.
#[test]
fn test_enrich_line_items_profit_center_and_line_text() {
    let chart = Arc::new(
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42)
            .generate(),
    );

    let mut generator = JournalEntryGenerator::new_with_params(
        TransactionConfig::default(),
        chart,
        vec!["1000".to_string()],
        NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
        NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
        42,
    )
    .with_persona_errors(false);

    let mut entries: Vec<JournalEntry> = Vec::new();
    for _ in 0..100 {
        entries.push(generator.generate());
    }

    // Single pass over all lines, counting both fields of interest.
    let mut total_lines = 0_usize;
    let mut with_pc = 0_usize;
    let mut with_text = 0_usize;
    for entry in &entries {
        total_lines += entry.lines.len();
        for line in &entry.lines {
            if line.profit_center.is_some() {
                with_pc += 1;
            }
            if line.line_text.is_some() {
                with_text += 1;
            }
        }
    }

    // Fraction of all lines represented by `count`.
    let share = |count: usize| count as f64 / total_lines as f64;

    let pc_pct = share(with_pc);
    assert!(
        pc_pct > 0.95,
        "Expected >95% of lines to have profit_center, got {:.1}% ({}/{})",
        pc_pct * 100.0,
        with_pc,
        total_lines,
    );

    let text_pct = share(with_text);
    assert!(
        text_pct > 0.95,
        "Expected >95% of lines to have line_text, got {:.1}% ({}/{})",
        text_pct * 100.0,
        with_text,
        total_lines,
    );
}
2797
/// Each of 100 generated entries must have a non-empty `source_system`, a
/// non-empty `created_by`, and a `created_date` that is present.
#[test]
fn test_je_has_audit_flags() {
    let chart = Arc::new(
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42)
            .generate(),
    );

    let mut generator = JournalEntryGenerator::new_with_params(
        TransactionConfig::default(),
        chart,
        vec!["1000".to_string()],
        NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
        NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
        42,
    )
    .with_persona_errors(false);

    // Entries are produced lazily, so each one is checked right after it is generated.
    for entry in std::iter::repeat_with(|| generator.generate()).take(100) {
        let header = &entry.header;

        let has_source_system = !header.source_system.is_empty();
        assert!(
            has_source_system,
            "source_system should be populated, got empty string"
        );

        let has_creator = !header.created_by.is_empty();
        assert!(has_creator, "created_by should be populated");

        assert!(
            header.created_date.is_some(),
            "created_date should be populated"
        );
    }
}
2838
/// Over 1000 entries (persona errors and batching disabled), the share flagged
/// `is_manual` must lie strictly between 1% and 50%, and for every entry the
/// flag must equal `source == TransactionSource::Manual`.
#[test]
fn test_manual_entry_rate() {
    let chart = Arc::new(
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42)
            .generate(),
    );

    let mut generator = JournalEntryGenerator::new_with_params(
        TransactionConfig::default(),
        chart,
        vec!["1000".to_string()],
        NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
        NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
        42,
    )
    .with_persona_errors(false)
    .with_batching(false);

    let total = 1000;
    let mut entries: Vec<JournalEntry> = Vec::new();
    for _ in 0..total {
        entries.push(generator.generate());
    }

    let mut manual_count = 0_usize;
    for entry in &entries {
        if entry.header.is_manual {
            manual_count += 1;
        }
    }
    let manual_rate = manual_count as f64 / total as f64;

    // Both bounds are exclusive.
    let above_floor = manual_rate > 0.01;
    let below_ceiling = manual_rate < 0.50;
    assert!(
        above_floor && below_ceiling,
        "Manual entry rate should be reasonable (1%-50%), got {:.1}% ({}/{})",
        manual_rate * 100.0,
        manual_count,
        total,
    );

    // The boolean flag and the source enum must agree on every entry.
    entries.iter().for_each(|entry| {
        let source_is_manual = entry.header.source == TransactionSource::Manual;
        assert_eq!(
            entry.header.is_manual, source_is_manual,
            "is_manual should match source == Manual"
        );
    });
}
2881
/// For 500 generated entries, `source_system` must be "manual" or
/// "spreadsheet" exactly when `is_manual` is set.
#[test]
fn test_manual_source_consistency() {
    let chart = Arc::new(
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42)
            .generate(),
    );

    let mut generator = JournalEntryGenerator::new_with_params(
        TransactionConfig::default(),
        chart,
        vec!["1000".to_string()],
        NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
        NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
        42,
    )
    .with_persona_errors(false)
    .with_batching(false);

    for entry in std::iter::repeat_with(|| generator.generate()).take(500) {
        let system = &entry.header.source_system;
        // True when the source system is one of the two manual-style values.
        let manual_style = *system == "manual" || *system == "spreadsheet";

        if entry.header.is_manual {
            assert!(
                manual_style,
                "Manual entry should have source_system 'manual' or 'spreadsheet', got '{}'",
                system,
            );
        } else {
            assert!(
                !manual_style,
                "Non-manual entry should not have source_system 'manual' or 'spreadsheet', got '{}'",
                system,
            );
        }
    }
}
2921
/// For 500 generated entries, whenever `created_date` is present its date
/// component must not be later than the entry's `posting_date`.
#[test]
fn test_created_date_before_posting() {
    let chart = Arc::new(
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42)
            .generate(),
    );

    let mut generator = JournalEntryGenerator::new_with_params(
        TransactionConfig::default(),
        chart,
        vec!["1000".to_string()],
        NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
        NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
        42,
    )
    .with_persona_errors(false);

    for _ in 0..500 {
        let entry = generator.generate();
        let posting_date = entry.header.posting_date;

        // Entries without a creation timestamp are not checked.
        let created_on = match entry.header.created_date {
            Some(timestamp) => timestamp.date(),
            None => continue,
        };

        assert!(
            created_on <= posting_date,
            "created_date ({}) should be <= posting_date ({})",
            created_on,
            posting_date,
        );
    }
}
2952}