1use chrono::{Datelike, NaiveDate, Timelike};
4use datasynth_core::utils::seeded_rng;
5use rand::prelude::*;
6use rand_chacha::ChaCha8Rng;
7use rust_decimal::prelude::*;
8use rust_decimal::Decimal;
9use std::sync::Arc;
10
11use tracing::debug;
12
13use datasynth_config::schema::{
14 FraudConfig, GeneratorConfig, TemplateConfig, TemporalPatternsConfig, TransactionConfig,
15};
16use datasynth_core::distributions::{
17 BusinessDayCalculator, CrossDayConfig, DriftAdjustments, DriftConfig, DriftController,
18 EventType, LagDistribution, PeriodEndConfig, PeriodEndDynamics, PeriodEndModel,
19 ProcessingLagCalculator, ProcessingLagConfig, *,
20};
21use datasynth_core::models::*;
22use datasynth_core::templates::{
23 descriptions::DescriptionContext, DescriptionGenerator, ReferenceGenerator, ReferenceType,
24};
25use datasynth_core::traits::Generator;
26use datasynth_core::uuid_factory::{DeterministicUuidFactory, GeneratorType};
27use datasynth_core::CountryPack;
28
29use crate::company_selector::WeightedCompanySelector;
30use crate::user_generator::{UserGenerator, UserGeneratorConfig};
31
32pub struct JournalEntryGenerator {
34 rng: ChaCha8Rng,
35 seed: u64,
36 config: TransactionConfig,
37 coa: Arc<ChartOfAccounts>,
38 companies: Vec<String>,
39 company_selector: WeightedCompanySelector,
40 line_sampler: LineItemSampler,
41 amount_sampler: AmountSampler,
42 temporal_sampler: TemporalSampler,
43 start_date: NaiveDate,
44 end_date: NaiveDate,
45 count: u64,
46 uuid_factory: DeterministicUuidFactory,
47 user_pool: Option<UserPool>,
49 description_generator: DescriptionGenerator,
50 reference_generator: ReferenceGenerator,
51 template_config: TemplateConfig,
52 vendor_pool: VendorPool,
53 customer_pool: CustomerPool,
54 material_pool: Option<MaterialPool>,
56 using_real_master_data: bool,
58 fraud_config: FraudConfig,
60 persona_errors_enabled: bool,
62 approval_enabled: bool,
64 approval_threshold: rust_decimal::Decimal,
65 sod_violation_rate: f64,
67 batch_state: Option<BatchState>,
69 drift_controller: Option<DriftController>,
71 business_day_calculator: Option<BusinessDayCalculator>,
73 processing_lag_calculator: Option<ProcessingLagCalculator>,
74 temporal_patterns_config: Option<TemporalPatternsConfig>,
75 business_process_weights: [(BusinessProcess, f64); 5],
79}
80
81const DEFAULT_BUSINESS_PROCESS_WEIGHTS: [(BusinessProcess, f64); 5] = [
82 (BusinessProcess::O2C, 0.35),
83 (BusinessProcess::P2P, 0.30),
84 (BusinessProcess::R2R, 0.20),
85 (BusinessProcess::H2R, 0.10),
86 (BusinessProcess::A2R, 0.05),
87];
88
89#[derive(Clone)]
94struct BatchState {
95 base_account_number: String,
97 base_amount: rust_decimal::Decimal,
98 base_business_process: Option<BusinessProcess>,
99 base_posting_date: NaiveDate,
100 remaining: u8,
102}
103
104impl JournalEntryGenerator {
105 pub fn new_with_params(
107 config: TransactionConfig,
108 coa: Arc<ChartOfAccounts>,
109 companies: Vec<String>,
110 start_date: NaiveDate,
111 end_date: NaiveDate,
112 seed: u64,
113 ) -> Self {
114 Self::new_with_full_config(
115 config,
116 coa,
117 companies,
118 start_date,
119 end_date,
120 seed,
121 TemplateConfig::default(),
122 None,
123 )
124 }
125
126 #[allow(clippy::too_many_arguments)]
128 pub fn new_with_full_config(
129 config: TransactionConfig,
130 coa: Arc<ChartOfAccounts>,
131 companies: Vec<String>,
132 start_date: NaiveDate,
133 end_date: NaiveDate,
134 seed: u64,
135 template_config: TemplateConfig,
136 user_pool: Option<UserPool>,
137 ) -> Self {
138 let user_pool = user_pool.or_else(|| {
140 if template_config.names.generate_realistic_names {
141 let user_gen_config = UserGeneratorConfig {
142 culture_distribution: vec![
143 (
144 datasynth_core::templates::NameCulture::WesternUs,
145 template_config.names.culture_distribution.western_us,
146 ),
147 (
148 datasynth_core::templates::NameCulture::Hispanic,
149 template_config.names.culture_distribution.hispanic,
150 ),
151 (
152 datasynth_core::templates::NameCulture::German,
153 template_config.names.culture_distribution.german,
154 ),
155 (
156 datasynth_core::templates::NameCulture::French,
157 template_config.names.culture_distribution.french,
158 ),
159 (
160 datasynth_core::templates::NameCulture::Chinese,
161 template_config.names.culture_distribution.chinese,
162 ),
163 (
164 datasynth_core::templates::NameCulture::Japanese,
165 template_config.names.culture_distribution.japanese,
166 ),
167 (
168 datasynth_core::templates::NameCulture::Indian,
169 template_config.names.culture_distribution.indian,
170 ),
171 ],
172 email_domain: template_config.names.email_domain.clone(),
173 generate_realistic_names: true,
174 };
175 let mut user_gen = UserGenerator::with_config(seed + 100, user_gen_config);
176 Some(user_gen.generate_standard(&companies))
177 } else {
178 None
179 }
180 });
181
182 let mut ref_gen = ReferenceGenerator::new(
184 start_date.year(),
185 companies
186 .first()
187 .map(std::string::String::as_str)
188 .unwrap_or("1000"),
189 );
190 ref_gen.set_prefix(
191 ReferenceType::Invoice,
192 &template_config.references.invoice_prefix,
193 );
194 ref_gen.set_prefix(
195 ReferenceType::PurchaseOrder,
196 &template_config.references.po_prefix,
197 );
198 ref_gen.set_prefix(
199 ReferenceType::SalesOrder,
200 &template_config.references.so_prefix,
201 );
202
203 let company_selector = WeightedCompanySelector::uniform(companies.clone());
205
206 Self {
207 rng: seeded_rng(seed, 0),
208 seed,
209 config: config.clone(),
210 coa,
211 companies,
212 company_selector,
213 line_sampler: LineItemSampler::with_config(
214 seed + 1,
215 config.line_item_distribution.clone(),
216 config.even_odd_distribution.clone(),
217 config.debit_credit_distribution.clone(),
218 ),
219 amount_sampler: AmountSampler::with_config(seed + 2, config.amounts.clone()),
220 temporal_sampler: TemporalSampler::with_config(
221 seed + 3,
222 config.seasonality.clone(),
223 WorkingHoursConfig::default(),
224 Vec::new(),
225 ),
226 start_date,
227 end_date,
228 count: 0,
229 uuid_factory: DeterministicUuidFactory::new(seed, GeneratorType::JournalEntry),
230 user_pool,
231 description_generator: DescriptionGenerator::new(),
232 reference_generator: ref_gen,
233 template_config,
234 vendor_pool: VendorPool::standard(),
235 customer_pool: CustomerPool::standard(),
236 material_pool: None,
237 using_real_master_data: false,
238 fraud_config: FraudConfig::default(),
239 persona_errors_enabled: true, approval_enabled: true, approval_threshold: rust_decimal::Decimal::new(10000, 0), sod_violation_rate: 0.10, batch_state: None,
244 drift_controller: None,
245 business_day_calculator: Some(BusinessDayCalculator::new(HolidayCalendar::new(
248 Region::US,
249 start_date.year(),
250 ))),
251 processing_lag_calculator: None,
252 temporal_patterns_config: None,
253 business_process_weights: DEFAULT_BUSINESS_PROCESS_WEIGHTS,
254 }
255 }
256
257 pub fn set_business_process_weights(
261 &mut self,
262 o2c: f64,
263 p2p: f64,
264 r2r: f64,
265 h2r: f64,
266 a2r: f64,
267 ) {
268 self.business_process_weights = [
269 (BusinessProcess::O2C, o2c),
270 (BusinessProcess::P2P, p2p),
271 (BusinessProcess::R2R, r2r),
272 (BusinessProcess::H2R, h2r),
273 (BusinessProcess::A2R, a2r),
274 ];
275 }
276
277 pub fn from_generator_config(
282 full_config: &GeneratorConfig,
283 coa: Arc<ChartOfAccounts>,
284 start_date: NaiveDate,
285 end_date: NaiveDate,
286 seed: u64,
287 ) -> Self {
288 let companies: Vec<String> = full_config
289 .companies
290 .iter()
291 .map(|c| c.code.clone())
292 .collect();
293
294 let company_selector = WeightedCompanySelector::from_configs(&full_config.companies);
296
297 let mut generator = Self::new_with_full_config(
298 full_config.transactions.clone(),
299 coa,
300 companies,
301 start_date,
302 end_date,
303 seed,
304 full_config.templates.clone(),
305 None,
306 );
307
308 generator.company_selector = company_selector;
310
311 generator.fraud_config = full_config.fraud.clone();
313
314 let temporal_config = &full_config.temporal_patterns;
316 if temporal_config.enabled {
317 generator = generator.with_temporal_patterns(temporal_config.clone(), seed);
318 }
319
320 generator
321 }
322
323 pub fn with_temporal_patterns(mut self, config: TemporalPatternsConfig, seed: u64) -> Self {
330 if config.business_days.enabled {
332 let region = config
333 .calendars
334 .regions
335 .first()
336 .map(|r| Self::parse_region(r))
337 .unwrap_or(Region::US);
338
339 let calendar = HolidayCalendar::new(region, self.start_date.year());
340 self.business_day_calculator = Some(BusinessDayCalculator::new(calendar));
341 }
342
343 if config.processing_lags.enabled {
345 let lag_config = Self::convert_processing_lag_config(&config.processing_lags);
346 self.processing_lag_calculator =
347 Some(ProcessingLagCalculator::with_config(seed, lag_config));
348 }
349
350 let model = config.period_end.model.as_deref().unwrap_or("flat");
352 if model != "flat"
353 || config
354 .period_end
355 .month_end
356 .as_ref()
357 .is_some_and(|m| m.peak_multiplier.unwrap_or(1.0) != 1.0)
358 {
359 let dynamics = Self::convert_period_end_config(&config.period_end);
360 self.temporal_sampler.set_period_end_dynamics(dynamics);
361 }
362
363 self.temporal_patterns_config = Some(config);
364 self
365 }
366
367 pub fn with_country_pack_temporal(
375 mut self,
376 config: TemporalPatternsConfig,
377 seed: u64,
378 pack: &CountryPack,
379 ) -> Self {
380 if config.business_days.enabled {
382 let calendar = HolidayCalendar::from_country_pack(pack, self.start_date.year());
383 self.business_day_calculator = Some(BusinessDayCalculator::new(calendar));
384 }
385
386 if config.processing_lags.enabled {
388 let lag_config = Self::convert_processing_lag_config(&config.processing_lags);
389 self.processing_lag_calculator =
390 Some(ProcessingLagCalculator::with_config(seed, lag_config));
391 }
392
393 let model = config.period_end.model.as_deref().unwrap_or("flat");
395 if model != "flat"
396 || config
397 .period_end
398 .month_end
399 .as_ref()
400 .is_some_and(|m| m.peak_multiplier.unwrap_or(1.0) != 1.0)
401 {
402 let dynamics = Self::convert_period_end_config(&config.period_end);
403 self.temporal_sampler.set_period_end_dynamics(dynamics);
404 }
405
406 self.temporal_patterns_config = Some(config);
407 self
408 }
409
410 fn convert_processing_lag_config(
412 schema: &datasynth_config::schema::ProcessingLagSchemaConfig,
413 ) -> ProcessingLagConfig {
414 let mut config = ProcessingLagConfig {
415 enabled: schema.enabled,
416 ..Default::default()
417 };
418
419 let convert_lag = |lag: &datasynth_config::schema::LagDistributionSchemaConfig| {
421 let mut dist = LagDistribution::log_normal(lag.mu, lag.sigma);
422 if let Some(min) = lag.min_hours {
423 dist.min_lag_hours = min;
424 }
425 if let Some(max) = lag.max_hours {
426 dist.max_lag_hours = max;
427 }
428 dist
429 };
430
431 if let Some(ref lag) = schema.sales_order_lag {
433 config
434 .event_lags
435 .insert(EventType::SalesOrder, convert_lag(lag));
436 }
437 if let Some(ref lag) = schema.purchase_order_lag {
438 config
439 .event_lags
440 .insert(EventType::PurchaseOrder, convert_lag(lag));
441 }
442 if let Some(ref lag) = schema.goods_receipt_lag {
443 config
444 .event_lags
445 .insert(EventType::GoodsReceipt, convert_lag(lag));
446 }
447 if let Some(ref lag) = schema.invoice_receipt_lag {
448 config
449 .event_lags
450 .insert(EventType::InvoiceReceipt, convert_lag(lag));
451 }
452 if let Some(ref lag) = schema.invoice_issue_lag {
453 config
454 .event_lags
455 .insert(EventType::InvoiceIssue, convert_lag(lag));
456 }
457 if let Some(ref lag) = schema.payment_lag {
458 config
459 .event_lags
460 .insert(EventType::Payment, convert_lag(lag));
461 }
462 if let Some(ref lag) = schema.journal_entry_lag {
463 config
464 .event_lags
465 .insert(EventType::JournalEntry, convert_lag(lag));
466 }
467
468 if let Some(ref cross_day) = schema.cross_day_posting {
470 config.cross_day = CrossDayConfig {
471 enabled: cross_day.enabled,
472 probability_by_hour: cross_day.probability_by_hour.clone(),
473 ..Default::default()
474 };
475 }
476
477 config
478 }
479
480 fn convert_period_end_config(
482 schema: &datasynth_config::schema::PeriodEndSchemaConfig,
483 ) -> PeriodEndDynamics {
484 let model_type = schema.model.as_deref().unwrap_or("exponential");
485
486 let convert_period =
488 |period: Option<&datasynth_config::schema::PeriodEndModelSchemaConfig>,
489 default_peak: f64|
490 -> PeriodEndConfig {
491 if let Some(p) = period {
492 let model = match model_type {
493 "flat" => PeriodEndModel::FlatMultiplier {
494 multiplier: p.peak_multiplier.unwrap_or(default_peak),
495 },
496 "extended_crunch" => PeriodEndModel::ExtendedCrunch {
497 start_day: p.start_day.unwrap_or(-10),
498 sustained_high_days: p.sustained_high_days.unwrap_or(3),
499 peak_multiplier: p.peak_multiplier.unwrap_or(default_peak),
500 ramp_up_days: 3, },
502 _ => PeriodEndModel::ExponentialAcceleration {
503 start_day: p.start_day.unwrap_or(-10),
504 base_multiplier: p.base_multiplier.unwrap_or(1.0),
505 peak_multiplier: p.peak_multiplier.unwrap_or(default_peak),
506 decay_rate: p.decay_rate.unwrap_or(0.3),
507 },
508 };
509 PeriodEndConfig {
510 enabled: true,
511 model,
512 additional_multiplier: p.additional_multiplier.unwrap_or(1.0),
513 }
514 } else {
515 PeriodEndConfig {
516 enabled: true,
517 model: PeriodEndModel::ExponentialAcceleration {
518 start_day: -10,
519 base_multiplier: 1.0,
520 peak_multiplier: default_peak,
521 decay_rate: 0.3,
522 },
523 additional_multiplier: 1.0,
524 }
525 }
526 };
527
528 PeriodEndDynamics::new(
529 convert_period(schema.month_end.as_ref(), 2.0),
530 convert_period(schema.quarter_end.as_ref(), 3.5),
531 convert_period(schema.year_end.as_ref(), 5.0),
532 )
533 }
534
535 fn parse_region(region_str: &str) -> Region {
537 match region_str.to_uppercase().as_str() {
538 "US" => Region::US,
539 "DE" => Region::DE,
540 "GB" => Region::GB,
541 "CN" => Region::CN,
542 "JP" => Region::JP,
543 "IN" => Region::IN,
544 "BR" => Region::BR,
545 "MX" => Region::MX,
546 "AU" => Region::AU,
547 "SG" => Region::SG,
548 "KR" => Region::KR,
549 "FR" => Region::FR,
550 "IT" => Region::IT,
551 "ES" => Region::ES,
552 "CA" => Region::CA,
553 _ => Region::US,
554 }
555 }
556
557 pub fn set_company_selector(&mut self, selector: WeightedCompanySelector) {
559 self.company_selector = selector;
560 }
561
562 pub fn company_selector(&self) -> &WeightedCompanySelector {
564 &self.company_selector
565 }
566
567 pub fn set_fraud_config(&mut self, config: FraudConfig) {
569 self.fraud_config = config;
570 }
571
572 pub fn with_vendors(mut self, vendors: &[Vendor]) -> Self {
577 if !vendors.is_empty() {
578 self.vendor_pool = VendorPool::from_vendors(vendors.to_vec());
579 self.using_real_master_data = true;
580 }
581 self
582 }
583
584 pub fn with_customers(mut self, customers: &[Customer]) -> Self {
589 if !customers.is_empty() {
590 self.customer_pool = CustomerPool::from_customers(customers.to_vec());
591 self.using_real_master_data = true;
592 }
593 self
594 }
595
596 pub fn with_materials(mut self, materials: &[Material]) -> Self {
600 if !materials.is_empty() {
601 self.material_pool = Some(MaterialPool::from_materials(materials.to_vec()));
602 self.using_real_master_data = true;
603 }
604 self
605 }
606
607 pub fn with_master_data(
612 self,
613 vendors: &[Vendor],
614 customers: &[Customer],
615 materials: &[Material],
616 ) -> Self {
617 self.with_vendors(vendors)
618 .with_customers(customers)
619 .with_materials(materials)
620 }
621
622 pub fn with_country_pack_names(mut self, pack: &CountryPack) -> Self {
629 let name_gen =
630 datasynth_core::templates::MultiCultureNameGenerator::from_country_pack(pack);
631 let config = UserGeneratorConfig {
632 culture_distribution: Vec::new(),
635 email_domain: name_gen.email_domain().to_string(),
636 generate_realistic_names: true,
637 };
638 let mut user_gen = UserGenerator::with_name_generator(self.seed + 100, config, name_gen);
639 self.user_pool = Some(user_gen.generate_standard(&self.companies));
640 self
641 }
642
643 pub fn is_using_real_master_data(&self) -> bool {
645 self.using_real_master_data
646 }
647
648 fn determine_fraud(&mut self) -> Option<FraudType> {
650 if !self.fraud_config.enabled {
651 return None;
652 }
653
654 if self.rng.random::<f64>() >= self.fraud_config.fraud_rate {
656 return None;
657 }
658
659 Some(self.select_fraud_type())
661 }
662
663 fn select_fraud_type(&mut self) -> FraudType {
665 let dist = &self.fraud_config.fraud_type_distribution;
666 let roll: f64 = self.rng.random();
667
668 let mut cumulative = 0.0;
669
670 cumulative += dist.suspense_account_abuse;
671 if roll < cumulative {
672 return FraudType::SuspenseAccountAbuse;
673 }
674
675 cumulative += dist.fictitious_transaction;
676 if roll < cumulative {
677 return FraudType::FictitiousTransaction;
678 }
679
680 cumulative += dist.revenue_manipulation;
681 if roll < cumulative {
682 return FraudType::RevenueManipulation;
683 }
684
685 cumulative += dist.expense_capitalization;
686 if roll < cumulative {
687 return FraudType::ExpenseCapitalization;
688 }
689
690 cumulative += dist.split_transaction;
691 if roll < cumulative {
692 return FraudType::SplitTransaction;
693 }
694
695 cumulative += dist.timing_anomaly;
696 if roll < cumulative {
697 return FraudType::TimingAnomaly;
698 }
699
700 cumulative += dist.unauthorized_access;
701 if roll < cumulative {
702 return FraudType::UnauthorizedAccess;
703 }
704
705 FraudType::DuplicatePayment
707 }
708
709 fn fraud_type_to_amount_pattern(&self, fraud_type: FraudType) -> FraudAmountPattern {
711 match fraud_type {
712 FraudType::SplitTransaction | FraudType::JustBelowThreshold => {
713 FraudAmountPattern::ThresholdAdjacent
714 }
715 FraudType::FictitiousTransaction
716 | FraudType::FictitiousEntry
717 | FraudType::SuspenseAccountAbuse
718 | FraudType::RoundDollarManipulation => FraudAmountPattern::ObviousRoundNumbers,
719 FraudType::RevenueManipulation
720 | FraudType::ExpenseCapitalization
721 | FraudType::ImproperCapitalization
722 | FraudType::ReserveManipulation
723 | FraudType::UnauthorizedAccess
724 | FraudType::PrematureRevenue
725 | FraudType::UnderstatedLiabilities
726 | FraudType::OverstatedAssets
727 | FraudType::ChannelStuffing => FraudAmountPattern::StatisticallyImprobable,
728 FraudType::DuplicatePayment
729 | FraudType::TimingAnomaly
730 | FraudType::SelfApproval
731 | FraudType::ExceededApprovalLimit
732 | FraudType::SegregationOfDutiesViolation
733 | FraudType::UnauthorizedApproval
734 | FraudType::CollusiveApproval
735 | FraudType::FictitiousVendor
736 | FraudType::ShellCompanyPayment
737 | FraudType::Kickback
738 | FraudType::KickbackScheme
739 | FraudType::InvoiceManipulation
740 | FraudType::AssetMisappropriation
741 | FraudType::InventoryTheft
742 | FraudType::GhostEmployee => FraudAmountPattern::Normal,
743 FraudType::ImproperRevenueRecognition
745 | FraudType::ImproperPoAllocation
746 | FraudType::VariableConsiderationManipulation
747 | FraudType::ContractModificationMisstatement => {
748 FraudAmountPattern::StatisticallyImprobable
749 }
750 FraudType::LeaseClassificationManipulation
752 | FraudType::OffBalanceSheetLease
753 | FraudType::LeaseLiabilityUnderstatement
754 | FraudType::RouAssetMisstatement => FraudAmountPattern::StatisticallyImprobable,
755 FraudType::FairValueHierarchyManipulation
757 | FraudType::Level3InputManipulation
758 | FraudType::ValuationTechniqueManipulation => {
759 FraudAmountPattern::StatisticallyImprobable
760 }
761 FraudType::DelayedImpairment
763 | FraudType::ImpairmentTestAvoidance
764 | FraudType::CashFlowProjectionManipulation
765 | FraudType::ImproperImpairmentReversal => FraudAmountPattern::StatisticallyImprobable,
766 FraudType::BidRigging
768 | FraudType::PhantomVendorContract
769 | FraudType::ConflictOfInterestSourcing => FraudAmountPattern::Normal,
770 FraudType::SplitContractThreshold => FraudAmountPattern::ThresholdAdjacent,
771 FraudType::GhostEmployeePayroll
773 | FraudType::PayrollInflation
774 | FraudType::DuplicateExpenseReport
775 | FraudType::FictitiousExpense => FraudAmountPattern::Normal,
776 FraudType::SplitExpenseToAvoidApproval => FraudAmountPattern::ThresholdAdjacent,
777 FraudType::RevenueTimingManipulation => FraudAmountPattern::StatisticallyImprobable,
779 FraudType::QuotePriceOverride => FraudAmountPattern::Normal,
780 }
781 }
782
783 #[inline]
785 fn generate_deterministic_uuid(&self) -> uuid::Uuid {
786 self.uuid_factory.next()
787 }
788
789 const COST_CENTER_POOL: &'static [&'static str] =
791 &["CC1000", "CC2000", "CC3000", "CC4000", "CC5000"];
792
793 fn enrich_line_items(&self, entry: &mut JournalEntry) {
799 let posting_date = entry.header.posting_date;
800 let company_code = &entry.header.company_code;
801 let header_text = entry.header.header_text.clone();
802 let business_process = entry.header.business_process;
803
804 let doc_id_bytes = entry.header.document_id.as_bytes();
806 let mut cc_seed: usize = 0;
807 for &b in doc_id_bytes {
808 cc_seed = cc_seed.wrapping_add(b as usize);
809 }
810
811 for (i, line) in entry.lines.iter_mut().enumerate() {
812 if line.account_description.is_none() {
814 line.account_description = self
815 .coa
816 .get_account(&line.gl_account)
817 .map(|a| a.short_description.clone());
818 }
819
820 if line.cost_center.is_none() {
822 let first_char = line.gl_account.chars().next().unwrap_or('0');
823 if first_char == '5' || first_char == '6' {
824 let idx = cc_seed.wrapping_add(i) % Self::COST_CENTER_POOL.len();
825 line.cost_center = Some(Self::COST_CENTER_POOL[idx].to_string());
826 }
827 }
828
829 if line.profit_center.is_none() {
831 let suffix = match business_process {
832 Some(BusinessProcess::P2P) => "-P2P",
833 Some(BusinessProcess::O2C) => "-O2C",
834 Some(BusinessProcess::R2R) => "-R2R",
835 Some(BusinessProcess::H2R) => "-H2R",
836 _ => "",
837 };
838 line.profit_center = Some(format!("PC-{company_code}{suffix}"));
839 }
840
841 if line.line_text.is_none() {
843 line.line_text = header_text.clone();
844 }
845
846 if line.value_date.is_none()
848 && (line.gl_account.starts_with("1100") || line.gl_account.starts_with("2000"))
849 {
850 line.value_date = Some(posting_date);
851 }
852
853 if line.assignment.is_none() {
855 if line.gl_account.starts_with("2000") {
856 if let Some(ref ht) = header_text {
858 if let Some(vendor_part) = ht.rsplit(" - ").next() {
860 if vendor_part.starts_with("V-")
861 || vendor_part.starts_with("VENDOR")
862 || vendor_part.starts_with("Vendor")
863 {
864 line.assignment = Some(vendor_part.to_string());
865 }
866 }
867 }
868 } else if line.gl_account.starts_with("1100") {
869 if let Some(ref ht) = header_text {
871 if let Some(customer_part) = ht.rsplit(" - ").next() {
872 if customer_part.starts_with("C-")
873 || customer_part.starts_with("CUST")
874 || customer_part.starts_with("Customer")
875 {
876 line.assignment = Some(customer_part.to_string());
877 }
878 }
879 }
880 }
881 }
882 }
883 }
884
885 pub fn generate(&mut self) -> JournalEntry {
887 debug!(
888 count = self.count,
889 companies = self.companies.len(),
890 start_date = %self.start_date,
891 end_date = %self.end_date,
892 "Generating journal entry"
893 );
894
895 if let Some(ref state) = self.batch_state {
897 if state.remaining > 0 {
898 return self.generate_batched_entry();
899 }
900 }
901
902 self.count += 1;
903
904 let document_id = self.generate_deterministic_uuid();
906
907 let mut posting_date = self
909 .temporal_sampler
910 .sample_date(self.start_date, self.end_date);
911
912 if let Some(ref calc) = self.business_day_calculator {
914 if !calc.is_business_day(posting_date) {
915 posting_date = calc.next_business_day(posting_date, false);
917 if posting_date > self.end_date {
919 posting_date = calc.prev_business_day(self.end_date, true);
920 }
921 }
922 }
923
924 let company_code = self.company_selector.select(&mut self.rng).to_string();
926
927 let line_spec = self.line_sampler.sample();
929
930 let source = self.select_source();
932 let is_automated = matches!(
933 source,
934 TransactionSource::Automated | TransactionSource::Recurring
935 );
936
937 let business_process = self.select_business_process();
939
940 let fraud_type = self.determine_fraud();
942 let is_fraud = fraud_type.is_some();
943
944 let time = self.temporal_sampler.sample_time(!is_automated);
946 let created_at = posting_date.and_time(time).and_utc();
947
948 let (created_by, user_persona) = self.select_user(is_automated);
950
951 let mut header =
953 JournalEntryHeader::with_deterministic_id(company_code, posting_date, document_id);
954 header.created_at = created_at;
955 header.source = source;
956 header.created_by = created_by;
957 header.user_persona = user_persona;
958 header.business_process = Some(business_process);
959 header.document_type = Self::document_type_for_process(business_process).to_string();
960 header.is_fraud = is_fraud;
961 header.fraud_type = fraud_type;
962
963 let is_manual = matches!(source, TransactionSource::Manual);
965 header.is_manual = is_manual;
966
967 header.source_system = if is_manual {
969 if self.rng.random::<f64>() < 0.70 {
970 "manual".to_string()
971 } else {
972 "spreadsheet".to_string()
973 }
974 } else {
975 let roll: f64 = self.rng.random();
976 if roll < 0.40 {
977 "SAP-FI".to_string()
978 } else if roll < 0.60 {
979 "SAP-MM".to_string()
980 } else if roll < 0.80 {
981 "SAP-SD".to_string()
982 } else if roll < 0.95 {
983 "interface".to_string()
984 } else {
985 "SAP-HR".to_string()
986 }
987 };
988
989 let is_post_close = posting_date.month() == self.end_date.month()
992 && posting_date.year() == self.end_date.year()
993 && posting_date.day() > 25;
994 header.is_post_close = is_post_close;
995
996 let created_date = if is_manual {
999 posting_date.and_hms_opt(time.hour().min(23), time.minute(), time.second())
1000 } else {
1001 let lag_days = self.rng.random_range(0i64..=3);
1002 let created_naive_date = posting_date
1003 .checked_sub_signed(chrono::Duration::days(lag_days))
1004 .unwrap_or(posting_date);
1005 created_naive_date.and_hms_opt(
1006 self.rng.random_range(8u32..=17),
1007 self.rng.random_range(0u32..=59),
1008 self.rng.random_range(0u32..=59),
1009 )
1010 };
1011 header.created_date = created_date;
1012
1013 let mut context =
1015 DescriptionContext::with_period(posting_date.month(), posting_date.year());
1016
1017 match business_process {
1019 BusinessProcess::P2P => {
1020 if let Some(vendor) = self.vendor_pool.random_vendor(&mut self.rng) {
1021 context.vendor_name = Some(vendor.name.clone());
1022 }
1023 }
1024 BusinessProcess::O2C => {
1025 if let Some(customer) = self.customer_pool.random_customer(&mut self.rng) {
1026 context.customer_name = Some(customer.name.clone());
1027 }
1028 }
1029 _ => {}
1030 }
1031
1032 if self.template_config.descriptions.generate_header_text {
1034 header.header_text = Some(self.description_generator.generate_header_text(
1035 business_process,
1036 &context,
1037 &mut self.rng,
1038 ));
1039 }
1040
1041 if self.template_config.references.generate_references {
1043 header.reference = Some(
1044 self.reference_generator
1045 .generate_for_process_year(business_process, posting_date.year()),
1046 );
1047 }
1048
1049 header.source_document = header
1051 .reference
1052 .as_deref()
1053 .and_then(DocumentRef::parse)
1054 .or_else(|| {
1055 if header.source == TransactionSource::Manual {
1056 Some(DocumentRef::Manual)
1057 } else {
1058 None
1059 }
1060 });
1061
1062 let mut entry = JournalEntry::new(header);
1064
1065 let base_amount = if let Some(ft) = fraud_type {
1067 let pattern = self.fraud_type_to_amount_pattern(ft);
1068 self.amount_sampler.sample_fraud(pattern)
1069 } else {
1070 self.amount_sampler.sample()
1071 };
1072
1073 let drift_adjusted_amount = {
1075 let drift = self.get_drift_adjustments(posting_date);
1076 if drift.amount_mean_multiplier != 1.0 {
1077 let multiplier = drift.amount_mean_multiplier * drift.seasonal_factor;
1079 let adjusted = base_amount.to_f64().unwrap_or(1.0) * multiplier;
1080 Decimal::from_f64_retain(adjusted).unwrap_or(base_amount)
1081 } else {
1082 base_amount
1083 }
1084 };
1085
1086 let total_amount = if is_automated {
1088 drift_adjusted_amount } else {
1090 self.apply_human_variation(drift_adjusted_amount)
1091 };
1092
1093 let debit_amounts = self
1095 .amount_sampler
1096 .sample_summing_to(line_spec.debit_count, total_amount);
1097 for (i, amount) in debit_amounts.into_iter().enumerate() {
1098 let account_number = self.select_debit_account().account_number.clone();
1099 let mut line = JournalEntryLine::debit(
1100 entry.header.document_id,
1101 (i + 1) as u32,
1102 account_number.clone(),
1103 amount,
1104 );
1105
1106 if self.template_config.descriptions.generate_line_text {
1108 line.line_text = Some(self.description_generator.generate_line_text(
1109 &account_number,
1110 &context,
1111 &mut self.rng,
1112 ));
1113 }
1114
1115 entry.add_line(line);
1116 }
1117
1118 let credit_amounts = self
1120 .amount_sampler
1121 .sample_summing_to(line_spec.credit_count, total_amount);
1122 for (i, amount) in credit_amounts.into_iter().enumerate() {
1123 let account_number = self.select_credit_account().account_number.clone();
1124 let mut line = JournalEntryLine::credit(
1125 entry.header.document_id,
1126 (line_spec.debit_count + i + 1) as u32,
1127 account_number.clone(),
1128 amount,
1129 );
1130
1131 if self.template_config.descriptions.generate_line_text {
1133 line.line_text = Some(self.description_generator.generate_line_text(
1134 &account_number,
1135 &context,
1136 &mut self.rng,
1137 ));
1138 }
1139
1140 entry.add_line(line);
1141 }
1142
1143 self.enrich_line_items(&mut entry);
1145
1146 if self.persona_errors_enabled && !is_automated {
1148 self.maybe_inject_persona_error(&mut entry);
1149 }
1150
1151 if self.approval_enabled {
1153 self.maybe_apply_approval_workflow(&mut entry, posting_date);
1154 }
1155
1156 self.populate_approval_fields(&mut entry, posting_date);
1158
1159 self.maybe_start_batch(&entry);
1161
1162 entry
1163 }
1164
1165 pub fn with_persona_errors(mut self, enabled: bool) -> Self {
1170 self.persona_errors_enabled = enabled;
1171 self
1172 }
1173
1174 pub fn with_fraud_config(mut self, config: FraudConfig) -> Self {
1179 self.fraud_config = config;
1180 self
1181 }
1182
1183 pub fn persona_errors_enabled(&self) -> bool {
1185 self.persona_errors_enabled
1186 }
1187
1188 pub fn with_batching(mut self, enabled: bool) -> Self {
1193 if !enabled {
1194 self.batch_state = None;
1195 }
1196 self
1197 }
1198
1199 pub fn batching_enabled(&self) -> bool {
1201 true
1203 }
1204
1205 fn maybe_start_batch(&mut self, entry: &JournalEntry) {
1210 if entry.header.source == TransactionSource::Automated || entry.header.is_fraud {
1212 return;
1213 }
1214
1215 if self.rng.random::<f64>() > 0.15 {
1217 return;
1218 }
1219
1220 let base_account = entry
1222 .lines
1223 .first()
1224 .map(|l| l.gl_account.clone())
1225 .unwrap_or_default();
1226
1227 let base_amount = entry.total_debit();
1228
1229 self.batch_state = Some(BatchState {
1230 base_account_number: base_account,
1231 base_amount,
1232 base_business_process: entry.header.business_process,
1233 base_posting_date: entry.header.posting_date,
1234 remaining: self.rng.random_range(2..7), });
1236 }
1237
1238 fn generate_batched_entry(&mut self) -> JournalEntry {
1246 use rust_decimal::Decimal;
1247
1248 if let Some(ref mut state) = self.batch_state {
1250 state.remaining = state.remaining.saturating_sub(1);
1251 }
1252
1253 let Some(batch) = self.batch_state.clone() else {
1254 tracing::warn!(
1257 "generate_batched_entry called without batch_state; generating standard entry"
1258 );
1259 self.batch_state = None;
1260 return self.generate();
1261 };
1262
1263 let posting_date = batch.base_posting_date;
1265
1266 self.count += 1;
1267 let document_id = self.generate_deterministic_uuid();
1268
1269 let company_code = self.company_selector.select(&mut self.rng).to_string();
1271
1272 let _line_spec = LineItemSpec {
1274 total_count: 2,
1275 debit_count: 1,
1276 credit_count: 1,
1277 split_type: DebitCreditSplit::Equal,
1278 };
1279
1280 let source = TransactionSource::Manual;
1282
1283 let business_process = batch.base_business_process.unwrap_or(BusinessProcess::R2R);
1285
1286 let time = self.temporal_sampler.sample_time(true);
1288 let created_at = posting_date.and_time(time).and_utc();
1289
1290 let (created_by, user_persona) = self.select_user(false);
1292
1293 let mut header =
1295 JournalEntryHeader::with_deterministic_id(company_code, posting_date, document_id);
1296 header.created_at = created_at;
1297 header.source = source;
1298 header.created_by = created_by;
1299 header.user_persona = user_persona;
1300 header.business_process = Some(business_process);
1301 header.document_type = Self::document_type_for_process(business_process).to_string();
1302
1303 header.source_document = Some(DocumentRef::Manual);
1305
1306 header.is_manual = true;
1308 header.source_system = if self.rng.random::<f64>() < 0.70 {
1309 "manual".to_string()
1310 } else {
1311 "spreadsheet".to_string()
1312 };
1313 header.is_post_close = posting_date.month() == self.end_date.month()
1314 && posting_date.year() == self.end_date.year()
1315 && posting_date.day() > 25;
1316 header.created_date =
1317 posting_date.and_hms_opt(time.hour().min(23), time.minute(), time.second());
1318
1319 let variation = self.rng.random_range(-0.15..0.15);
1321 let varied_amount =
1322 batch.base_amount * (Decimal::ONE + Decimal::try_from(variation).unwrap_or_default());
1323 let total_amount = varied_amount.round_dp(2).max(Decimal::from(1));
1324
1325 let mut entry = JournalEntry::new(header);
1327
1328 let debit_line = JournalEntryLine::debit(
1330 entry.header.document_id,
1331 1,
1332 batch.base_account_number.clone(),
1333 total_amount,
1334 );
1335 entry.add_line(debit_line);
1336
1337 let credit_account = self.select_credit_account().account_number.clone();
1339 let credit_line =
1340 JournalEntryLine::credit(entry.header.document_id, 2, credit_account, total_amount);
1341 entry.add_line(credit_line);
1342
1343 self.enrich_line_items(&mut entry);
1345
1346 if self.persona_errors_enabled {
1348 self.maybe_inject_persona_error(&mut entry);
1349 }
1350
1351 if self.approval_enabled {
1353 self.maybe_apply_approval_workflow(&mut entry, posting_date);
1354 }
1355
1356 self.populate_approval_fields(&mut entry, posting_date);
1358
1359 if batch.remaining <= 1 {
1361 self.batch_state = None;
1362 }
1363
1364 entry
1365 }
1366
1367 fn maybe_inject_persona_error(&mut self, entry: &mut JournalEntry) {
1369 let persona_str = &entry.header.user_persona;
1371 let persona = match persona_str.to_lowercase().as_str() {
1372 s if s.contains("junior") => UserPersona::JuniorAccountant,
1373 s if s.contains("senior") => UserPersona::SeniorAccountant,
1374 s if s.contains("controller") => UserPersona::Controller,
1375 s if s.contains("manager") => UserPersona::Manager,
1376 s if s.contains("executive") => UserPersona::Executive,
1377 _ => return, };
1379
1380 let base_error_rate = persona.error_rate();
1382
1383 let adjusted_rate = self.apply_stress_factors(base_error_rate, entry.header.posting_date);
1385
1386 if self.rng.random::<f64>() >= adjusted_rate {
1388 return; }
1390
1391 self.inject_human_error(entry, persona);
1393 }
1394
1395 fn apply_stress_factors(&self, base_rate: f64, posting_date: chrono::NaiveDate) -> f64 {
1404 use chrono::Datelike;
1405
1406 let mut rate = base_rate;
1407 let day = posting_date.day();
1408 let month = posting_date.month();
1409
1410 if month == 12 && day >= 28 {
1412 rate *= 2.0;
1413 return rate.min(0.5); }
1415
1416 if matches!(month, 3 | 6 | 9 | 12) && day >= 28 {
1418 rate *= 1.75; return rate.min(0.4);
1420 }
1421
1422 if day >= 28 {
1424 rate *= 1.5; }
1426
1427 let weekday = posting_date.weekday();
1429 match weekday {
1430 chrono::Weekday::Mon => {
1431 rate *= 1.2;
1433 }
1434 chrono::Weekday::Fri => {
1435 rate *= 1.3;
1437 }
1438 _ => {}
1439 }
1440
1441 rate.min(0.4)
1443 }
1444
1445 fn apply_human_variation(&mut self, amount: rust_decimal::Decimal) -> rust_decimal::Decimal {
1454 use rust_decimal::Decimal;
1455
1456 if amount < Decimal::from(10) {
1458 return amount;
1459 }
1460
1461 if self.rng.random::<f64>() > 0.70 {
1463 return amount;
1464 }
1465
1466 let variation_type: u8 = self.rng.random_range(0..4);
1468
1469 match variation_type {
1470 0 => {
1471 let variation_pct = self.rng.random_range(-0.02..0.02);
1473 let variation = amount * Decimal::try_from(variation_pct).unwrap_or_default();
1474 (amount + variation).round_dp(2)
1475 }
1476 1 => {
1477 let ten = Decimal::from(10);
1479 (amount / ten).round() * ten
1480 }
1481 2 => {
1482 if amount >= Decimal::from(500) {
1484 let hundred = Decimal::from(100);
1485 (amount / hundred).round() * hundred
1486 } else {
1487 amount
1488 }
1489 }
1490 3 => {
1491 let cents = Decimal::new(self.rng.random_range(-100..100), 2);
1493 (amount + cents).max(Decimal::ZERO).round_dp(2)
1494 }
1495 _ => amount,
1496 }
1497 }
1498
1499 fn rebalance_entry(entry: &mut JournalEntry, modified_was_debit: bool, impact: Decimal) {
1505 let balancing_idx = entry.lines.iter().position(|l| {
1507 if modified_was_debit {
1508 l.credit_amount > Decimal::ZERO
1509 } else {
1510 l.debit_amount > Decimal::ZERO
1511 }
1512 });
1513
1514 if let Some(idx) = balancing_idx {
1515 if modified_was_debit {
1516 entry.lines[idx].credit_amount += impact;
1517 } else {
1518 entry.lines[idx].debit_amount += impact;
1519 }
1520 }
1521 }
1522
1523 fn inject_human_error(&mut self, entry: &mut JournalEntry, persona: UserPersona) {
1528 use rust_decimal::Decimal;
1529
1530 let error_type: u8 = match persona {
1532 UserPersona::JuniorAccountant => {
1533 self.rng.random_range(0..5)
1535 }
1536 UserPersona::SeniorAccountant => {
1537 self.rng.random_range(0..3)
1539 }
1540 UserPersona::Controller | UserPersona::Manager => {
1541 self.rng.random_range(3..5)
1543 }
1544 _ => return,
1545 };
1546
1547 match error_type {
1548 0 => {
1549 if let Some(line) = entry.lines.get_mut(0) {
1551 let is_debit = line.debit_amount > Decimal::ZERO;
1552 let original_amount = if is_debit {
1553 line.debit_amount
1554 } else {
1555 line.credit_amount
1556 };
1557
1558 let s = original_amount.to_string();
1560 if s.len() >= 2 {
1561 let chars: Vec<char> = s.chars().collect();
1562 let pos = self.rng.random_range(0..chars.len().saturating_sub(1));
1563 if chars[pos].is_ascii_digit()
1564 && chars.get(pos + 1).is_some_and(char::is_ascii_digit)
1565 {
1566 let mut new_chars = chars;
1567 new_chars.swap(pos, pos + 1);
1568 if let Ok(new_amount) =
1569 new_chars.into_iter().collect::<String>().parse::<Decimal>()
1570 {
1571 let impact = new_amount - original_amount;
1572
1573 if is_debit {
1575 entry.lines[0].debit_amount = new_amount;
1576 } else {
1577 entry.lines[0].credit_amount = new_amount;
1578 }
1579
1580 Self::rebalance_entry(entry, is_debit, impact);
1582
1583 entry.header.header_text = Some(
1584 entry.header.header_text.clone().unwrap_or_default()
1585 + " [HUMAN_ERROR:TRANSPOSITION]",
1586 );
1587 }
1588 }
1589 }
1590 }
1591 }
1592 1 => {
1593 if let Some(line) = entry.lines.get_mut(0) {
1595 let is_debit = line.debit_amount > Decimal::ZERO;
1596 let original_amount = if is_debit {
1597 line.debit_amount
1598 } else {
1599 line.credit_amount
1600 };
1601
1602 let new_amount = original_amount * Decimal::new(10, 0);
1603 let impact = new_amount - original_amount;
1604
1605 if is_debit {
1607 entry.lines[0].debit_amount = new_amount;
1608 } else {
1609 entry.lines[0].credit_amount = new_amount;
1610 }
1611
1612 Self::rebalance_entry(entry, is_debit, impact);
1614
1615 entry.header.header_text = Some(
1616 entry.header.header_text.clone().unwrap_or_default()
1617 + " [HUMAN_ERROR:DECIMAL_SHIFT]",
1618 );
1619 }
1620 }
1621 2 => {
1622 if let Some(ref mut text) = entry.header.header_text {
1624 let typos = ["teh", "adn", "wiht", "taht", "recieve"];
1625 let correct = ["the", "and", "with", "that", "receive"];
1626 let idx = self.rng.random_range(0..typos.len());
1627 if text.to_lowercase().contains(correct[idx]) {
1628 *text = text.replace(correct[idx], typos[idx]);
1629 *text = format!("{text} [HUMAN_ERROR:TYPO]");
1630 }
1631 }
1632 }
1633 3 => {
1634 if let Some(line) = entry.lines.get_mut(0) {
1636 let is_debit = line.debit_amount > Decimal::ZERO;
1637 let original_amount = if is_debit {
1638 line.debit_amount
1639 } else {
1640 line.credit_amount
1641 };
1642
1643 let new_amount =
1644 (original_amount / Decimal::new(100, 0)).round() * Decimal::new(100, 0);
1645 let impact = new_amount - original_amount;
1646
1647 if is_debit {
1649 entry.lines[0].debit_amount = new_amount;
1650 } else {
1651 entry.lines[0].credit_amount = new_amount;
1652 }
1653
1654 Self::rebalance_entry(entry, is_debit, impact);
1656
1657 entry.header.header_text = Some(
1658 entry.header.header_text.clone().unwrap_or_default()
1659 + " [HUMAN_ERROR:ROUNDED]",
1660 );
1661 }
1662 }
1663 4 if entry.header.document_date == entry.header.posting_date => {
1666 let days_late = self.rng.random_range(5..15);
1667 entry.header.document_date =
1668 entry.header.posting_date - chrono::Duration::days(days_late);
1669 entry.header.header_text = Some(
1670 entry.header.header_text.clone().unwrap_or_default()
1671 + " [HUMAN_ERROR:LATE_POSTING]",
1672 );
1673 }
1674 _ => {}
1675 }
1676 }
1677
1678 fn maybe_apply_approval_workflow(
1683 &mut self,
1684 entry: &mut JournalEntry,
1685 _posting_date: NaiveDate,
1686 ) {
1687 use rust_decimal::Decimal;
1688
1689 let amount = entry.total_debit();
1690
1691 if amount <= self.approval_threshold {
1693 let workflow = ApprovalWorkflow::auto_approved(
1695 entry.header.created_by.clone(),
1696 entry.header.user_persona.clone(),
1697 amount,
1698 entry.header.created_at,
1699 );
1700 entry.header.approval_workflow = Some(workflow);
1701 return;
1702 }
1703
1704 entry.header.sox_relevant = true;
1706
1707 let required_levels = if amount > Decimal::new(100000, 0) {
1709 3 } else if amount > Decimal::new(50000, 0) {
1711 2 } else {
1713 1 };
1715
1716 let mut workflow = ApprovalWorkflow::new(
1718 entry.header.created_by.clone(),
1719 entry.header.user_persona.clone(),
1720 amount,
1721 );
1722 workflow.required_levels = required_levels;
1723
1724 let submit_time = entry.header.created_at;
1726 let submit_action = ApprovalAction::new(
1727 entry.header.created_by.clone(),
1728 entry.header.user_persona.clone(),
1729 self.parse_persona(&entry.header.user_persona),
1730 ApprovalActionType::Submit,
1731 0,
1732 )
1733 .with_timestamp(submit_time);
1734
1735 workflow.actions.push(submit_action);
1736 workflow.status = ApprovalStatus::Pending;
1737 workflow.submitted_at = Some(submit_time);
1738
1739 let mut current_time = submit_time;
1741 for level in 1..=required_levels {
1742 let delay_hours = self.rng.random_range(1..4);
1744 current_time += chrono::Duration::hours(delay_hours);
1745
1746 while current_time.weekday() == chrono::Weekday::Sat
1748 || current_time.weekday() == chrono::Weekday::Sun
1749 {
1750 current_time += chrono::Duration::days(1);
1751 }
1752
1753 let (approver_id, approver_role) = self.select_approver(level);
1755
1756 let approve_action = ApprovalAction::new(
1757 approver_id.clone(),
1758 approver_role.to_string(),
1759 approver_role,
1760 ApprovalActionType::Approve,
1761 level,
1762 )
1763 .with_timestamp(current_time);
1764
1765 workflow.actions.push(approve_action);
1766 workflow.current_level = level;
1767 }
1768
1769 workflow.status = ApprovalStatus::Approved;
1771 workflow.approved_at = Some(current_time);
1772
1773 entry.header.approval_workflow = Some(workflow);
1774 }
1775
1776 fn select_approver(&mut self, level: u8) -> (String, UserPersona) {
1778 let persona = match level {
1779 1 => UserPersona::Manager,
1780 2 => UserPersona::Controller,
1781 _ => UserPersona::Executive,
1782 };
1783
1784 if let Some(ref pool) = self.user_pool {
1786 if let Some(user) = pool.get_random_user(persona, &mut self.rng) {
1787 return (user.user_id.clone(), persona);
1788 }
1789 }
1790
1791 let approver_id = match persona {
1793 UserPersona::Manager => format!("MGR{:04}", self.rng.random_range(1..100)),
1794 UserPersona::Controller => format!("CTRL{:04}", self.rng.random_range(1..20)),
1795 UserPersona::Executive => format!("EXEC{:04}", self.rng.random_range(1..10)),
1796 _ => format!("USR{:04}", self.rng.random_range(1..1000)),
1797 };
1798
1799 (approver_id, persona)
1800 }
1801
1802 fn parse_persona(&self, persona_str: &str) -> UserPersona {
1804 match persona_str.to_lowercase().as_str() {
1805 s if s.contains("junior") => UserPersona::JuniorAccountant,
1806 s if s.contains("senior") => UserPersona::SeniorAccountant,
1807 s if s.contains("controller") => UserPersona::Controller,
1808 s if s.contains("manager") => UserPersona::Manager,
1809 s if s.contains("executive") => UserPersona::Executive,
1810 s if s.contains("automated") || s.contains("system") => UserPersona::AutomatedSystem,
1811 _ => UserPersona::JuniorAccountant, }
1813 }
1814
1815 pub fn with_approval(mut self, enabled: bool) -> Self {
1817 self.approval_enabled = enabled;
1818 self
1819 }
1820
1821 pub fn with_approval_threshold(mut self, threshold: rust_decimal::Decimal) -> Self {
1823 self.approval_threshold = threshold;
1824 self
1825 }
1826
1827 pub fn with_sod_violation_rate(mut self, rate: f64) -> Self {
1833 self.sod_violation_rate = rate;
1834 self
1835 }
1836
1837 fn populate_approval_fields(&mut self, entry: &mut JournalEntry, posting_date: NaiveDate) {
1840 if let Some(ref workflow) = entry.header.approval_workflow {
1841 let last_approver = workflow
1843 .actions
1844 .iter()
1845 .rev()
1846 .find(|a| matches!(a.action, ApprovalActionType::Approve));
1847
1848 if let Some(approver_action) = last_approver {
1849 entry.header.approved_by = Some(approver_action.actor_id.clone());
1850 entry.header.approval_date = Some(approver_action.action_timestamp.date_naive());
1851 } else {
1852 entry.header.approved_by = Some(workflow.preparer_id.clone());
1854 entry.header.approval_date = Some(posting_date);
1855 }
1856
1857 if self.rng.random::<f64>() < self.sod_violation_rate {
1859 let creator = entry.header.created_by.clone();
1860 entry.header.approved_by = Some(creator);
1861 entry.header.sod_violation = true;
1862 entry.header.sod_conflict_type = Some(SodConflictType::PreparerApprover);
1863 }
1864 }
1865 }
1866
1867 pub fn with_drift_controller(mut self, controller: DriftController) -> Self {
1873 self.drift_controller = Some(controller);
1874 self
1875 }
1876
1877 pub fn with_drift_config(mut self, config: DriftConfig, seed: u64) -> Self {
1882 if config.enabled {
1883 let total_periods = self.calculate_total_periods();
1884 self.drift_controller = Some(DriftController::new(config, seed, total_periods));
1885 }
1886 self
1887 }
1888
1889 fn calculate_total_periods(&self) -> u32 {
1891 let start_year = self.start_date.year();
1892 let start_month = self.start_date.month();
1893 let end_year = self.end_date.year();
1894 let end_month = self.end_date.month();
1895
1896 ((end_year - start_year) * 12 + (end_month as i32 - start_month as i32) + 1).max(1) as u32
1897 }
1898
1899 fn date_to_period(&self, date: NaiveDate) -> u32 {
1901 let start_year = self.start_date.year();
1902 let start_month = self.start_date.month() as i32;
1903 let date_year = date.year();
1904 let date_month = date.month() as i32;
1905
1906 ((date_year - start_year) * 12 + (date_month - start_month)).max(0) as u32
1907 }
1908
1909 fn get_drift_adjustments(&self, date: NaiveDate) -> DriftAdjustments {
1911 if let Some(ref controller) = self.drift_controller {
1912 let period = self.date_to_period(date);
1913 controller.compute_adjustments(period)
1914 } else {
1915 DriftAdjustments::none()
1916 }
1917 }
1918
1919 #[inline]
1921 fn select_user(&mut self, is_automated: bool) -> (String, String) {
1922 if let Some(ref pool) = self.user_pool {
1923 let persona = if is_automated {
1924 UserPersona::AutomatedSystem
1925 } else {
1926 let roll: f64 = self.rng.random();
1928 if roll < 0.4 {
1929 UserPersona::JuniorAccountant
1930 } else if roll < 0.7 {
1931 UserPersona::SeniorAccountant
1932 } else if roll < 0.85 {
1933 UserPersona::Controller
1934 } else {
1935 UserPersona::Manager
1936 }
1937 };
1938
1939 if let Some(user) = pool.get_random_user(persona, &mut self.rng) {
1940 return (user.user_id.clone(), user.persona.to_string());
1941 }
1942 }
1943
1944 if is_automated {
1946 (
1947 format!("BATCH{:04}", self.rng.random_range(1..=20)),
1948 "automated_system".to_string(),
1949 )
1950 } else {
1951 (
1952 format!("USER{:04}", self.rng.random_range(1..=40)),
1953 "senior_accountant".to_string(),
1954 )
1955 }
1956 }
1957
1958 #[inline]
1960 fn select_source(&mut self) -> TransactionSource {
1961 let roll: f64 = self.rng.random();
1962 let dist = &self.config.source_distribution;
1963
1964 if roll < dist.manual {
1965 TransactionSource::Manual
1966 } else if roll < dist.manual + dist.automated {
1967 TransactionSource::Automated
1968 } else if roll < dist.manual + dist.automated + dist.recurring {
1969 TransactionSource::Recurring
1970 } else {
1971 TransactionSource::Adjustment
1972 }
1973 }
1974
1975 #[inline]
1977 fn document_type_for_process(process: BusinessProcess) -> &'static str {
1986 match process {
1987 BusinessProcess::P2P => "KR",
1988 BusinessProcess::O2C => "DR",
1989 BusinessProcess::R2R => "SA",
1990 BusinessProcess::H2R => "HR",
1991 BusinessProcess::A2R => "AA",
1992 _ => "SA",
1993 }
1994 }
1995
1996 fn select_business_process(&mut self) -> BusinessProcess {
1997 *datasynth_core::utils::weighted_select(&mut self.rng, &self.business_process_weights)
1998 }
1999
2000 #[inline]
2001 fn select_debit_account(&mut self) -> &GLAccount {
2002 let accounts = self.coa.get_accounts_by_type(AccountType::Asset);
2003 let expense_accounts = self.coa.get_accounts_by_type(AccountType::Expense);
2004
2005 let all: Vec<_> = if self.rng.random::<f64>() < 0.6 {
2007 accounts
2008 } else {
2009 expense_accounts
2010 };
2011
2012 all.choose(&mut self.rng).copied().unwrap_or_else(|| {
2013 tracing::warn!(
2014 "Account selection returned empty list, falling back to first COA account"
2015 );
2016 &self.coa.accounts[0]
2017 })
2018 }
2019
2020 #[inline]
2021 fn select_credit_account(&mut self) -> &GLAccount {
2022 let liability_accounts = self.coa.get_accounts_by_type(AccountType::Liability);
2023 let revenue_accounts = self.coa.get_accounts_by_type(AccountType::Revenue);
2024
2025 let all: Vec<_> = if self.rng.random::<f64>() < 0.6 {
2027 liability_accounts
2028 } else {
2029 revenue_accounts
2030 };
2031
2032 all.choose(&mut self.rng).copied().unwrap_or_else(|| {
2033 tracing::warn!(
2034 "Account selection returned empty list, falling back to first COA account"
2035 );
2036 &self.coa.accounts[0]
2037 })
2038 }
2039}
2040
2041impl Generator for JournalEntryGenerator {
2042 type Item = JournalEntry;
2043 type Config = (
2044 TransactionConfig,
2045 Arc<ChartOfAccounts>,
2046 Vec<String>,
2047 NaiveDate,
2048 NaiveDate,
2049 );
2050
2051 fn new(config: Self::Config, seed: u64) -> Self {
2052 Self::new_with_params(config.0, config.1, config.2, config.3, config.4, seed)
2053 }
2054
2055 fn generate_one(&mut self) -> Self::Item {
2056 self.generate()
2057 }
2058
2059 fn reset(&mut self) {
2060 self.rng = seeded_rng(self.seed, 0);
2061 self.line_sampler.reset(self.seed + 1);
2062 self.amount_sampler.reset(self.seed + 2);
2063 self.temporal_sampler.reset(self.seed + 3);
2064 self.count = 0;
2065 self.uuid_factory.reset();
2066
2067 let mut ref_gen = ReferenceGenerator::new(
2069 self.start_date.year(),
2070 self.companies
2071 .first()
2072 .map(std::string::String::as_str)
2073 .unwrap_or("1000"),
2074 );
2075 ref_gen.set_prefix(
2076 ReferenceType::Invoice,
2077 &self.template_config.references.invoice_prefix,
2078 );
2079 ref_gen.set_prefix(
2080 ReferenceType::PurchaseOrder,
2081 &self.template_config.references.po_prefix,
2082 );
2083 ref_gen.set_prefix(
2084 ReferenceType::SalesOrder,
2085 &self.template_config.references.so_prefix,
2086 );
2087 self.reference_generator = ref_gen;
2088 }
2089
2090 fn count(&self) -> u64 {
2091 self.count
2092 }
2093
2094 fn seed(&self) -> u64 {
2095 self.seed
2096 }
2097}
2098
2099use datasynth_core::traits::ParallelGenerator;
2100
2101impl ParallelGenerator for JournalEntryGenerator {
2102 fn split(self, parts: usize) -> Vec<Self> {
2108 let parts = parts.max(1);
2109 (0..parts)
2110 .map(|i| {
2111 let sub_seed = self
2113 .seed
2114 .wrapping_add((i as u64).wrapping_mul(0x9E3779B97F4A7C15));
2115
2116 let mut gen = JournalEntryGenerator::new_with_full_config(
2117 self.config.clone(),
2118 Arc::clone(&self.coa),
2119 self.companies.clone(),
2120 self.start_date,
2121 self.end_date,
2122 sub_seed,
2123 self.template_config.clone(),
2124 self.user_pool.clone(),
2125 );
2126
2127 gen.company_selector = self.company_selector.clone();
2129 gen.vendor_pool = self.vendor_pool.clone();
2130 gen.customer_pool = self.customer_pool.clone();
2131 gen.material_pool = self.material_pool.clone();
2132 gen.using_real_master_data = self.using_real_master_data;
2133 gen.fraud_config = self.fraud_config.clone();
2134 gen.persona_errors_enabled = self.persona_errors_enabled;
2135 gen.approval_enabled = self.approval_enabled;
2136 gen.approval_threshold = self.approval_threshold;
2137 gen.sod_violation_rate = self.sod_violation_rate;
2138
2139 gen.uuid_factory = DeterministicUuidFactory::for_partition(
2141 sub_seed,
2142 GeneratorType::JournalEntry,
2143 i as u8,
2144 );
2145
2146 if let Some(ref config) = self.temporal_patterns_config {
2148 gen.temporal_patterns_config = Some(config.clone());
2149 if config.business_days.enabled {
2151 if let Some(ref bdc) = self.business_day_calculator {
2152 gen.business_day_calculator = Some(bdc.clone());
2153 }
2154 }
2155 if config.processing_lags.enabled {
2157 let lag_config =
2158 Self::convert_processing_lag_config(&config.processing_lags);
2159 gen.processing_lag_calculator =
2160 Some(ProcessingLagCalculator::with_config(sub_seed, lag_config));
2161 }
2162 }
2163
2164 if let Some(ref dc) = self.drift_controller {
2166 gen.drift_controller = Some(dc.clone());
2167 }
2168
2169 gen
2170 })
2171 .collect()
2172 }
2173}
2174
2175#[cfg(test)]
2176#[allow(clippy::unwrap_used)]
2177mod tests {
2178 use super::*;
2179 use crate::ChartOfAccountsGenerator;
2180
/// Entries without a human-error tag must balance, every entry needs at least
/// two lines, and at least 80 of 100 generated entries must be untagged.
#[test]
fn test_generate_balanced_entries() {
    let coa = Arc::new(
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42)
            .generate(),
    );

    let mut generator = JournalEntryGenerator::new_with_params(
        TransactionConfig::default(),
        coa,
        vec!["1000".to_string()],
        NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
        NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
        42,
    );

    let mut balanced_count = 0;
    for _ in 0..100 {
        let entry = generator.generate();

        // Entries carrying an injected human error are exempt from the balance check.
        let tagged_with_error = entry
            .header
            .header_text
            .as_deref()
            .is_some_and(|text| text.contains("[HUMAN_ERROR:"));

        if !tagged_with_error {
            assert!(
                entry.is_balanced(),
                "Entry {:?} is not balanced",
                entry.header.document_id
            );
            balanced_count += 1;
        }
        assert!(entry.line_count() >= 2, "Entry has fewer than 2 lines");
    }

    assert!(
        balanced_count >= 80,
        "Expected at least 80 balanced entries, got {}",
        balanced_count
    );
}
2226
/// Two generators built from the same inputs and seed must produce the same
/// document ids and debit totals.
#[test]
fn test_deterministic_generation() {
    let coa = Arc::new(
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42)
            .generate(),
    );

    let build = |coa: Arc<ChartOfAccounts>| {
        JournalEntryGenerator::new_with_params(
            TransactionConfig::default(),
            coa,
            vec!["1000".to_string()],
            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
            42,
        )
    };

    let mut first = build(Arc::clone(&coa));
    let mut second = build(coa);

    for _ in 0..50 {
        let left = first.generate();
        let right = second.generate();
        assert_eq!(left.header.document_id, right.header.document_id);
        assert_eq!(left.total_debit(), right.total_debit());
    }
}
2258
/// With all template features switched on, every entry must carry header
/// text, a reference, a business process and line texts, and must balance.
#[test]
fn test_templates_generate_descriptions() {
    let coa = Arc::new(
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42)
            .generate(),
    );

    let names = datasynth_config::schema::NameTemplateConfig {
        generate_realistic_names: true,
        email_domain: "test.com".to_string(),
        culture_distribution: datasynth_config::schema::CultureDistribution::default(),
    };
    let descriptions = datasynth_config::schema::DescriptionTemplateConfig {
        generate_header_text: true,
        generate_line_text: true,
    };
    let references = datasynth_config::schema::ReferenceTemplateConfig {
        generate_references: true,
        invoice_prefix: "TEST-INV".to_string(),
        po_prefix: "TEST-PO".to_string(),
        so_prefix: "TEST-SO".to_string(),
    };
    let template_config = TemplateConfig {
        names,
        descriptions,
        references,
    };

    // Persona errors are disabled so every entry is expected to balance.
    let mut generator = JournalEntryGenerator::new_with_full_config(
        TransactionConfig::default(),
        coa,
        vec!["1000".to_string()],
        NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
        NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
        42,
        template_config,
        None,
    )
    .with_persona_errors(false);

    for _ in 0..10 {
        let entry = generator.generate();
        let header = &entry.header;

        assert!(
            header.header_text.is_some(),
            "Header text should be populated"
        );
        assert!(header.reference.is_some(), "Reference should be populated");
        assert!(
            header.business_process.is_some(),
            "Business process should be set"
        );

        for line in entry.lines.iter() {
            assert!(line.line_text.is_some(), "Line text should be populated");
        }

        assert!(entry.is_balanced());
    }
}
2326
/// Entries generated with a standard user pool must always name a creator.
#[test]
fn test_user_pool_integration() {
    let coa = Arc::new(
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42)
            .generate(),
    );

    let companies = vec!["1000".to_string()];
    let user_pool = crate::UserGenerator::new(42).generate_standard(&companies);

    let mut generator = JournalEntryGenerator::new_with_full_config(
        TransactionConfig::default(),
        coa,
        companies,
        NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
        NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
        42,
        TemplateConfig::default(),
        Some(user_pool),
    );

    for _ in 0..20 {
        let created_by = generator.generate().header.created_by;
        assert!(!created_by.is_empty());
    }
}
2359
/// A fresh generator reports no real master data; after vendors, customers
/// and materials are attached one by one it must report that it uses them.
#[test]
fn test_master_data_connection() {
    let coa = Arc::new(
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42)
            .generate(),
    );

    let vendors = vec![
        Vendor::new("V-TEST-001", "Test Vendor Alpha", VendorType::Supplier),
        Vendor::new("V-TEST-002", "Test Vendor Beta", VendorType::Technology),
    ];
    let customers = vec![
        Customer::new("C-TEST-001", "Test Customer One", CustomerType::Corporate),
        Customer::new(
            "C-TEST-002",
            "Test Customer Two",
            CustomerType::SmallBusiness,
        ),
    ];
    let materials = vec![Material::new(
        "MAT-TEST-001",
        "Test Material A",
        MaterialType::RawMaterial,
    )];

    let bare_generator = JournalEntryGenerator::new_with_params(
        TransactionConfig::default(),
        coa,
        vec!["1000".to_string()],
        NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
        NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
        42,
    );
    assert!(!bare_generator.is_using_real_master_data());

    let connected_generator = bare_generator
        .with_vendors(&vendors)
        .with_customers(&customers)
        .with_materials(&materials);
    assert!(connected_generator.is_using_real_master_data());
}
2411
/// `with_master_data` must switch the generator to real master data in a
/// single call.
#[test]
fn test_with_master_data_convenience_method() {
    let coa = Arc::new(
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42)
            .generate(),
    );

    let vendors = vec![Vendor::new("V-001", "Vendor One", VendorType::Supplier)];
    let customers = vec![Customer::new(
        "C-001",
        "Customer One",
        CustomerType::Corporate,
    )];
    let materials = vec![Material::new(
        "MAT-001",
        "Material One",
        MaterialType::RawMaterial,
    )];

    let base = JournalEntryGenerator::new_with_params(
        TransactionConfig::default(),
        coa,
        vec!["1000".to_string()],
        NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
        NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
        42,
    );
    let generator = base.with_master_data(&vendors, &customers, &materials);

    assert!(generator.is_using_real_master_data());
}
2442
/// Checks the ordering of stress-adjusted error rates: an ordinary day keeps
/// roughly the base rate, a late-June date is higher, a late-December date is
/// higher still, and Fridays and Mondays both exceed the ordinary day.
#[test]
fn test_stress_factors_increase_error_rate() {
    let coa = Arc::new(
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42)
            .generate(),
    );

    let generator = JournalEntryGenerator::new_with_params(
        TransactionConfig::default(),
        coa,
        vec!["1000".to_string()],
        NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
        NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
        42,
    );

    let base_rate = 0.1;
    let rate_on = |year: i32, month: u32, day: u32| {
        let date = NaiveDate::from_ymd_opt(year, month, day).unwrap();
        generator.apply_stress_factors(base_rate, date)
    };

    let regular_rate = rate_on(2024, 6, 15);
    assert!(
        (regular_rate - base_rate).abs() < 0.01,
        "Regular day should have minimal stress factor adjustment"
    );

    let month_end_rate = rate_on(2024, 6, 29);
    assert!(
        month_end_rate > regular_rate,
        "Month end should have higher error rate than regular day"
    );

    let year_end_rate = rate_on(2024, 12, 30);
    assert!(
        year_end_rate > month_end_rate,
        "Year end should have highest error rate"
    );

    let friday_rate = rate_on(2024, 6, 14);
    assert!(
        friday_rate > regular_rate,
        "Friday should have higher error rate than mid-week"
    );

    let monday_rate = rate_on(2024, 6, 17);
    assert!(
        monday_rate > regular_rate,
        "Monday should have higher error rate than mid-week"
    );
}
2500
/// Generates 200 entries with persona errors off and checks that all of them
/// balance and that at least one posting date occurs more than once.
#[test]
fn test_batching_produces_similar_entries() {
    let coa = Arc::new(
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42)
            .generate(),
    );

    let mut generator = JournalEntryGenerator::new_with_params(
        TransactionConfig::default(),
        coa,
        vec!["1000".to_string()],
        NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
        NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
        123,
    )
    .with_persona_errors(false);

    let mut entries: Vec<JournalEntry> = Vec::with_capacity(200);
    for _ in 0..200 {
        entries.push(generator.generate());
    }

    entries.iter().for_each(|entry| {
        assert!(
            entry.is_balanced(),
            "All entries including batched should be balanced"
        );
    });

    // Tally how many entries share each posting date.
    let date_counts = entries.iter().fold(
        std::collections::HashMap::<NaiveDate, usize>::new(),
        |mut tally, entry| {
            *tally.entry(entry.header.posting_date).or_insert(0) += 1;
            tally
        },
    );

    let dates_with_multiple = date_counts.values().filter(|count| **count > 1).count();
    assert!(
        dates_with_multiple > 0,
        "With batching, should see some dates with multiple entries"
    );
}
2543
/// With temporal patterns and business-day handling switched on for the
/// "US" calendar region, none of 100 generated entries (Q1 2024, seed 42)
/// may have a Saturday or Sunday posting date.
#[test]
fn test_temporal_patterns_business_days() {
    use datasynth_config::schema::{
        BusinessDaySchemaConfig, CalendarSchemaConfig, TemporalPatternsConfig,
    };

    let coa = Arc::new(
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42)
            .generate(),
    );

    // Everything not listed explicitly is taken from the config defaults.
    let business_days = BusinessDaySchemaConfig {
        enabled: true,
        ..Default::default()
    };
    let calendars = CalendarSchemaConfig {
        regions: vec![String::from("US")],
        custom_holidays: vec![],
    };
    let temporal_config = TemporalPatternsConfig {
        enabled: true,
        business_days,
        calendars,
        ..Default::default()
    };

    let period_start = NaiveDate::from_ymd_opt(2024, 1, 1).unwrap();
    let period_end = NaiveDate::from_ymd_opt(2024, 3, 31).unwrap();
    let mut generator = JournalEntryGenerator::new_with_params(
        TransactionConfig::default(),
        coa,
        vec![String::from("1000")],
        period_start,
        period_end,
        42,
    )
    .with_temporal_patterns(temporal_config, 42)
    .with_persona_errors(false);

    let entries: Vec<JournalEntry> = (0..100).map(|_| generator.generate()).collect();

    for posting_date in entries.iter().map(|entry| entry.header.posting_date) {
        let on_weekend = matches!(
            posting_date.weekday(),
            chrono::Weekday::Sat | chrono::Weekday::Sun
        );
        assert!(
            !on_weekend,
            "Posting date {:?} should not be a weekend",
            posting_date
        );
    }
}
2591
/// Generates 500 entries (seed 42) without calling `with_temporal_patterns`
/// and asserts that fewer than 5% of them are posted on a Saturday or Sunday.
#[test]
fn test_default_generation_filters_weekends() {
    let coa = Arc::new(
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42)
            .generate(),
    );

    let period_start = NaiveDate::from_ymd_opt(2024, 1, 1).unwrap();
    let period_end = NaiveDate::from_ymd_opt(2024, 12, 31).unwrap();
    let mut generator = JournalEntryGenerator::new_with_params(
        TransactionConfig::default(),
        coa,
        vec![String::from("1000")],
        period_start,
        period_end,
        42,
    )
    .with_persona_errors(false);

    let total = 500;
    let generated: Vec<JournalEntry> = (0..total).map(|_| generator.generate()).collect();

    let weekend_count = generated
        .iter()
        .map(|entry| entry.header.posting_date.weekday())
        .filter(|day| matches!(day, chrono::Weekday::Sat | chrono::Weekday::Sun))
        .count();

    let weekend_pct = weekend_count as f64 / total as f64;
    assert!(
        weekend_pct < 0.05,
        "Expected weekend entries <5% of total without temporal_patterns enabled, \
         but got {:.1}% ({}/{})",
        weekend_pct * 100.0,
        weekend_count,
        total
    );
}
2632
/// Generates 200 entries (seed 99, persona errors and batching disabled) and
/// checks the spread of `header.document_type`: more than three distinct
/// values must occur, and the value "SA" must account for under half of them.
#[test]
fn test_document_type_derived_from_business_process() {
    let coa = Arc::new(
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42)
            .generate(),
    );

    let period_start = NaiveDate::from_ymd_opt(2024, 1, 1).unwrap();
    let period_end = NaiveDate::from_ymd_opt(2024, 12, 31).unwrap();
    let mut generator = JournalEntryGenerator::new_with_params(
        TransactionConfig::default(),
        coa,
        vec![String::from("1000")],
        period_start,
        period_end,
        99,
    )
    .with_persona_errors(false)
    .with_batching(false);

    let total = 200;
    let generated: Vec<_> = (0..total).map(|_| generator.generate()).collect();

    // Distinct document types seen across the whole sample.
    let doc_types: std::collections::HashSet<_> = generated
        .iter()
        .map(|entry| entry.header.document_type.clone())
        .collect();

    // Number of entries whose document type is exactly "SA".
    let sa_count = generated
        .iter()
        .map(|entry| &entry.header.document_type)
        .filter(|doc_type| *doc_type == "SA")
        .count();

    assert!(
        doc_types.len() > 3,
        "Expected >3 distinct document types, got {} ({:?})",
        doc_types.len(),
        doc_types,
    );

    let sa_pct = sa_count as f64 / total as f64;
    assert!(
        sa_pct < 0.50,
        "Expected SA <50%, got {:.1}% ({}/{})",
        sa_pct * 100.0,
        sa_count,
        total,
    );
}
2681
/// Generates 200 entries (seed 42) and asserts that more than 95% of all
/// their lines have `account_description` set.
#[test]
fn test_enrich_line_items_account_description() {
    let coa = Arc::new(
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42)
            .generate(),
    );

    let period_start = NaiveDate::from_ymd_opt(2024, 1, 1).unwrap();
    let period_end = NaiveDate::from_ymd_opt(2024, 12, 31).unwrap();
    let mut generator = JournalEntryGenerator::new_with_params(
        TransactionConfig::default(),
        coa,
        vec![String::from("1000")],
        period_start,
        period_end,
        42,
    )
    .with_persona_errors(false);

    let generated: Vec<JournalEntry> = (0..200).map(|_| generator.generate()).collect();

    // Flatten once; both the denominator and the numerator come from this list.
    let all_lines: Vec<_> = generated
        .iter()
        .flat_map(|entry| entry.lines.iter())
        .collect();
    let total_lines: usize = all_lines.len();
    let lines_with_desc: usize = all_lines
        .iter()
        .filter(|line| line.account_description.is_some())
        .count();

    let desc_pct = lines_with_desc as f64 / total_lines as f64;
    assert!(
        desc_pct > 0.95,
        "Expected >95% of lines to have account_description, got {:.1}% ({}/{})",
        desc_pct * 100.0,
        lines_with_desc,
        total_lines,
    );
}
2718
/// Generates 300 entries (seed 42), selects the lines whose `gl_account`
/// starts with '5' or '6', and asserts that more than 80% of those lines
/// have `cost_center` set. Passes without checking anything when no such
/// line was generated.
#[test]
fn test_enrich_line_items_cost_center_for_expense_accounts() {
    let coa = Arc::new(
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42)
            .generate(),
    );

    let period_start = NaiveDate::from_ymd_opt(2024, 1, 1).unwrap();
    let period_end = NaiveDate::from_ymd_opt(2024, 12, 31).unwrap();
    let mut generator = JournalEntryGenerator::new_with_params(
        TransactionConfig::default(),
        coa,
        vec![String::from("1000")],
        period_start,
        period_end,
        42,
    )
    .with_persona_errors(false);

    let generated: Vec<JournalEntry> = (0..300).map(|_| generator.generate()).collect();

    // An empty account string has no first character and is never selected.
    let expense_lines: Vec<&JournalEntryLine> = generated
        .iter()
        .flat_map(|entry| entry.lines.iter())
        .filter(|line| {
            matches!(
                line.gl_account.chars().next(),
                Some('5') | Some('6')
            )
        })
        .collect();

    // Nothing to measure: avoid a 0/0 ratio and leave the test green.
    if expense_lines.is_empty() {
        return;
    }

    let with_cc = expense_lines
        .iter()
        .filter(|line| line.cost_center.is_some())
        .count();
    let cc_pct = with_cc as f64 / expense_lines.len() as f64;
    assert!(
        cc_pct > 0.80,
        "Expected >80% of expense lines to have cost_center, got {:.1}% ({}/{})",
        cc_pct * 100.0,
        with_cc,
        expense_lines.len(),
    );
}
2763
/// Generates 100 entries (seed 42) and asserts that more than 95% of all
/// lines have `profit_center` set and, separately, that more than 95% have
/// `line_text` set.
#[test]
fn test_enrich_line_items_profit_center_and_line_text() {
    let coa = Arc::new(
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42)
            .generate(),
    );

    let period_start = NaiveDate::from_ymd_opt(2024, 1, 1).unwrap();
    let period_end = NaiveDate::from_ymd_opt(2024, 12, 31).unwrap();
    let mut generator = JournalEntryGenerator::new_with_params(
        TransactionConfig::default(),
        coa,
        vec![String::from("1000")],
        period_start,
        period_end,
        42,
    )
    .with_persona_errors(false);

    let generated: Vec<JournalEntry> = (0..100).map(|_| generator.generate()).collect();

    // Flatten once; both coverage checks share the same denominator.
    let all_lines: Vec<_> = generated
        .iter()
        .flat_map(|entry| entry.lines.iter())
        .collect();
    let total_lines: usize = all_lines.len();
    let share_of_lines = |hits: usize| hits as f64 / total_lines as f64;

    // Profit-center coverage.
    let with_pc = all_lines
        .iter()
        .filter(|line| line.profit_center.is_some())
        .count();
    let pc_pct = share_of_lines(with_pc);
    assert!(
        pc_pct > 0.95,
        "Expected >95% of lines to have profit_center, got {:.1}% ({}/{})",
        pc_pct * 100.0,
        with_pc,
        total_lines,
    );

    // Line-text coverage.
    let with_text = all_lines
        .iter()
        .filter(|line| line.line_text.is_some())
        .count();
    let text_pct = share_of_lines(with_text);
    assert!(
        text_pct > 0.95,
        "Expected >95% of lines to have line_text, got {:.1}% ({}/{})",
        text_pct * 100.0,
        with_text,
        total_lines,
    );
}
2815
/// For each of 100 generated entries (seed 42), the header must carry a
/// non-empty `source_system`, a non-empty `created_by`, and a `created_date`.
#[test]
fn test_je_has_audit_flags() {
    let coa = Arc::new(
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42)
            .generate(),
    );

    let period_start = NaiveDate::from_ymd_opt(2024, 1, 1).unwrap();
    let period_end = NaiveDate::from_ymd_opt(2024, 12, 31).unwrap();
    let mut generator = JournalEntryGenerator::new_with_params(
        TransactionConfig::default(),
        coa,
        vec![String::from("1000")],
        period_start,
        period_end,
        42,
    )
    .with_persona_errors(false);

    // Entries are checked one at a time, in generation order.
    for entry in (0..100).map(|_| generator.generate()) {
        let header = &entry.header;

        assert!(
            !header.source_system.is_empty(),
            "source_system should be populated, got empty string"
        );

        assert!(
            !header.created_by.is_empty(),
            "created_by should be populated"
        );

        assert!(
            header.created_date.is_some(),
            "created_date should be populated"
        );
    }
}
2856
/// Generates 1000 entries (seed 42, persona errors and batching disabled).
/// The share flagged `is_manual` must lie strictly between 1% and 50%, and
/// for every entry `is_manual` must equal `source == TransactionSource::Manual`.
#[test]
fn test_manual_entry_rate() {
    let coa = Arc::new(
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42)
            .generate(),
    );

    let period_start = NaiveDate::from_ymd_opt(2024, 1, 1).unwrap();
    let period_end = NaiveDate::from_ymd_opt(2024, 12, 31).unwrap();
    let mut generator = JournalEntryGenerator::new_with_params(
        TransactionConfig::default(),
        coa,
        vec![String::from("1000")],
        period_start,
        period_end,
        42,
    )
    .with_persona_errors(false)
    .with_batching(false);

    let total = 1000;
    let generated: Vec<JournalEntry> = (0..total).map(|_| generator.generate()).collect();

    let manual_count: usize = generated
        .iter()
        .map(|entry| usize::from(entry.header.is_manual))
        .sum();
    let manual_rate = manual_count as f64 / total as f64;

    // Both bounds are exclusive.
    assert!(
        0.01 < manual_rate && manual_rate < 0.50,
        "Manual entry rate should be reasonable (1%-50%), got {:.1}% ({}/{})",
        manual_rate * 100.0,
        manual_count,
        total,
    );

    // The boolean flag and the source enum must tell the same story.
    for header in generated.iter().map(|entry| &entry.header) {
        assert_eq!(
            header.is_manual,
            header.source == TransactionSource::Manual,
            "is_manual should match source == Manual"
        );
    }
}
2899
/// For each of 500 generated entries (seed 42, persona errors and batching
/// disabled): entries flagged `is_manual` must have `source_system` equal to
/// "manual" or "spreadsheet", and all other entries must have neither value.
#[test]
fn test_manual_source_consistency() {
    let coa = Arc::new(
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42)
            .generate(),
    );

    let period_start = NaiveDate::from_ymd_opt(2024, 1, 1).unwrap();
    let period_end = NaiveDate::from_ymd_opt(2024, 12, 31).unwrap();
    let mut generator = JournalEntryGenerator::new_with_params(
        TransactionConfig::default(),
        coa,
        vec![String::from("1000")],
        period_start,
        period_end,
        42,
    )
    .with_persona_errors(false)
    .with_batching(false);

    for entry in (0..500).map(|_| generator.generate()) {
        // True when the source system is one of the two manual-style values.
        let manual_style_system = entry.header.source_system == "manual"
            || entry.header.source_system == "spreadsheet";

        if entry.header.is_manual {
            assert!(
                manual_style_system,
                "Manual entry should have source_system 'manual' or 'spreadsheet', got '{}'",
                entry.header.source_system,
            );
        } else {
            assert!(
                !manual_style_system,
                "Non-manual entry should not have source_system 'manual' or 'spreadsheet', got '{}'",
                entry.header.source_system,
            );
        }
    }
}
2939
/// For each of 500 generated entries (seed 42), whenever `created_date` is
/// present its calendar date must not be later than `posting_date`. Entries
/// without a `created_date` are not checked.
#[test]
fn test_created_date_before_posting() {
    let coa = Arc::new(
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42)
            .generate(),
    );

    let period_start = NaiveDate::from_ymd_opt(2024, 1, 1).unwrap();
    let period_end = NaiveDate::from_ymd_opt(2024, 12, 31).unwrap();
    let mut generator = JournalEntryGenerator::new_with_params(
        TransactionConfig::default(),
        coa,
        vec![String::from("1000")],
        period_start,
        period_end,
        42,
    )
    .with_persona_errors(false);

    for entry in (0..500).map(|_| generator.generate()) {
        let header = &entry.header;

        // Only the date part of the creation timestamp takes part in the comparison.
        let created_on = header.created_date.as_ref().map(|stamp| stamp.date());
        if let Some(created_naive_date) = created_on {
            assert!(
                created_naive_date <= header.posting_date,
                "created_date ({}) should be <= posting_date ({})",
                created_naive_date,
                header.posting_date,
            );
        }
    }
}
2970}