1use chrono::{Datelike, NaiveDate};
4use datasynth_core::utils::seeded_rng;
5use rand::prelude::*;
6use rand_chacha::ChaCha8Rng;
7use rust_decimal::prelude::*;
8use rust_decimal::Decimal;
9use std::sync::Arc;
10
11use tracing::debug;
12
13use datasynth_config::schema::{
14 FraudConfig, GeneratorConfig, TemplateConfig, TemporalPatternsConfig, TransactionConfig,
15};
16use datasynth_core::distributions::{
17 BusinessDayCalculator, CrossDayConfig, DriftAdjustments, DriftConfig, DriftController,
18 EventType, LagDistribution, PeriodEndConfig, PeriodEndDynamics, PeriodEndModel,
19 ProcessingLagCalculator, ProcessingLagConfig, *,
20};
21use datasynth_core::models::*;
22use datasynth_core::templates::{
23 descriptions::DescriptionContext, DescriptionGenerator, ReferenceGenerator, ReferenceType,
24};
25use datasynth_core::traits::Generator;
26use datasynth_core::uuid_factory::{DeterministicUuidFactory, GeneratorType};
27use datasynth_core::CountryPack;
28
29use crate::company_selector::WeightedCompanySelector;
30use crate::user_generator::{UserGenerator, UserGeneratorConfig};
31
32pub struct JournalEntryGenerator {
34 rng: ChaCha8Rng,
35 seed: u64,
36 config: TransactionConfig,
37 coa: Arc<ChartOfAccounts>,
38 companies: Vec<String>,
39 company_selector: WeightedCompanySelector,
40 line_sampler: LineItemSampler,
41 amount_sampler: AmountSampler,
42 temporal_sampler: TemporalSampler,
43 start_date: NaiveDate,
44 end_date: NaiveDate,
45 count: u64,
46 uuid_factory: DeterministicUuidFactory,
47 user_pool: Option<UserPool>,
49 description_generator: DescriptionGenerator,
50 reference_generator: ReferenceGenerator,
51 template_config: TemplateConfig,
52 vendor_pool: VendorPool,
53 customer_pool: CustomerPool,
54 material_pool: Option<MaterialPool>,
56 using_real_master_data: bool,
58 fraud_config: FraudConfig,
60 persona_errors_enabled: bool,
62 approval_enabled: bool,
64 approval_threshold: rust_decimal::Decimal,
65 sod_violation_rate: f64,
67 batch_state: Option<BatchState>,
69 drift_controller: Option<DriftController>,
71 business_day_calculator: Option<BusinessDayCalculator>,
73 processing_lag_calculator: Option<ProcessingLagCalculator>,
74 temporal_patterns_config: Option<TemporalPatternsConfig>,
75}
76
77#[derive(Clone)]
82struct BatchState {
83 base_account_number: String,
85 base_amount: rust_decimal::Decimal,
86 base_business_process: Option<BusinessProcess>,
87 base_posting_date: NaiveDate,
88 remaining: u8,
90}
91
92impl JournalEntryGenerator {
93 pub fn new_with_params(
95 config: TransactionConfig,
96 coa: Arc<ChartOfAccounts>,
97 companies: Vec<String>,
98 start_date: NaiveDate,
99 end_date: NaiveDate,
100 seed: u64,
101 ) -> Self {
102 Self::new_with_full_config(
103 config,
104 coa,
105 companies,
106 start_date,
107 end_date,
108 seed,
109 TemplateConfig::default(),
110 None,
111 )
112 }
113
114 #[allow(clippy::too_many_arguments)]
116 pub fn new_with_full_config(
117 config: TransactionConfig,
118 coa: Arc<ChartOfAccounts>,
119 companies: Vec<String>,
120 start_date: NaiveDate,
121 end_date: NaiveDate,
122 seed: u64,
123 template_config: TemplateConfig,
124 user_pool: Option<UserPool>,
125 ) -> Self {
126 let user_pool = user_pool.or_else(|| {
128 if template_config.names.generate_realistic_names {
129 let user_gen_config = UserGeneratorConfig {
130 culture_distribution: vec![
131 (
132 datasynth_core::templates::NameCulture::WesternUs,
133 template_config.names.culture_distribution.western_us,
134 ),
135 (
136 datasynth_core::templates::NameCulture::Hispanic,
137 template_config.names.culture_distribution.hispanic,
138 ),
139 (
140 datasynth_core::templates::NameCulture::German,
141 template_config.names.culture_distribution.german,
142 ),
143 (
144 datasynth_core::templates::NameCulture::French,
145 template_config.names.culture_distribution.french,
146 ),
147 (
148 datasynth_core::templates::NameCulture::Chinese,
149 template_config.names.culture_distribution.chinese,
150 ),
151 (
152 datasynth_core::templates::NameCulture::Japanese,
153 template_config.names.culture_distribution.japanese,
154 ),
155 (
156 datasynth_core::templates::NameCulture::Indian,
157 template_config.names.culture_distribution.indian,
158 ),
159 ],
160 email_domain: template_config.names.email_domain.clone(),
161 generate_realistic_names: true,
162 };
163 let mut user_gen = UserGenerator::with_config(seed + 100, user_gen_config);
164 Some(user_gen.generate_standard(&companies))
165 } else {
166 None
167 }
168 });
169
170 let mut ref_gen = ReferenceGenerator::new(
172 start_date.year(),
173 companies
174 .first()
175 .map(std::string::String::as_str)
176 .unwrap_or("1000"),
177 );
178 ref_gen.set_prefix(
179 ReferenceType::Invoice,
180 &template_config.references.invoice_prefix,
181 );
182 ref_gen.set_prefix(
183 ReferenceType::PurchaseOrder,
184 &template_config.references.po_prefix,
185 );
186 ref_gen.set_prefix(
187 ReferenceType::SalesOrder,
188 &template_config.references.so_prefix,
189 );
190
191 let company_selector = WeightedCompanySelector::uniform(companies.clone());
193
194 Self {
195 rng: seeded_rng(seed, 0),
196 seed,
197 config: config.clone(),
198 coa,
199 companies,
200 company_selector,
201 line_sampler: LineItemSampler::with_config(
202 seed + 1,
203 config.line_item_distribution.clone(),
204 config.even_odd_distribution.clone(),
205 config.debit_credit_distribution.clone(),
206 ),
207 amount_sampler: AmountSampler::with_config(seed + 2, config.amounts.clone()),
208 temporal_sampler: TemporalSampler::with_config(
209 seed + 3,
210 config.seasonality.clone(),
211 WorkingHoursConfig::default(),
212 Vec::new(),
213 ),
214 start_date,
215 end_date,
216 count: 0,
217 uuid_factory: DeterministicUuidFactory::new(seed, GeneratorType::JournalEntry),
218 user_pool,
219 description_generator: DescriptionGenerator::new(),
220 reference_generator: ref_gen,
221 template_config,
222 vendor_pool: VendorPool::standard(),
223 customer_pool: CustomerPool::standard(),
224 material_pool: None,
225 using_real_master_data: false,
226 fraud_config: FraudConfig::default(),
227 persona_errors_enabled: true, approval_enabled: true, approval_threshold: rust_decimal::Decimal::new(10000, 0), sod_violation_rate: 0.10, batch_state: None,
232 drift_controller: None,
233 business_day_calculator: Some(BusinessDayCalculator::new(HolidayCalendar::new(
236 Region::US,
237 start_date.year(),
238 ))),
239 processing_lag_calculator: None,
240 temporal_patterns_config: None,
241 }
242 }
243
244 pub fn from_generator_config(
249 full_config: &GeneratorConfig,
250 coa: Arc<ChartOfAccounts>,
251 start_date: NaiveDate,
252 end_date: NaiveDate,
253 seed: u64,
254 ) -> Self {
255 let companies: Vec<String> = full_config
256 .companies
257 .iter()
258 .map(|c| c.code.clone())
259 .collect();
260
261 let company_selector = WeightedCompanySelector::from_configs(&full_config.companies);
263
264 let mut generator = Self::new_with_full_config(
265 full_config.transactions.clone(),
266 coa,
267 companies,
268 start_date,
269 end_date,
270 seed,
271 full_config.templates.clone(),
272 None,
273 );
274
275 generator.company_selector = company_selector;
277
278 generator.fraud_config = full_config.fraud.clone();
280
281 let temporal_config = &full_config.temporal_patterns;
283 if temporal_config.enabled {
284 generator = generator.with_temporal_patterns(temporal_config.clone(), seed);
285 }
286
287 generator
288 }
289
290 pub fn with_temporal_patterns(mut self, config: TemporalPatternsConfig, seed: u64) -> Self {
297 if config.business_days.enabled {
299 let region = config
300 .calendars
301 .regions
302 .first()
303 .map(|r| Self::parse_region(r))
304 .unwrap_or(Region::US);
305
306 let calendar = HolidayCalendar::new(region, self.start_date.year());
307 self.business_day_calculator = Some(BusinessDayCalculator::new(calendar));
308 }
309
310 if config.processing_lags.enabled {
312 let lag_config = Self::convert_processing_lag_config(&config.processing_lags);
313 self.processing_lag_calculator =
314 Some(ProcessingLagCalculator::with_config(seed, lag_config));
315 }
316
317 let model = config.period_end.model.as_deref().unwrap_or("flat");
319 if model != "flat"
320 || config
321 .period_end
322 .month_end
323 .as_ref()
324 .is_some_and(|m| m.peak_multiplier.unwrap_or(1.0) != 1.0)
325 {
326 let dynamics = Self::convert_period_end_config(&config.period_end);
327 self.temporal_sampler.set_period_end_dynamics(dynamics);
328 }
329
330 self.temporal_patterns_config = Some(config);
331 self
332 }
333
334 pub fn with_country_pack_temporal(
342 mut self,
343 config: TemporalPatternsConfig,
344 seed: u64,
345 pack: &CountryPack,
346 ) -> Self {
347 if config.business_days.enabled {
349 let calendar = HolidayCalendar::from_country_pack(pack, self.start_date.year());
350 self.business_day_calculator = Some(BusinessDayCalculator::new(calendar));
351 }
352
353 if config.processing_lags.enabled {
355 let lag_config = Self::convert_processing_lag_config(&config.processing_lags);
356 self.processing_lag_calculator =
357 Some(ProcessingLagCalculator::with_config(seed, lag_config));
358 }
359
360 let model = config.period_end.model.as_deref().unwrap_or("flat");
362 if model != "flat"
363 || config
364 .period_end
365 .month_end
366 .as_ref()
367 .is_some_and(|m| m.peak_multiplier.unwrap_or(1.0) != 1.0)
368 {
369 let dynamics = Self::convert_period_end_config(&config.period_end);
370 self.temporal_sampler.set_period_end_dynamics(dynamics);
371 }
372
373 self.temporal_patterns_config = Some(config);
374 self
375 }
376
377 fn convert_processing_lag_config(
379 schema: &datasynth_config::schema::ProcessingLagSchemaConfig,
380 ) -> ProcessingLagConfig {
381 let mut config = ProcessingLagConfig {
382 enabled: schema.enabled,
383 ..Default::default()
384 };
385
386 let convert_lag = |lag: &datasynth_config::schema::LagDistributionSchemaConfig| {
388 let mut dist = LagDistribution::log_normal(lag.mu, lag.sigma);
389 if let Some(min) = lag.min_hours {
390 dist.min_lag_hours = min;
391 }
392 if let Some(max) = lag.max_hours {
393 dist.max_lag_hours = max;
394 }
395 dist
396 };
397
398 if let Some(ref lag) = schema.sales_order_lag {
400 config
401 .event_lags
402 .insert(EventType::SalesOrder, convert_lag(lag));
403 }
404 if let Some(ref lag) = schema.purchase_order_lag {
405 config
406 .event_lags
407 .insert(EventType::PurchaseOrder, convert_lag(lag));
408 }
409 if let Some(ref lag) = schema.goods_receipt_lag {
410 config
411 .event_lags
412 .insert(EventType::GoodsReceipt, convert_lag(lag));
413 }
414 if let Some(ref lag) = schema.invoice_receipt_lag {
415 config
416 .event_lags
417 .insert(EventType::InvoiceReceipt, convert_lag(lag));
418 }
419 if let Some(ref lag) = schema.invoice_issue_lag {
420 config
421 .event_lags
422 .insert(EventType::InvoiceIssue, convert_lag(lag));
423 }
424 if let Some(ref lag) = schema.payment_lag {
425 config
426 .event_lags
427 .insert(EventType::Payment, convert_lag(lag));
428 }
429 if let Some(ref lag) = schema.journal_entry_lag {
430 config
431 .event_lags
432 .insert(EventType::JournalEntry, convert_lag(lag));
433 }
434
435 if let Some(ref cross_day) = schema.cross_day_posting {
437 config.cross_day = CrossDayConfig {
438 enabled: cross_day.enabled,
439 probability_by_hour: cross_day.probability_by_hour.clone(),
440 ..Default::default()
441 };
442 }
443
444 config
445 }
446
447 fn convert_period_end_config(
449 schema: &datasynth_config::schema::PeriodEndSchemaConfig,
450 ) -> PeriodEndDynamics {
451 let model_type = schema.model.as_deref().unwrap_or("exponential");
452
453 let convert_period =
455 |period: Option<&datasynth_config::schema::PeriodEndModelSchemaConfig>,
456 default_peak: f64|
457 -> PeriodEndConfig {
458 if let Some(p) = period {
459 let model = match model_type {
460 "flat" => PeriodEndModel::FlatMultiplier {
461 multiplier: p.peak_multiplier.unwrap_or(default_peak),
462 },
463 "extended_crunch" => PeriodEndModel::ExtendedCrunch {
464 start_day: p.start_day.unwrap_or(-10),
465 sustained_high_days: p.sustained_high_days.unwrap_or(3),
466 peak_multiplier: p.peak_multiplier.unwrap_or(default_peak),
467 ramp_up_days: 3, },
469 _ => PeriodEndModel::ExponentialAcceleration {
470 start_day: p.start_day.unwrap_or(-10),
471 base_multiplier: p.base_multiplier.unwrap_or(1.0),
472 peak_multiplier: p.peak_multiplier.unwrap_or(default_peak),
473 decay_rate: p.decay_rate.unwrap_or(0.3),
474 },
475 };
476 PeriodEndConfig {
477 enabled: true,
478 model,
479 additional_multiplier: p.additional_multiplier.unwrap_or(1.0),
480 }
481 } else {
482 PeriodEndConfig {
483 enabled: true,
484 model: PeriodEndModel::ExponentialAcceleration {
485 start_day: -10,
486 base_multiplier: 1.0,
487 peak_multiplier: default_peak,
488 decay_rate: 0.3,
489 },
490 additional_multiplier: 1.0,
491 }
492 }
493 };
494
495 PeriodEndDynamics::new(
496 convert_period(schema.month_end.as_ref(), 2.0),
497 convert_period(schema.quarter_end.as_ref(), 3.5),
498 convert_period(schema.year_end.as_ref(), 5.0),
499 )
500 }
501
502 fn parse_region(region_str: &str) -> Region {
504 match region_str.to_uppercase().as_str() {
505 "US" => Region::US,
506 "DE" => Region::DE,
507 "GB" => Region::GB,
508 "CN" => Region::CN,
509 "JP" => Region::JP,
510 "IN" => Region::IN,
511 "BR" => Region::BR,
512 "MX" => Region::MX,
513 "AU" => Region::AU,
514 "SG" => Region::SG,
515 "KR" => Region::KR,
516 "FR" => Region::FR,
517 "IT" => Region::IT,
518 "ES" => Region::ES,
519 "CA" => Region::CA,
520 _ => Region::US,
521 }
522 }
523
524 pub fn set_company_selector(&mut self, selector: WeightedCompanySelector) {
526 self.company_selector = selector;
527 }
528
529 pub fn company_selector(&self) -> &WeightedCompanySelector {
531 &self.company_selector
532 }
533
534 pub fn set_fraud_config(&mut self, config: FraudConfig) {
536 self.fraud_config = config;
537 }
538
539 pub fn with_vendors(mut self, vendors: &[Vendor]) -> Self {
544 if !vendors.is_empty() {
545 self.vendor_pool = VendorPool::from_vendors(vendors.to_vec());
546 self.using_real_master_data = true;
547 }
548 self
549 }
550
551 pub fn with_customers(mut self, customers: &[Customer]) -> Self {
556 if !customers.is_empty() {
557 self.customer_pool = CustomerPool::from_customers(customers.to_vec());
558 self.using_real_master_data = true;
559 }
560 self
561 }
562
563 pub fn with_materials(mut self, materials: &[Material]) -> Self {
567 if !materials.is_empty() {
568 self.material_pool = Some(MaterialPool::from_materials(materials.to_vec()));
569 self.using_real_master_data = true;
570 }
571 self
572 }
573
574 pub fn with_master_data(
579 self,
580 vendors: &[Vendor],
581 customers: &[Customer],
582 materials: &[Material],
583 ) -> Self {
584 self.with_vendors(vendors)
585 .with_customers(customers)
586 .with_materials(materials)
587 }
588
589 pub fn with_country_pack_names(mut self, pack: &CountryPack) -> Self {
596 let name_gen =
597 datasynth_core::templates::MultiCultureNameGenerator::from_country_pack(pack);
598 let config = UserGeneratorConfig {
599 culture_distribution: Vec::new(),
602 email_domain: name_gen.email_domain().to_string(),
603 generate_realistic_names: true,
604 };
605 let mut user_gen = UserGenerator::with_name_generator(self.seed + 100, config, name_gen);
606 self.user_pool = Some(user_gen.generate_standard(&self.companies));
607 self
608 }
609
610 pub fn is_using_real_master_data(&self) -> bool {
612 self.using_real_master_data
613 }
614
615 fn determine_fraud(&mut self) -> Option<FraudType> {
617 if !self.fraud_config.enabled {
618 return None;
619 }
620
621 if self.rng.random::<f64>() >= self.fraud_config.fraud_rate {
623 return None;
624 }
625
626 Some(self.select_fraud_type())
628 }
629
630 fn select_fraud_type(&mut self) -> FraudType {
632 let dist = &self.fraud_config.fraud_type_distribution;
633 let roll: f64 = self.rng.random();
634
635 let mut cumulative = 0.0;
636
637 cumulative += dist.suspense_account_abuse;
638 if roll < cumulative {
639 return FraudType::SuspenseAccountAbuse;
640 }
641
642 cumulative += dist.fictitious_transaction;
643 if roll < cumulative {
644 return FraudType::FictitiousTransaction;
645 }
646
647 cumulative += dist.revenue_manipulation;
648 if roll < cumulative {
649 return FraudType::RevenueManipulation;
650 }
651
652 cumulative += dist.expense_capitalization;
653 if roll < cumulative {
654 return FraudType::ExpenseCapitalization;
655 }
656
657 cumulative += dist.split_transaction;
658 if roll < cumulative {
659 return FraudType::SplitTransaction;
660 }
661
662 cumulative += dist.timing_anomaly;
663 if roll < cumulative {
664 return FraudType::TimingAnomaly;
665 }
666
667 cumulative += dist.unauthorized_access;
668 if roll < cumulative {
669 return FraudType::UnauthorizedAccess;
670 }
671
672 FraudType::DuplicatePayment
674 }
675
676 fn fraud_type_to_amount_pattern(&self, fraud_type: FraudType) -> FraudAmountPattern {
678 match fraud_type {
679 FraudType::SplitTransaction | FraudType::JustBelowThreshold => {
680 FraudAmountPattern::ThresholdAdjacent
681 }
682 FraudType::FictitiousTransaction
683 | FraudType::FictitiousEntry
684 | FraudType::SuspenseAccountAbuse
685 | FraudType::RoundDollarManipulation => FraudAmountPattern::ObviousRoundNumbers,
686 FraudType::RevenueManipulation
687 | FraudType::ExpenseCapitalization
688 | FraudType::ImproperCapitalization
689 | FraudType::ReserveManipulation
690 | FraudType::UnauthorizedAccess
691 | FraudType::PrematureRevenue
692 | FraudType::UnderstatedLiabilities
693 | FraudType::OverstatedAssets
694 | FraudType::ChannelStuffing => FraudAmountPattern::StatisticallyImprobable,
695 FraudType::DuplicatePayment
696 | FraudType::TimingAnomaly
697 | FraudType::SelfApproval
698 | FraudType::ExceededApprovalLimit
699 | FraudType::SegregationOfDutiesViolation
700 | FraudType::UnauthorizedApproval
701 | FraudType::CollusiveApproval
702 | FraudType::FictitiousVendor
703 | FraudType::ShellCompanyPayment
704 | FraudType::Kickback
705 | FraudType::KickbackScheme
706 | FraudType::InvoiceManipulation
707 | FraudType::AssetMisappropriation
708 | FraudType::InventoryTheft
709 | FraudType::GhostEmployee => FraudAmountPattern::Normal,
710 FraudType::ImproperRevenueRecognition
712 | FraudType::ImproperPoAllocation
713 | FraudType::VariableConsiderationManipulation
714 | FraudType::ContractModificationMisstatement => {
715 FraudAmountPattern::StatisticallyImprobable
716 }
717 FraudType::LeaseClassificationManipulation
719 | FraudType::OffBalanceSheetLease
720 | FraudType::LeaseLiabilityUnderstatement
721 | FraudType::RouAssetMisstatement => FraudAmountPattern::StatisticallyImprobable,
722 FraudType::FairValueHierarchyManipulation
724 | FraudType::Level3InputManipulation
725 | FraudType::ValuationTechniqueManipulation => {
726 FraudAmountPattern::StatisticallyImprobable
727 }
728 FraudType::DelayedImpairment
730 | FraudType::ImpairmentTestAvoidance
731 | FraudType::CashFlowProjectionManipulation
732 | FraudType::ImproperImpairmentReversal => FraudAmountPattern::StatisticallyImprobable,
733 FraudType::BidRigging
735 | FraudType::PhantomVendorContract
736 | FraudType::ConflictOfInterestSourcing => FraudAmountPattern::Normal,
737 FraudType::SplitContractThreshold => FraudAmountPattern::ThresholdAdjacent,
738 FraudType::GhostEmployeePayroll
740 | FraudType::PayrollInflation
741 | FraudType::DuplicateExpenseReport
742 | FraudType::FictitiousExpense => FraudAmountPattern::Normal,
743 FraudType::SplitExpenseToAvoidApproval => FraudAmountPattern::ThresholdAdjacent,
744 FraudType::RevenueTimingManipulation => FraudAmountPattern::StatisticallyImprobable,
746 FraudType::QuotePriceOverride => FraudAmountPattern::Normal,
747 }
748 }
749
750 #[inline]
752 fn generate_deterministic_uuid(&self) -> uuid::Uuid {
753 self.uuid_factory.next()
754 }
755
/// Cost centers that `enrich_line_items` assigns to lines on accounts
/// starting with `5` or `6`.
const COST_CENTER_POOL: &'static [&'static str] = &[
    "CC1000",
    "CC2000",
    "CC3000",
    "CC4000",
    "CC5000",
];
759
760 fn enrich_line_items(&self, entry: &mut JournalEntry) {
766 let posting_date = entry.header.posting_date;
767 let company_code = &entry.header.company_code;
768 let header_text = entry.header.header_text.clone();
769 let business_process = entry.header.business_process;
770
771 let doc_id_bytes = entry.header.document_id.as_bytes();
773 let mut cc_seed: usize = 0;
774 for &b in doc_id_bytes {
775 cc_seed = cc_seed.wrapping_add(b as usize);
776 }
777
778 for (i, line) in entry.lines.iter_mut().enumerate() {
779 if line.account_description.is_none() {
781 line.account_description = self
782 .coa
783 .get_account(&line.gl_account)
784 .map(|a| a.short_description.clone());
785 }
786
787 if line.cost_center.is_none() {
789 let first_char = line.gl_account.chars().next().unwrap_or('0');
790 if first_char == '5' || first_char == '6' {
791 let idx = cc_seed.wrapping_add(i) % Self::COST_CENTER_POOL.len();
792 line.cost_center = Some(Self::COST_CENTER_POOL[idx].to_string());
793 }
794 }
795
796 if line.profit_center.is_none() {
798 let suffix = match business_process {
799 Some(BusinessProcess::P2P) => "-P2P",
800 Some(BusinessProcess::O2C) => "-O2C",
801 Some(BusinessProcess::R2R) => "-R2R",
802 Some(BusinessProcess::H2R) => "-H2R",
803 _ => "",
804 };
805 line.profit_center = Some(format!("PC-{company_code}{suffix}"));
806 }
807
808 if line.line_text.is_none() {
810 line.line_text = header_text.clone();
811 }
812
813 if line.value_date.is_none()
815 && (line.gl_account.starts_with("1100") || line.gl_account.starts_with("2000"))
816 {
817 line.value_date = Some(posting_date);
818 }
819
820 if line.assignment.is_none() {
822 if line.gl_account.starts_with("2000") {
823 if let Some(ref ht) = header_text {
825 if let Some(vendor_part) = ht.rsplit(" - ").next() {
827 if vendor_part.starts_with("V-")
828 || vendor_part.starts_with("VENDOR")
829 || vendor_part.starts_with("Vendor")
830 {
831 line.assignment = Some(vendor_part.to_string());
832 }
833 }
834 }
835 } else if line.gl_account.starts_with("1100") {
836 if let Some(ref ht) = header_text {
838 if let Some(customer_part) = ht.rsplit(" - ").next() {
839 if customer_part.starts_with("C-")
840 || customer_part.starts_with("CUST")
841 || customer_part.starts_with("Customer")
842 {
843 line.assignment = Some(customer_part.to_string());
844 }
845 }
846 }
847 }
848 }
849 }
850 }
851
852 pub fn generate(&mut self) -> JournalEntry {
854 debug!(
855 count = self.count,
856 companies = self.companies.len(),
857 start_date = %self.start_date,
858 end_date = %self.end_date,
859 "Generating journal entry"
860 );
861
862 if let Some(ref state) = self.batch_state {
864 if state.remaining > 0 {
865 return self.generate_batched_entry();
866 }
867 }
868
869 self.count += 1;
870
871 let document_id = self.generate_deterministic_uuid();
873
874 let mut posting_date = self
876 .temporal_sampler
877 .sample_date(self.start_date, self.end_date);
878
879 if let Some(ref calc) = self.business_day_calculator {
881 if !calc.is_business_day(posting_date) {
882 posting_date = calc.next_business_day(posting_date, false);
884 if posting_date > self.end_date {
886 posting_date = calc.prev_business_day(self.end_date, true);
887 }
888 }
889 }
890
891 let company_code = self.company_selector.select(&mut self.rng).to_string();
893
894 let line_spec = self.line_sampler.sample();
896
897 let source = self.select_source();
899 let is_automated = matches!(
900 source,
901 TransactionSource::Automated | TransactionSource::Recurring
902 );
903
904 let business_process = self.select_business_process();
906
907 let fraud_type = self.determine_fraud();
909 let is_fraud = fraud_type.is_some();
910
911 let time = self.temporal_sampler.sample_time(!is_automated);
913 let created_at = posting_date.and_time(time).and_utc();
914
915 let (created_by, user_persona) = self.select_user(is_automated);
917
918 let mut header =
920 JournalEntryHeader::with_deterministic_id(company_code, posting_date, document_id);
921 header.created_at = created_at;
922 header.source = source;
923 header.created_by = created_by;
924 header.user_persona = user_persona;
925 header.business_process = Some(business_process);
926 header.document_type = Self::document_type_for_process(business_process).to_string();
927 header.is_fraud = is_fraud;
928 header.fraud_type = fraud_type;
929
930 let mut context =
932 DescriptionContext::with_period(posting_date.month(), posting_date.year());
933
934 match business_process {
936 BusinessProcess::P2P => {
937 if let Some(vendor) = self.vendor_pool.random_vendor(&mut self.rng) {
938 context.vendor_name = Some(vendor.name.clone());
939 }
940 }
941 BusinessProcess::O2C => {
942 if let Some(customer) = self.customer_pool.random_customer(&mut self.rng) {
943 context.customer_name = Some(customer.name.clone());
944 }
945 }
946 _ => {}
947 }
948
949 if self.template_config.descriptions.generate_header_text {
951 header.header_text = Some(self.description_generator.generate_header_text(
952 business_process,
953 &context,
954 &mut self.rng,
955 ));
956 }
957
958 if self.template_config.references.generate_references {
960 header.reference = Some(
961 self.reference_generator
962 .generate_for_process_year(business_process, posting_date.year()),
963 );
964 }
965
966 header.source_document = header
968 .reference
969 .as_deref()
970 .and_then(DocumentRef::parse)
971 .or_else(|| {
972 if header.source == TransactionSource::Manual {
973 Some(DocumentRef::Manual)
974 } else {
975 None
976 }
977 });
978
979 let mut entry = JournalEntry::new(header);
981
982 let base_amount = if let Some(ft) = fraud_type {
984 let pattern = self.fraud_type_to_amount_pattern(ft);
985 self.amount_sampler.sample_fraud(pattern)
986 } else {
987 self.amount_sampler.sample()
988 };
989
990 let drift_adjusted_amount = {
992 let drift = self.get_drift_adjustments(posting_date);
993 if drift.amount_mean_multiplier != 1.0 {
994 let multiplier = drift.amount_mean_multiplier * drift.seasonal_factor;
996 let adjusted = base_amount.to_f64().unwrap_or(1.0) * multiplier;
997 Decimal::from_f64_retain(adjusted).unwrap_or(base_amount)
998 } else {
999 base_amount
1000 }
1001 };
1002
1003 let total_amount = if is_automated {
1005 drift_adjusted_amount } else {
1007 self.apply_human_variation(drift_adjusted_amount)
1008 };
1009
1010 let debit_amounts = self
1012 .amount_sampler
1013 .sample_summing_to(line_spec.debit_count, total_amount);
1014 for (i, amount) in debit_amounts.into_iter().enumerate() {
1015 let account_number = self.select_debit_account().account_number.clone();
1016 let mut line = JournalEntryLine::debit(
1017 entry.header.document_id,
1018 (i + 1) as u32,
1019 account_number.clone(),
1020 amount,
1021 );
1022
1023 if self.template_config.descriptions.generate_line_text {
1025 line.line_text = Some(self.description_generator.generate_line_text(
1026 &account_number,
1027 &context,
1028 &mut self.rng,
1029 ));
1030 }
1031
1032 entry.add_line(line);
1033 }
1034
1035 let credit_amounts = self
1037 .amount_sampler
1038 .sample_summing_to(line_spec.credit_count, total_amount);
1039 for (i, amount) in credit_amounts.into_iter().enumerate() {
1040 let account_number = self.select_credit_account().account_number.clone();
1041 let mut line = JournalEntryLine::credit(
1042 entry.header.document_id,
1043 (line_spec.debit_count + i + 1) as u32,
1044 account_number.clone(),
1045 amount,
1046 );
1047
1048 if self.template_config.descriptions.generate_line_text {
1050 line.line_text = Some(self.description_generator.generate_line_text(
1051 &account_number,
1052 &context,
1053 &mut self.rng,
1054 ));
1055 }
1056
1057 entry.add_line(line);
1058 }
1059
1060 self.enrich_line_items(&mut entry);
1062
1063 if self.persona_errors_enabled && !is_automated {
1065 self.maybe_inject_persona_error(&mut entry);
1066 }
1067
1068 if self.approval_enabled {
1070 self.maybe_apply_approval_workflow(&mut entry, posting_date);
1071 }
1072
1073 self.populate_approval_fields(&mut entry, posting_date);
1075
1076 self.maybe_start_batch(&entry);
1078
1079 entry
1080 }
1081
1082 pub fn with_persona_errors(mut self, enabled: bool) -> Self {
1087 self.persona_errors_enabled = enabled;
1088 self
1089 }
1090
1091 pub fn with_fraud_config(mut self, config: FraudConfig) -> Self {
1096 self.fraud_config = config;
1097 self
1098 }
1099
1100 pub fn persona_errors_enabled(&self) -> bool {
1102 self.persona_errors_enabled
1103 }
1104
1105 pub fn with_batching(mut self, enabled: bool) -> Self {
1110 if !enabled {
1111 self.batch_state = None;
1112 }
1113 self
1114 }
1115
1116 pub fn batching_enabled(&self) -> bool {
1118 true
1120 }
1121
1122 fn maybe_start_batch(&mut self, entry: &JournalEntry) {
1127 if entry.header.source == TransactionSource::Automated || entry.header.is_fraud {
1129 return;
1130 }
1131
1132 if self.rng.random::<f64>() > 0.15 {
1134 return;
1135 }
1136
1137 let base_account = entry
1139 .lines
1140 .first()
1141 .map(|l| l.gl_account.clone())
1142 .unwrap_or_default();
1143
1144 let base_amount = entry.total_debit();
1145
1146 self.batch_state = Some(BatchState {
1147 base_account_number: base_account,
1148 base_amount,
1149 base_business_process: entry.header.business_process,
1150 base_posting_date: entry.header.posting_date,
1151 remaining: self.rng.random_range(2..7), });
1153 }
1154
1155 fn generate_batched_entry(&mut self) -> JournalEntry {
1163 use rust_decimal::Decimal;
1164
1165 if let Some(ref mut state) = self.batch_state {
1167 state.remaining = state.remaining.saturating_sub(1);
1168 }
1169
1170 let Some(batch) = self.batch_state.clone() else {
1171 tracing::warn!(
1174 "generate_batched_entry called without batch_state; generating standard entry"
1175 );
1176 self.batch_state = None;
1177 return self.generate();
1178 };
1179
1180 let posting_date = batch.base_posting_date;
1182
1183 self.count += 1;
1184 let document_id = self.generate_deterministic_uuid();
1185
1186 let company_code = self.company_selector.select(&mut self.rng).to_string();
1188
1189 let _line_spec = LineItemSpec {
1191 total_count: 2,
1192 debit_count: 1,
1193 credit_count: 1,
1194 split_type: DebitCreditSplit::Equal,
1195 };
1196
1197 let source = TransactionSource::Manual;
1199
1200 let business_process = batch.base_business_process.unwrap_or(BusinessProcess::R2R);
1202
1203 let time = self.temporal_sampler.sample_time(true);
1205 let created_at = posting_date.and_time(time).and_utc();
1206
1207 let (created_by, user_persona) = self.select_user(false);
1209
1210 let mut header =
1212 JournalEntryHeader::with_deterministic_id(company_code, posting_date, document_id);
1213 header.created_at = created_at;
1214 header.source = source;
1215 header.created_by = created_by;
1216 header.user_persona = user_persona;
1217 header.business_process = Some(business_process);
1218 header.document_type = Self::document_type_for_process(business_process).to_string();
1219
1220 header.source_document = Some(DocumentRef::Manual);
1222
1223 let variation = self.rng.random_range(-0.15..0.15);
1225 let varied_amount =
1226 batch.base_amount * (Decimal::ONE + Decimal::try_from(variation).unwrap_or_default());
1227 let total_amount = varied_amount.round_dp(2).max(Decimal::from(1));
1228
1229 let mut entry = JournalEntry::new(header);
1231
1232 let debit_line = JournalEntryLine::debit(
1234 entry.header.document_id,
1235 1,
1236 batch.base_account_number.clone(),
1237 total_amount,
1238 );
1239 entry.add_line(debit_line);
1240
1241 let credit_account = self.select_credit_account().account_number.clone();
1243 let credit_line =
1244 JournalEntryLine::credit(entry.header.document_id, 2, credit_account, total_amount);
1245 entry.add_line(credit_line);
1246
1247 self.enrich_line_items(&mut entry);
1249
1250 if self.persona_errors_enabled {
1252 self.maybe_inject_persona_error(&mut entry);
1253 }
1254
1255 if self.approval_enabled {
1257 self.maybe_apply_approval_workflow(&mut entry, posting_date);
1258 }
1259
1260 self.populate_approval_fields(&mut entry, posting_date);
1262
1263 if batch.remaining <= 1 {
1265 self.batch_state = None;
1266 }
1267
1268 entry
1269 }
1270
1271 fn maybe_inject_persona_error(&mut self, entry: &mut JournalEntry) {
1273 let persona_str = &entry.header.user_persona;
1275 let persona = match persona_str.to_lowercase().as_str() {
1276 s if s.contains("junior") => UserPersona::JuniorAccountant,
1277 s if s.contains("senior") => UserPersona::SeniorAccountant,
1278 s if s.contains("controller") => UserPersona::Controller,
1279 s if s.contains("manager") => UserPersona::Manager,
1280 s if s.contains("executive") => UserPersona::Executive,
1281 _ => return, };
1283
1284 let base_error_rate = persona.error_rate();
1286
1287 let adjusted_rate = self.apply_stress_factors(base_error_rate, entry.header.posting_date);
1289
1290 if self.rng.random::<f64>() >= adjusted_rate {
1292 return; }
1294
1295 self.inject_human_error(entry, persona);
1297 }
1298
1299 fn apply_stress_factors(&self, base_rate: f64, posting_date: chrono::NaiveDate) -> f64 {
1308 use chrono::Datelike;
1309
1310 let mut rate = base_rate;
1311 let day = posting_date.day();
1312 let month = posting_date.month();
1313
1314 if month == 12 && day >= 28 {
1316 rate *= 2.0;
1317 return rate.min(0.5); }
1319
1320 if matches!(month, 3 | 6 | 9 | 12) && day >= 28 {
1322 rate *= 1.75; return rate.min(0.4);
1324 }
1325
1326 if day >= 28 {
1328 rate *= 1.5; }
1330
1331 let weekday = posting_date.weekday();
1333 match weekday {
1334 chrono::Weekday::Mon => {
1335 rate *= 1.2;
1337 }
1338 chrono::Weekday::Fri => {
1339 rate *= 1.3;
1341 }
1342 _ => {}
1343 }
1344
1345 rate.min(0.4)
1347 }
1348
1349 fn apply_human_variation(&mut self, amount: rust_decimal::Decimal) -> rust_decimal::Decimal {
1358 use rust_decimal::Decimal;
1359
1360 if amount < Decimal::from(10) {
1362 return amount;
1363 }
1364
1365 if self.rng.random::<f64>() > 0.70 {
1367 return amount;
1368 }
1369
1370 let variation_type: u8 = self.rng.random_range(0..4);
1372
1373 match variation_type {
1374 0 => {
1375 let variation_pct = self.rng.random_range(-0.02..0.02);
1377 let variation = amount * Decimal::try_from(variation_pct).unwrap_or_default();
1378 (amount + variation).round_dp(2)
1379 }
1380 1 => {
1381 let ten = Decimal::from(10);
1383 (amount / ten).round() * ten
1384 }
1385 2 => {
1386 if amount >= Decimal::from(500) {
1388 let hundred = Decimal::from(100);
1389 (amount / hundred).round() * hundred
1390 } else {
1391 amount
1392 }
1393 }
1394 3 => {
1395 let cents = Decimal::new(self.rng.random_range(-100..100), 2);
1397 (amount + cents).max(Decimal::ZERO).round_dp(2)
1398 }
1399 _ => amount,
1400 }
1401 }
1402
1403 fn rebalance_entry(entry: &mut JournalEntry, modified_was_debit: bool, impact: Decimal) {
1409 let balancing_idx = entry.lines.iter().position(|l| {
1411 if modified_was_debit {
1412 l.credit_amount > Decimal::ZERO
1413 } else {
1414 l.debit_amount > Decimal::ZERO
1415 }
1416 });
1417
1418 if let Some(idx) = balancing_idx {
1419 if modified_was_debit {
1420 entry.lines[idx].credit_amount += impact;
1421 } else {
1422 entry.lines[idx].debit_amount += impact;
1423 }
1424 }
1425 }
1426
1427 fn inject_human_error(&mut self, entry: &mut JournalEntry, persona: UserPersona) {
1432 use rust_decimal::Decimal;
1433
1434 let error_type: u8 = match persona {
1436 UserPersona::JuniorAccountant => {
1437 self.rng.random_range(0..5)
1439 }
1440 UserPersona::SeniorAccountant => {
1441 self.rng.random_range(0..3)
1443 }
1444 UserPersona::Controller | UserPersona::Manager => {
1445 self.rng.random_range(3..5)
1447 }
1448 _ => return,
1449 };
1450
1451 match error_type {
1452 0 => {
1453 if let Some(line) = entry.lines.get_mut(0) {
1455 let is_debit = line.debit_amount > Decimal::ZERO;
1456 let original_amount = if is_debit {
1457 line.debit_amount
1458 } else {
1459 line.credit_amount
1460 };
1461
1462 let s = original_amount.to_string();
1464 if s.len() >= 2 {
1465 let chars: Vec<char> = s.chars().collect();
1466 let pos = self.rng.random_range(0..chars.len().saturating_sub(1));
1467 if chars[pos].is_ascii_digit()
1468 && chars.get(pos + 1).is_some_and(char::is_ascii_digit)
1469 {
1470 let mut new_chars = chars;
1471 new_chars.swap(pos, pos + 1);
1472 if let Ok(new_amount) =
1473 new_chars.into_iter().collect::<String>().parse::<Decimal>()
1474 {
1475 let impact = new_amount - original_amount;
1476
1477 if is_debit {
1479 entry.lines[0].debit_amount = new_amount;
1480 } else {
1481 entry.lines[0].credit_amount = new_amount;
1482 }
1483
1484 Self::rebalance_entry(entry, is_debit, impact);
1486
1487 entry.header.header_text = Some(
1488 entry.header.header_text.clone().unwrap_or_default()
1489 + " [HUMAN_ERROR:TRANSPOSITION]",
1490 );
1491 }
1492 }
1493 }
1494 }
1495 }
1496 1 => {
1497 if let Some(line) = entry.lines.get_mut(0) {
1499 let is_debit = line.debit_amount > Decimal::ZERO;
1500 let original_amount = if is_debit {
1501 line.debit_amount
1502 } else {
1503 line.credit_amount
1504 };
1505
1506 let new_amount = original_amount * Decimal::new(10, 0);
1507 let impact = new_amount - original_amount;
1508
1509 if is_debit {
1511 entry.lines[0].debit_amount = new_amount;
1512 } else {
1513 entry.lines[0].credit_amount = new_amount;
1514 }
1515
1516 Self::rebalance_entry(entry, is_debit, impact);
1518
1519 entry.header.header_text = Some(
1520 entry.header.header_text.clone().unwrap_or_default()
1521 + " [HUMAN_ERROR:DECIMAL_SHIFT]",
1522 );
1523 }
1524 }
1525 2 => {
1526 if let Some(ref mut text) = entry.header.header_text {
1528 let typos = ["teh", "adn", "wiht", "taht", "recieve"];
1529 let correct = ["the", "and", "with", "that", "receive"];
1530 let idx = self.rng.random_range(0..typos.len());
1531 if text.to_lowercase().contains(correct[idx]) {
1532 *text = text.replace(correct[idx], typos[idx]);
1533 *text = format!("{text} [HUMAN_ERROR:TYPO]");
1534 }
1535 }
1536 }
1537 3 => {
1538 if let Some(line) = entry.lines.get_mut(0) {
1540 let is_debit = line.debit_amount > Decimal::ZERO;
1541 let original_amount = if is_debit {
1542 line.debit_amount
1543 } else {
1544 line.credit_amount
1545 };
1546
1547 let new_amount =
1548 (original_amount / Decimal::new(100, 0)).round() * Decimal::new(100, 0);
1549 let impact = new_amount - original_amount;
1550
1551 if is_debit {
1553 entry.lines[0].debit_amount = new_amount;
1554 } else {
1555 entry.lines[0].credit_amount = new_amount;
1556 }
1557
1558 Self::rebalance_entry(entry, is_debit, impact);
1560
1561 entry.header.header_text = Some(
1562 entry.header.header_text.clone().unwrap_or_default()
1563 + " [HUMAN_ERROR:ROUNDED]",
1564 );
1565 }
1566 }
1567 4 => {
1568 if entry.header.document_date == entry.header.posting_date {
1571 let days_late = self.rng.random_range(5..15);
1572 entry.header.document_date =
1573 entry.header.posting_date - chrono::Duration::days(days_late);
1574 entry.header.header_text = Some(
1575 entry.header.header_text.clone().unwrap_or_default()
1576 + " [HUMAN_ERROR:LATE_POSTING]",
1577 );
1578 }
1579 }
1580 _ => {}
1581 }
1582 }
1583
1584 fn maybe_apply_approval_workflow(
1589 &mut self,
1590 entry: &mut JournalEntry,
1591 _posting_date: NaiveDate,
1592 ) {
1593 use rust_decimal::Decimal;
1594
1595 let amount = entry.total_debit();
1596
1597 if amount <= self.approval_threshold {
1599 let workflow = ApprovalWorkflow::auto_approved(
1601 entry.header.created_by.clone(),
1602 entry.header.user_persona.clone(),
1603 amount,
1604 entry.header.created_at,
1605 );
1606 entry.header.approval_workflow = Some(workflow);
1607 return;
1608 }
1609
1610 entry.header.sox_relevant = true;
1612
1613 let required_levels = if amount > Decimal::new(100000, 0) {
1615 3 } else if amount > Decimal::new(50000, 0) {
1617 2 } else {
1619 1 };
1621
1622 let mut workflow = ApprovalWorkflow::new(
1624 entry.header.created_by.clone(),
1625 entry.header.user_persona.clone(),
1626 amount,
1627 );
1628 workflow.required_levels = required_levels;
1629
1630 let submit_time = entry.header.created_at;
1632 let submit_action = ApprovalAction::new(
1633 entry.header.created_by.clone(),
1634 entry.header.user_persona.clone(),
1635 self.parse_persona(&entry.header.user_persona),
1636 ApprovalActionType::Submit,
1637 0,
1638 )
1639 .with_timestamp(submit_time);
1640
1641 workflow.actions.push(submit_action);
1642 workflow.status = ApprovalStatus::Pending;
1643 workflow.submitted_at = Some(submit_time);
1644
1645 let mut current_time = submit_time;
1647 for level in 1..=required_levels {
1648 let delay_hours = self.rng.random_range(1..4);
1650 current_time += chrono::Duration::hours(delay_hours);
1651
1652 while current_time.weekday() == chrono::Weekday::Sat
1654 || current_time.weekday() == chrono::Weekday::Sun
1655 {
1656 current_time += chrono::Duration::days(1);
1657 }
1658
1659 let (approver_id, approver_role) = self.select_approver(level);
1661
1662 let approve_action = ApprovalAction::new(
1663 approver_id.clone(),
1664 approver_role.to_string(),
1665 approver_role,
1666 ApprovalActionType::Approve,
1667 level,
1668 )
1669 .with_timestamp(current_time);
1670
1671 workflow.actions.push(approve_action);
1672 workflow.current_level = level;
1673 }
1674
1675 workflow.status = ApprovalStatus::Approved;
1677 workflow.approved_at = Some(current_time);
1678
1679 entry.header.approval_workflow = Some(workflow);
1680 }
1681
1682 fn select_approver(&mut self, level: u8) -> (String, UserPersona) {
1684 let persona = match level {
1685 1 => UserPersona::Manager,
1686 2 => UserPersona::Controller,
1687 _ => UserPersona::Executive,
1688 };
1689
1690 if let Some(ref pool) = self.user_pool {
1692 if let Some(user) = pool.get_random_user(persona, &mut self.rng) {
1693 return (user.user_id.clone(), persona);
1694 }
1695 }
1696
1697 let approver_id = match persona {
1699 UserPersona::Manager => format!("MGR{:04}", self.rng.random_range(1..100)),
1700 UserPersona::Controller => format!("CTRL{:04}", self.rng.random_range(1..20)),
1701 UserPersona::Executive => format!("EXEC{:04}", self.rng.random_range(1..10)),
1702 _ => format!("USR{:04}", self.rng.random_range(1..1000)),
1703 };
1704
1705 (approver_id, persona)
1706 }
1707
1708 fn parse_persona(&self, persona_str: &str) -> UserPersona {
1710 match persona_str.to_lowercase().as_str() {
1711 s if s.contains("junior") => UserPersona::JuniorAccountant,
1712 s if s.contains("senior") => UserPersona::SeniorAccountant,
1713 s if s.contains("controller") => UserPersona::Controller,
1714 s if s.contains("manager") => UserPersona::Manager,
1715 s if s.contains("executive") => UserPersona::Executive,
1716 s if s.contains("automated") || s.contains("system") => UserPersona::AutomatedSystem,
1717 _ => UserPersona::JuniorAccountant, }
1719 }
1720
1721 pub fn with_approval(mut self, enabled: bool) -> Self {
1723 self.approval_enabled = enabled;
1724 self
1725 }
1726
1727 pub fn with_approval_threshold(mut self, threshold: rust_decimal::Decimal) -> Self {
1729 self.approval_threshold = threshold;
1730 self
1731 }
1732
1733 pub fn with_sod_violation_rate(mut self, rate: f64) -> Self {
1739 self.sod_violation_rate = rate;
1740 self
1741 }
1742
1743 fn populate_approval_fields(&mut self, entry: &mut JournalEntry, posting_date: NaiveDate) {
1746 if let Some(ref workflow) = entry.header.approval_workflow {
1747 let last_approver = workflow
1749 .actions
1750 .iter()
1751 .rev()
1752 .find(|a| matches!(a.action, ApprovalActionType::Approve));
1753
1754 if let Some(approver_action) = last_approver {
1755 entry.header.approved_by = Some(approver_action.actor_id.clone());
1756 entry.header.approval_date = Some(approver_action.action_timestamp.date_naive());
1757 } else {
1758 entry.header.approved_by = Some(workflow.preparer_id.clone());
1760 entry.header.approval_date = Some(posting_date);
1761 }
1762
1763 if self.rng.random::<f64>() < self.sod_violation_rate {
1765 let creator = entry.header.created_by.clone();
1766 entry.header.approved_by = Some(creator);
1767 entry.header.sod_violation = true;
1768 entry.header.sod_conflict_type = Some(SodConflictType::PreparerApprover);
1769 }
1770 }
1771 }
1772
1773 pub fn with_drift_controller(mut self, controller: DriftController) -> Self {
1779 self.drift_controller = Some(controller);
1780 self
1781 }
1782
1783 pub fn with_drift_config(mut self, config: DriftConfig, seed: u64) -> Self {
1788 if config.enabled {
1789 let total_periods = self.calculate_total_periods();
1790 self.drift_controller = Some(DriftController::new(config, seed, total_periods));
1791 }
1792 self
1793 }
1794
1795 fn calculate_total_periods(&self) -> u32 {
1797 let start_year = self.start_date.year();
1798 let start_month = self.start_date.month();
1799 let end_year = self.end_date.year();
1800 let end_month = self.end_date.month();
1801
1802 ((end_year - start_year) * 12 + (end_month as i32 - start_month as i32) + 1).max(1) as u32
1803 }
1804
1805 fn date_to_period(&self, date: NaiveDate) -> u32 {
1807 let start_year = self.start_date.year();
1808 let start_month = self.start_date.month() as i32;
1809 let date_year = date.year();
1810 let date_month = date.month() as i32;
1811
1812 ((date_year - start_year) * 12 + (date_month - start_month)).max(0) as u32
1813 }
1814
1815 fn get_drift_adjustments(&self, date: NaiveDate) -> DriftAdjustments {
1817 if let Some(ref controller) = self.drift_controller {
1818 let period = self.date_to_period(date);
1819 controller.compute_adjustments(period)
1820 } else {
1821 DriftAdjustments::none()
1822 }
1823 }
1824
1825 #[inline]
1827 fn select_user(&mut self, is_automated: bool) -> (String, String) {
1828 if let Some(ref pool) = self.user_pool {
1829 let persona = if is_automated {
1830 UserPersona::AutomatedSystem
1831 } else {
1832 let roll: f64 = self.rng.random();
1834 if roll < 0.4 {
1835 UserPersona::JuniorAccountant
1836 } else if roll < 0.7 {
1837 UserPersona::SeniorAccountant
1838 } else if roll < 0.85 {
1839 UserPersona::Controller
1840 } else {
1841 UserPersona::Manager
1842 }
1843 };
1844
1845 if let Some(user) = pool.get_random_user(persona, &mut self.rng) {
1846 return (user.user_id.clone(), user.persona.to_string());
1847 }
1848 }
1849
1850 if is_automated {
1852 (
1853 format!("BATCH{:04}", self.rng.random_range(1..=20)),
1854 "automated_system".to_string(),
1855 )
1856 } else {
1857 (
1858 format!("USER{:04}", self.rng.random_range(1..=40)),
1859 "senior_accountant".to_string(),
1860 )
1861 }
1862 }
1863
1864 #[inline]
1866 fn select_source(&mut self) -> TransactionSource {
1867 let roll: f64 = self.rng.random();
1868 let dist = &self.config.source_distribution;
1869
1870 if roll < dist.manual {
1871 TransactionSource::Manual
1872 } else if roll < dist.manual + dist.automated {
1873 TransactionSource::Automated
1874 } else if roll < dist.manual + dist.automated + dist.recurring {
1875 TransactionSource::Recurring
1876 } else {
1877 TransactionSource::Adjustment
1878 }
1879 }
1880
1881 #[inline]
1883 fn document_type_for_process(process: BusinessProcess) -> &'static str {
1892 match process {
1893 BusinessProcess::P2P => "KR",
1894 BusinessProcess::O2C => "DR",
1895 BusinessProcess::R2R => "SA",
1896 BusinessProcess::H2R => "HR",
1897 BusinessProcess::A2R => "AA",
1898 _ => "SA",
1899 }
1900 }
1901
1902 fn select_business_process(&mut self) -> BusinessProcess {
1903 let roll: f64 = self.rng.random();
1904
1905 if roll < 0.35 {
1907 BusinessProcess::O2C
1908 } else if roll < 0.65 {
1909 BusinessProcess::P2P
1910 } else if roll < 0.85 {
1911 BusinessProcess::R2R
1912 } else if roll < 0.95 {
1913 BusinessProcess::H2R
1914 } else {
1915 BusinessProcess::A2R
1916 }
1917 }
1918
1919 #[inline]
1920 fn select_debit_account(&mut self) -> &GLAccount {
1921 let accounts = self.coa.get_accounts_by_type(AccountType::Asset);
1922 let expense_accounts = self.coa.get_accounts_by_type(AccountType::Expense);
1923
1924 let all: Vec<_> = if self.rng.random::<f64>() < 0.6 {
1926 accounts
1927 } else {
1928 expense_accounts
1929 };
1930
1931 all.choose(&mut self.rng).copied().unwrap_or_else(|| {
1932 tracing::warn!(
1933 "Account selection returned empty list, falling back to first COA account"
1934 );
1935 &self.coa.accounts[0]
1936 })
1937 }
1938
1939 #[inline]
1940 fn select_credit_account(&mut self) -> &GLAccount {
1941 let liability_accounts = self.coa.get_accounts_by_type(AccountType::Liability);
1942 let revenue_accounts = self.coa.get_accounts_by_type(AccountType::Revenue);
1943
1944 let all: Vec<_> = if self.rng.random::<f64>() < 0.6 {
1946 liability_accounts
1947 } else {
1948 revenue_accounts
1949 };
1950
1951 all.choose(&mut self.rng).copied().unwrap_or_else(|| {
1952 tracing::warn!(
1953 "Account selection returned empty list, falling back to first COA account"
1954 );
1955 &self.coa.accounts[0]
1956 })
1957 }
1958}
1959
1960impl Generator for JournalEntryGenerator {
1961 type Item = JournalEntry;
1962 type Config = (
1963 TransactionConfig,
1964 Arc<ChartOfAccounts>,
1965 Vec<String>,
1966 NaiveDate,
1967 NaiveDate,
1968 );
1969
1970 fn new(config: Self::Config, seed: u64) -> Self {
1971 Self::new_with_params(config.0, config.1, config.2, config.3, config.4, seed)
1972 }
1973
1974 fn generate_one(&mut self) -> Self::Item {
1975 self.generate()
1976 }
1977
1978 fn reset(&mut self) {
1979 self.rng = seeded_rng(self.seed, 0);
1980 self.line_sampler.reset(self.seed + 1);
1981 self.amount_sampler.reset(self.seed + 2);
1982 self.temporal_sampler.reset(self.seed + 3);
1983 self.count = 0;
1984 self.uuid_factory.reset();
1985
1986 let mut ref_gen = ReferenceGenerator::new(
1988 self.start_date.year(),
1989 self.companies
1990 .first()
1991 .map(std::string::String::as_str)
1992 .unwrap_or("1000"),
1993 );
1994 ref_gen.set_prefix(
1995 ReferenceType::Invoice,
1996 &self.template_config.references.invoice_prefix,
1997 );
1998 ref_gen.set_prefix(
1999 ReferenceType::PurchaseOrder,
2000 &self.template_config.references.po_prefix,
2001 );
2002 ref_gen.set_prefix(
2003 ReferenceType::SalesOrder,
2004 &self.template_config.references.so_prefix,
2005 );
2006 self.reference_generator = ref_gen;
2007 }
2008
2009 fn count(&self) -> u64 {
2010 self.count
2011 }
2012
2013 fn seed(&self) -> u64 {
2014 self.seed
2015 }
2016}
2017
2018use datasynth_core::traits::ParallelGenerator;
2019
2020impl ParallelGenerator for JournalEntryGenerator {
2021 fn split(self, parts: usize) -> Vec<Self> {
2027 let parts = parts.max(1);
2028 (0..parts)
2029 .map(|i| {
2030 let sub_seed = self
2032 .seed
2033 .wrapping_add((i as u64).wrapping_mul(0x9E3779B97F4A7C15));
2034
2035 let mut gen = JournalEntryGenerator::new_with_full_config(
2036 self.config.clone(),
2037 Arc::clone(&self.coa),
2038 self.companies.clone(),
2039 self.start_date,
2040 self.end_date,
2041 sub_seed,
2042 self.template_config.clone(),
2043 self.user_pool.clone(),
2044 );
2045
2046 gen.company_selector = self.company_selector.clone();
2048 gen.vendor_pool = self.vendor_pool.clone();
2049 gen.customer_pool = self.customer_pool.clone();
2050 gen.material_pool = self.material_pool.clone();
2051 gen.using_real_master_data = self.using_real_master_data;
2052 gen.fraud_config = self.fraud_config.clone();
2053 gen.persona_errors_enabled = self.persona_errors_enabled;
2054 gen.approval_enabled = self.approval_enabled;
2055 gen.approval_threshold = self.approval_threshold;
2056 gen.sod_violation_rate = self.sod_violation_rate;
2057
2058 gen.uuid_factory = DeterministicUuidFactory::for_partition(
2060 sub_seed,
2061 GeneratorType::JournalEntry,
2062 i as u8,
2063 );
2064
2065 if let Some(ref config) = self.temporal_patterns_config {
2067 gen.temporal_patterns_config = Some(config.clone());
2068 if config.business_days.enabled {
2070 if let Some(ref bdc) = self.business_day_calculator {
2071 gen.business_day_calculator = Some(bdc.clone());
2072 }
2073 }
2074 if config.processing_lags.enabled {
2076 let lag_config =
2077 Self::convert_processing_lag_config(&config.processing_lags);
2078 gen.processing_lag_calculator =
2079 Some(ProcessingLagCalculator::with_config(sub_seed, lag_config));
2080 }
2081 }
2082
2083 if let Some(ref dc) = self.drift_controller {
2085 gen.drift_controller = Some(dc.clone());
2086 }
2087
2088 gen
2089 })
2090 .collect()
2091 }
2092}
2093
2094#[cfg(test)]
2095#[allow(clippy::unwrap_used)]
2096mod tests {
2097 use super::*;
2098 use crate::ChartOfAccountsGenerator;
2099
2100 #[test]
2101 fn test_generate_balanced_entries() {
2102 let mut coa_gen =
2103 ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2104 let coa = Arc::new(coa_gen.generate());
2105
2106 let mut je_gen = JournalEntryGenerator::new_with_params(
2107 TransactionConfig::default(),
2108 coa,
2109 vec!["1000".to_string()],
2110 NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2111 NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2112 42,
2113 );
2114
2115 let mut balanced_count = 0;
2116 for _ in 0..100 {
2117 let entry = je_gen.generate();
2118
2119 let has_human_error = entry
2121 .header
2122 .header_text
2123 .as_ref()
2124 .map(|t| t.contains("[HUMAN_ERROR:"))
2125 .unwrap_or(false);
2126
2127 if !has_human_error {
2128 assert!(
2129 entry.is_balanced(),
2130 "Entry {:?} is not balanced",
2131 entry.header.document_id
2132 );
2133 balanced_count += 1;
2134 }
2135 assert!(entry.line_count() >= 2, "Entry has fewer than 2 lines");
2136 }
2137
2138 assert!(
2140 balanced_count >= 80,
2141 "Expected at least 80 balanced entries, got {}",
2142 balanced_count
2143 );
2144 }
2145
2146 #[test]
2147 fn test_deterministic_generation() {
2148 let mut coa_gen =
2149 ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2150 let coa = Arc::new(coa_gen.generate());
2151
2152 let mut gen1 = JournalEntryGenerator::new_with_params(
2153 TransactionConfig::default(),
2154 Arc::clone(&coa),
2155 vec!["1000".to_string()],
2156 NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2157 NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2158 42,
2159 );
2160
2161 let mut gen2 = JournalEntryGenerator::new_with_params(
2162 TransactionConfig::default(),
2163 coa,
2164 vec!["1000".to_string()],
2165 NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2166 NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2167 42,
2168 );
2169
2170 for _ in 0..50 {
2171 let e1 = gen1.generate();
2172 let e2 = gen2.generate();
2173 assert_eq!(e1.header.document_id, e2.header.document_id);
2174 assert_eq!(e1.total_debit(), e2.total_debit());
2175 }
2176 }
2177
2178 #[test]
2179 fn test_templates_generate_descriptions() {
2180 let mut coa_gen =
2181 ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2182 let coa = Arc::new(coa_gen.generate());
2183
2184 let template_config = TemplateConfig {
2186 names: datasynth_config::schema::NameTemplateConfig {
2187 generate_realistic_names: true,
2188 email_domain: "test.com".to_string(),
2189 culture_distribution: datasynth_config::schema::CultureDistribution::default(),
2190 },
2191 descriptions: datasynth_config::schema::DescriptionTemplateConfig {
2192 generate_header_text: true,
2193 generate_line_text: true,
2194 },
2195 references: datasynth_config::schema::ReferenceTemplateConfig {
2196 generate_references: true,
2197 invoice_prefix: "TEST-INV".to_string(),
2198 po_prefix: "TEST-PO".to_string(),
2199 so_prefix: "TEST-SO".to_string(),
2200 },
2201 };
2202
2203 let mut je_gen = JournalEntryGenerator::new_with_full_config(
2204 TransactionConfig::default(),
2205 coa,
2206 vec!["1000".to_string()],
2207 NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2208 NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2209 42,
2210 template_config,
2211 None,
2212 )
2213 .with_persona_errors(false); for _ in 0..10 {
2216 let entry = je_gen.generate();
2217
2218 assert!(
2220 entry.header.header_text.is_some(),
2221 "Header text should be populated"
2222 );
2223
2224 assert!(
2226 entry.header.reference.is_some(),
2227 "Reference should be populated"
2228 );
2229
2230 assert!(
2232 entry.header.business_process.is_some(),
2233 "Business process should be set"
2234 );
2235
2236 for line in &entry.lines {
2238 assert!(line.line_text.is_some(), "Line text should be populated");
2239 }
2240
2241 assert!(entry.is_balanced());
2243 }
2244 }
2245
2246 #[test]
2247 fn test_user_pool_integration() {
2248 let mut coa_gen =
2249 ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2250 let coa = Arc::new(coa_gen.generate());
2251
2252 let companies = vec!["1000".to_string()];
2253
2254 let mut user_gen = crate::UserGenerator::new(42);
2256 let user_pool = user_gen.generate_standard(&companies);
2257
2258 let mut je_gen = JournalEntryGenerator::new_with_full_config(
2259 TransactionConfig::default(),
2260 coa,
2261 companies,
2262 NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2263 NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2264 42,
2265 TemplateConfig::default(),
2266 Some(user_pool),
2267 );
2268
2269 for _ in 0..20 {
2271 let entry = je_gen.generate();
2272
2273 assert!(!entry.header.created_by.is_empty());
2276 }
2277 }
2278
2279 #[test]
2280 fn test_master_data_connection() {
2281 let mut coa_gen =
2282 ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2283 let coa = Arc::new(coa_gen.generate());
2284
2285 let vendors = vec![
2287 Vendor::new("V-TEST-001", "Test Vendor Alpha", VendorType::Supplier),
2288 Vendor::new("V-TEST-002", "Test Vendor Beta", VendorType::Technology),
2289 ];
2290
2291 let customers = vec![
2293 Customer::new("C-TEST-001", "Test Customer One", CustomerType::Corporate),
2294 Customer::new(
2295 "C-TEST-002",
2296 "Test Customer Two",
2297 CustomerType::SmallBusiness,
2298 ),
2299 ];
2300
2301 let materials = vec![Material::new(
2303 "MAT-TEST-001",
2304 "Test Material A",
2305 MaterialType::RawMaterial,
2306 )];
2307
2308 let generator = JournalEntryGenerator::new_with_params(
2310 TransactionConfig::default(),
2311 coa,
2312 vec!["1000".to_string()],
2313 NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2314 NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2315 42,
2316 );
2317
2318 assert!(!generator.is_using_real_master_data());
2320
2321 let generator_with_data = generator
2323 .with_vendors(&vendors)
2324 .with_customers(&customers)
2325 .with_materials(&materials);
2326
2327 assert!(generator_with_data.is_using_real_master_data());
2329 }
2330
2331 #[test]
2332 fn test_with_master_data_convenience_method() {
2333 let mut coa_gen =
2334 ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2335 let coa = Arc::new(coa_gen.generate());
2336
2337 let vendors = vec![Vendor::new("V-001", "Vendor One", VendorType::Supplier)];
2338 let customers = vec![Customer::new(
2339 "C-001",
2340 "Customer One",
2341 CustomerType::Corporate,
2342 )];
2343 let materials = vec![Material::new(
2344 "MAT-001",
2345 "Material One",
2346 MaterialType::RawMaterial,
2347 )];
2348
2349 let generator = JournalEntryGenerator::new_with_params(
2350 TransactionConfig::default(),
2351 coa,
2352 vec!["1000".to_string()],
2353 NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2354 NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2355 42,
2356 )
2357 .with_master_data(&vendors, &customers, &materials);
2358
2359 assert!(generator.is_using_real_master_data());
2360 }
2361
2362 #[test]
2363 fn test_stress_factors_increase_error_rate() {
2364 let mut coa_gen =
2365 ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2366 let coa = Arc::new(coa_gen.generate());
2367
2368 let generator = JournalEntryGenerator::new_with_params(
2369 TransactionConfig::default(),
2370 coa,
2371 vec!["1000".to_string()],
2372 NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2373 NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2374 42,
2375 );
2376
2377 let base_rate = 0.1;
2378
2379 let regular_day = NaiveDate::from_ymd_opt(2024, 6, 15).unwrap(); let regular_rate = generator.apply_stress_factors(base_rate, regular_day);
2382 assert!(
2383 (regular_rate - base_rate).abs() < 0.01,
2384 "Regular day should have minimal stress factor adjustment"
2385 );
2386
2387 let month_end = NaiveDate::from_ymd_opt(2024, 6, 29).unwrap(); let month_end_rate = generator.apply_stress_factors(base_rate, month_end);
2390 assert!(
2391 month_end_rate > regular_rate,
2392 "Month end should have higher error rate than regular day"
2393 );
2394
2395 let year_end = NaiveDate::from_ymd_opt(2024, 12, 30).unwrap(); let year_end_rate = generator.apply_stress_factors(base_rate, year_end);
2398 assert!(
2399 year_end_rate > month_end_rate,
2400 "Year end should have highest error rate"
2401 );
2402
2403 let friday = NaiveDate::from_ymd_opt(2024, 6, 14).unwrap(); let friday_rate = generator.apply_stress_factors(base_rate, friday);
2406 assert!(
2407 friday_rate > regular_rate,
2408 "Friday should have higher error rate than mid-week"
2409 );
2410
2411 let monday = NaiveDate::from_ymd_opt(2024, 6, 17).unwrap(); let monday_rate = generator.apply_stress_factors(base_rate, monday);
2414 assert!(
2415 monday_rate > regular_rate,
2416 "Monday should have higher error rate than mid-week"
2417 );
2418 }
2419
2420 #[test]
2421 fn test_batching_produces_similar_entries() {
2422 let mut coa_gen =
2423 ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2424 let coa = Arc::new(coa_gen.generate());
2425
2426 let mut je_gen = JournalEntryGenerator::new_with_params(
2428 TransactionConfig::default(),
2429 coa,
2430 vec!["1000".to_string()],
2431 NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2432 NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2433 123,
2434 )
2435 .with_persona_errors(false); let entries: Vec<JournalEntry> = (0..200).map(|_| je_gen.generate()).collect();
2439
2440 for entry in &entries {
2442 assert!(
2443 entry.is_balanced(),
2444 "All entries including batched should be balanced"
2445 );
2446 }
2447
2448 let mut date_counts: std::collections::HashMap<NaiveDate, usize> =
2450 std::collections::HashMap::new();
2451 for entry in &entries {
2452 *date_counts.entry(entry.header.posting_date).or_insert(0) += 1;
2453 }
2454
2455 let dates_with_multiple = date_counts.values().filter(|&&c| c > 1).count();
2457 assert!(
2458 dates_with_multiple > 0,
2459 "With batching, should see some dates with multiple entries"
2460 );
2461 }
2462
2463 #[test]
2464 fn test_temporal_patterns_business_days() {
2465 use datasynth_config::schema::{
2466 BusinessDaySchemaConfig, CalendarSchemaConfig, TemporalPatternsConfig,
2467 };
2468
2469 let mut coa_gen =
2470 ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2471 let coa = Arc::new(coa_gen.generate());
2472
2473 let temporal_config = TemporalPatternsConfig {
2475 enabled: true,
2476 business_days: BusinessDaySchemaConfig {
2477 enabled: true,
2478 ..Default::default()
2479 },
2480 calendars: CalendarSchemaConfig {
2481 regions: vec!["US".to_string()],
2482 custom_holidays: vec![],
2483 },
2484 ..Default::default()
2485 };
2486
2487 let mut je_gen = JournalEntryGenerator::new_with_params(
2488 TransactionConfig::default(),
2489 coa,
2490 vec!["1000".to_string()],
2491 NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2492 NaiveDate::from_ymd_opt(2024, 3, 31).unwrap(), 42,
2494 )
2495 .with_temporal_patterns(temporal_config, 42)
2496 .with_persona_errors(false);
2497
2498 let entries: Vec<JournalEntry> = (0..100).map(|_| je_gen.generate()).collect();
2500
2501 for entry in &entries {
2502 let weekday = entry.header.posting_date.weekday();
2503 assert!(
2504 weekday != chrono::Weekday::Sat && weekday != chrono::Weekday::Sun,
2505 "Posting date {:?} should not be a weekend",
2506 entry.header.posting_date
2507 );
2508 }
2509 }
2510
/// Checks the weekend share of posting dates for a generator that this test
/// does not configure with temporal patterns.
///
/// Generates 500 entries across calendar year 2024 and asserts that fewer
/// than 5% of them are posted on a Saturday or Sunday.
#[test]
fn test_default_generation_filters_weekends() {
    let mut chart_builder =
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
    let chart = Arc::new(chart_builder.generate());

    let period_start = NaiveDate::from_ymd_opt(2024, 1, 1).unwrap();
    let period_end = NaiveDate::from_ymd_opt(2024, 12, 31).unwrap();
    let mut generator = JournalEntryGenerator::new_with_params(
        TransactionConfig::default(),
        chart,
        vec![String::from("1000")],
        period_start,
        period_end,
        42,
    )
    .with_persona_errors(false);

    let total = 500;
    let mut entries: Vec<JournalEntry> = Vec::new();
    for _ in 0..total {
        entries.push(generator.generate());
    }

    // Tally the entries whose posting date lands on Saturday or Sunday.
    let mut weekend_count = 0_usize;
    for entry in &entries {
        let day = entry.header.posting_date.weekday();
        if matches!(day, chrono::Weekday::Sat | chrono::Weekday::Sun) {
            weekend_count += 1;
        }
    }

    let weekend_pct = weekend_count as f64 / total as f64;
    assert!(
        weekend_pct < 0.05,
        "Expected weekend entries <5% of total without temporal_patterns enabled, \
         but got {:.1}% ({}/{})",
        weekend_pct * 100.0,
        weekend_count,
        total
    );
}
2551
/// Checks that generated entries carry a variety of document types.
///
/// Generates 200 entries (seed 99, batching and persona errors disabled) and
/// asserts that more than three distinct `document_type` values occur and
/// that the "SA" type makes up less than half of the sample.
#[test]
fn test_document_type_derived_from_business_process() {
    use std::collections::HashSet;

    let mut chart_builder =
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
    let chart = Arc::new(chart_builder.generate());

    let period_start = NaiveDate::from_ymd_opt(2024, 1, 1).unwrap();
    let period_end = NaiveDate::from_ymd_opt(2024, 12, 31).unwrap();
    let mut generator = JournalEntryGenerator::new_with_params(
        TransactionConfig::default(),
        chart,
        vec![String::from("1000")],
        period_start,
        period_end,
        99,
    )
    .with_persona_errors(false)
    .with_batching(false);

    let total = 200;

    // Record the document type of every generated entry, in generation order.
    let observed: Vec<_> = (0..total)
        .map(|_| generator.generate().header.document_type.clone())
        .collect();

    // Derive both statistics from the recorded sample.
    let sa_count = observed
        .iter()
        .filter(|doc_type| *doc_type == "SA")
        .count();
    let doc_types: HashSet<_> = observed.into_iter().collect();

    assert!(
        doc_types.len() > 3,
        "Expected >3 distinct document types, got {} ({:?})",
        doc_types.len(),
        doc_types,
    );

    let sa_pct = sa_count as f64 / total as f64;
    assert!(
        sa_pct < 0.50,
        "Expected SA <50%, got {:.1}% ({}/{})",
        sa_pct * 100.0,
        sa_count,
        total,
    );
}
2600
/// Checks that nearly all generated line items carry an account description.
///
/// Generates 200 entries across calendar year 2024 and asserts that more than
/// 95% of all lines have `account_description` set.
#[test]
fn test_enrich_line_items_account_description() {
    let mut coa_gen =
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
    let coa = Arc::new(coa_gen.generate());

    let mut je_gen = JournalEntryGenerator::new_with_params(
        TransactionConfig::default(),
        coa,
        vec!["1000".to_string()],
        NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
        NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
        42,
    )
    .with_persona_errors(false);

    let total = 200;
    let entries: Vec<JournalEntry> = (0..total).map(|_| je_gen.generate()).collect();

    let total_lines: usize = entries.iter().map(|e| e.lines.len()).sum();

    // Without any lines the ratio below would be 0/0 = NaN, and the coverage
    // assertion would fail with a confusing "NaN%" message. Fail early with a
    // clear diagnosis instead.
    assert!(
        total_lines > 0,
        "Generator produced no line items across {} entries; cannot measure account_description coverage",
        total,
    );

    let lines_with_desc: usize = entries
        .iter()
        .flat_map(|e| &e.lines)
        .filter(|l| l.account_description.is_some())
        .count();

    let desc_pct = lines_with_desc as f64 / total_lines as f64;
    assert!(
        desc_pct > 0.95,
        "Expected >95% of lines to have account_description, got {:.1}% ({}/{})",
        desc_pct * 100.0,
        lines_with_desc,
        total_lines,
    );
}
2637
/// Checks cost-center coverage on expense lines.
///
/// Generates 300 entries across calendar year 2024 and selects the lines whose
/// `gl_account` begins with '5' or '6' (treated by this test as expense
/// accounts). More than 80% of those lines must have `cost_center` set.
///
/// If the sample contains no such lines, the coverage check is skipped and the
/// test passes.
#[test]
fn test_enrich_line_items_cost_center_for_expense_accounts() {
    let mut chart_builder =
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
    let chart = Arc::new(chart_builder.generate());

    let period_start = NaiveDate::from_ymd_opt(2024, 1, 1).unwrap();
    let period_end = NaiveDate::from_ymd_opt(2024, 12, 31).unwrap();
    let mut generator = JournalEntryGenerator::new_with_params(
        TransactionConfig::default(),
        chart,
        vec![String::from("1000")],
        period_start,
        period_end,
        42,
    )
    .with_persona_errors(false);

    let total = 300;
    let mut entries: Vec<JournalEntry> = Vec::new();
    for _ in 0..total {
        entries.push(generator.generate());
    }

    // Gather every line posted to an account whose number starts with 5 or 6.
    let mut expense_lines: Vec<&JournalEntryLine> = Vec::new();
    for entry in &entries {
        for line in &entry.lines {
            if matches!(line.gl_account.chars().next(), Some('5' | '6')) {
                expense_lines.push(line);
            }
        }
    }

    // Nothing to measure: leave the test passing, as there is no ratio to check.
    if expense_lines.is_empty() {
        return;
    }

    let mut with_cc = 0_usize;
    for line in &expense_lines {
        if line.cost_center.is_some() {
            with_cc += 1;
        }
    }

    let cc_pct = with_cc as f64 / expense_lines.len() as f64;
    assert!(
        cc_pct > 0.80,
        "Expected >80% of expense lines to have cost_center, got {:.1}% ({}/{})",
        cc_pct * 100.0,
        with_cc,
        expense_lines.len(),
    );
}
2682
/// Checks that nearly all generated line items carry a profit center and a
/// line text.
///
/// Generates 100 entries across calendar year 2024 and asserts, separately,
/// that more than 95% of all lines have `profit_center` set and that more than
/// 95% have `line_text` set.
#[test]
fn test_enrich_line_items_profit_center_and_line_text() {
    let mut coa_gen =
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
    let coa = Arc::new(coa_gen.generate());

    let mut je_gen = JournalEntryGenerator::new_with_params(
        TransactionConfig::default(),
        coa,
        vec!["1000".to_string()],
        NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
        NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
        42,
    )
    .with_persona_errors(false);

    let total = 100;
    let entries: Vec<JournalEntry> = (0..total).map(|_| je_gen.generate()).collect();

    let total_lines: usize = entries.iter().map(|e| e.lines.len()).sum();

    // Both ratios below divide by `total_lines`; with zero lines they would be
    // 0/0 = NaN and the assertions would fail with a confusing "NaN%" message.
    // Fail early with a clear diagnosis instead.
    assert!(
        total_lines > 0,
        "Generator produced no line items across {} entries; cannot measure profit_center/line_text coverage",
        total,
    );

    // Share of lines with a profit center assigned.
    let with_pc = entries
        .iter()
        .flat_map(|e| &e.lines)
        .filter(|l| l.profit_center.is_some())
        .count();
    let pc_pct = with_pc as f64 / total_lines as f64;
    assert!(
        pc_pct > 0.95,
        "Expected >95% of lines to have profit_center, got {:.1}% ({}/{})",
        pc_pct * 100.0,
        with_pc,
        total_lines,
    );

    // Share of lines with a line text assigned.
    let with_text = entries
        .iter()
        .flat_map(|e| &e.lines)
        .filter(|l| l.line_text.is_some())
        .count();
    let text_pct = with_text as f64 / total_lines as f64;
    assert!(
        text_pct > 0.95,
        "Expected >95% of lines to have line_text, got {:.1}% ({}/{})",
        text_pct * 100.0,
        with_text,
        total_lines,
    );
}
2734}