1use chrono::{Datelike, NaiveDate};
4use datasynth_core::utils::seeded_rng;
5use rand::prelude::*;
6use rand_chacha::ChaCha8Rng;
7use rust_decimal::prelude::*;
8use rust_decimal::Decimal;
9use std::sync::Arc;
10
11use tracing::debug;
12
13use datasynth_config::schema::{
14 FraudConfig, GeneratorConfig, TemplateConfig, TemporalPatternsConfig, TransactionConfig,
15};
16use datasynth_core::distributions::{
17 BusinessDayCalculator, CrossDayConfig, DriftAdjustments, DriftConfig, DriftController,
18 EventType, LagDistribution, PeriodEndConfig, PeriodEndDynamics, PeriodEndModel,
19 ProcessingLagCalculator, ProcessingLagConfig, *,
20};
21use datasynth_core::models::*;
22use datasynth_core::templates::{
23 descriptions::DescriptionContext, DescriptionGenerator, ReferenceGenerator, ReferenceType,
24};
25use datasynth_core::traits::Generator;
26use datasynth_core::uuid_factory::{DeterministicUuidFactory, GeneratorType};
27use datasynth_core::CountryPack;
28
29use crate::company_selector::WeightedCompanySelector;
30use crate::user_generator::{UserGenerator, UserGeneratorConfig};
31
32pub struct JournalEntryGenerator {
34 rng: ChaCha8Rng,
35 seed: u64,
36 config: TransactionConfig,
37 coa: Arc<ChartOfAccounts>,
38 companies: Vec<String>,
39 company_selector: WeightedCompanySelector,
40 line_sampler: LineItemSampler,
41 amount_sampler: AmountSampler,
42 temporal_sampler: TemporalSampler,
43 start_date: NaiveDate,
44 end_date: NaiveDate,
45 count: u64,
46 uuid_factory: DeterministicUuidFactory,
47 user_pool: Option<UserPool>,
49 description_generator: DescriptionGenerator,
50 reference_generator: ReferenceGenerator,
51 template_config: TemplateConfig,
52 vendor_pool: VendorPool,
53 customer_pool: CustomerPool,
54 material_pool: Option<MaterialPool>,
56 using_real_master_data: bool,
58 fraud_config: FraudConfig,
60 persona_errors_enabled: bool,
62 approval_enabled: bool,
64 approval_threshold: rust_decimal::Decimal,
65 batch_state: Option<BatchState>,
67 drift_controller: Option<DriftController>,
69 business_day_calculator: Option<BusinessDayCalculator>,
71 processing_lag_calculator: Option<ProcessingLagCalculator>,
72 temporal_patterns_config: Option<TemporalPatternsConfig>,
73}
74
75#[derive(Clone)]
80struct BatchState {
81 base_account_number: String,
83 base_amount: rust_decimal::Decimal,
84 base_business_process: Option<BusinessProcess>,
85 base_posting_date: NaiveDate,
86 remaining: u8,
88}
89
90impl JournalEntryGenerator {
91 pub fn new_with_params(
93 config: TransactionConfig,
94 coa: Arc<ChartOfAccounts>,
95 companies: Vec<String>,
96 start_date: NaiveDate,
97 end_date: NaiveDate,
98 seed: u64,
99 ) -> Self {
100 Self::new_with_full_config(
101 config,
102 coa,
103 companies,
104 start_date,
105 end_date,
106 seed,
107 TemplateConfig::default(),
108 None,
109 )
110 }
111
112 #[allow(clippy::too_many_arguments)]
114 pub fn new_with_full_config(
115 config: TransactionConfig,
116 coa: Arc<ChartOfAccounts>,
117 companies: Vec<String>,
118 start_date: NaiveDate,
119 end_date: NaiveDate,
120 seed: u64,
121 template_config: TemplateConfig,
122 user_pool: Option<UserPool>,
123 ) -> Self {
124 let user_pool = user_pool.or_else(|| {
126 if template_config.names.generate_realistic_names {
127 let user_gen_config = UserGeneratorConfig {
128 culture_distribution: vec![
129 (
130 datasynth_core::templates::NameCulture::WesternUs,
131 template_config.names.culture_distribution.western_us,
132 ),
133 (
134 datasynth_core::templates::NameCulture::Hispanic,
135 template_config.names.culture_distribution.hispanic,
136 ),
137 (
138 datasynth_core::templates::NameCulture::German,
139 template_config.names.culture_distribution.german,
140 ),
141 (
142 datasynth_core::templates::NameCulture::French,
143 template_config.names.culture_distribution.french,
144 ),
145 (
146 datasynth_core::templates::NameCulture::Chinese,
147 template_config.names.culture_distribution.chinese,
148 ),
149 (
150 datasynth_core::templates::NameCulture::Japanese,
151 template_config.names.culture_distribution.japanese,
152 ),
153 (
154 datasynth_core::templates::NameCulture::Indian,
155 template_config.names.culture_distribution.indian,
156 ),
157 ],
158 email_domain: template_config.names.email_domain.clone(),
159 generate_realistic_names: true,
160 };
161 let mut user_gen = UserGenerator::with_config(seed + 100, user_gen_config);
162 Some(user_gen.generate_standard(&companies))
163 } else {
164 None
165 }
166 });
167
168 let mut ref_gen = ReferenceGenerator::new(
170 start_date.year(),
171 companies
172 .first()
173 .map(std::string::String::as_str)
174 .unwrap_or("1000"),
175 );
176 ref_gen.set_prefix(
177 ReferenceType::Invoice,
178 &template_config.references.invoice_prefix,
179 );
180 ref_gen.set_prefix(
181 ReferenceType::PurchaseOrder,
182 &template_config.references.po_prefix,
183 );
184 ref_gen.set_prefix(
185 ReferenceType::SalesOrder,
186 &template_config.references.so_prefix,
187 );
188
189 let company_selector = WeightedCompanySelector::uniform(companies.clone());
191
192 Self {
193 rng: seeded_rng(seed, 0),
194 seed,
195 config: config.clone(),
196 coa,
197 companies,
198 company_selector,
199 line_sampler: LineItemSampler::with_config(
200 seed + 1,
201 config.line_item_distribution.clone(),
202 config.even_odd_distribution.clone(),
203 config.debit_credit_distribution.clone(),
204 ),
205 amount_sampler: AmountSampler::with_config(seed + 2, config.amounts.clone()),
206 temporal_sampler: TemporalSampler::with_config(
207 seed + 3,
208 config.seasonality.clone(),
209 WorkingHoursConfig::default(),
210 Vec::new(),
211 ),
212 start_date,
213 end_date,
214 count: 0,
215 uuid_factory: DeterministicUuidFactory::new(seed, GeneratorType::JournalEntry),
216 user_pool,
217 description_generator: DescriptionGenerator::new(),
218 reference_generator: ref_gen,
219 template_config,
220 vendor_pool: VendorPool::standard(),
221 customer_pool: CustomerPool::standard(),
222 material_pool: None,
223 using_real_master_data: false,
224 fraud_config: FraudConfig::default(),
225 persona_errors_enabled: true, approval_enabled: true, approval_threshold: rust_decimal::Decimal::new(10000, 0), batch_state: None,
229 drift_controller: None,
230 business_day_calculator: Some(BusinessDayCalculator::new(HolidayCalendar::new(
233 Region::US,
234 start_date.year(),
235 ))),
236 processing_lag_calculator: None,
237 temporal_patterns_config: None,
238 }
239 }
240
241 pub fn from_generator_config(
246 full_config: &GeneratorConfig,
247 coa: Arc<ChartOfAccounts>,
248 start_date: NaiveDate,
249 end_date: NaiveDate,
250 seed: u64,
251 ) -> Self {
252 let companies: Vec<String> = full_config
253 .companies
254 .iter()
255 .map(|c| c.code.clone())
256 .collect();
257
258 let company_selector = WeightedCompanySelector::from_configs(&full_config.companies);
260
261 let mut generator = Self::new_with_full_config(
262 full_config.transactions.clone(),
263 coa,
264 companies,
265 start_date,
266 end_date,
267 seed,
268 full_config.templates.clone(),
269 None,
270 );
271
272 generator.company_selector = company_selector;
274
275 generator.fraud_config = full_config.fraud.clone();
277
278 let temporal_config = &full_config.temporal_patterns;
280 if temporal_config.enabled {
281 generator = generator.with_temporal_patterns(temporal_config.clone(), seed);
282 }
283
284 generator
285 }
286
287 pub fn with_temporal_patterns(mut self, config: TemporalPatternsConfig, seed: u64) -> Self {
294 if config.business_days.enabled {
296 let region = config
297 .calendars
298 .regions
299 .first()
300 .map(|r| Self::parse_region(r))
301 .unwrap_or(Region::US);
302
303 let calendar = HolidayCalendar::new(region, self.start_date.year());
304 self.business_day_calculator = Some(BusinessDayCalculator::new(calendar));
305 }
306
307 if config.processing_lags.enabled {
309 let lag_config = Self::convert_processing_lag_config(&config.processing_lags);
310 self.processing_lag_calculator =
311 Some(ProcessingLagCalculator::with_config(seed, lag_config));
312 }
313
314 let model = config.period_end.model.as_deref().unwrap_or("flat");
316 if model != "flat"
317 || config
318 .period_end
319 .month_end
320 .as_ref()
321 .is_some_and(|m| m.peak_multiplier.unwrap_or(1.0) != 1.0)
322 {
323 let dynamics = Self::convert_period_end_config(&config.period_end);
324 self.temporal_sampler.set_period_end_dynamics(dynamics);
325 }
326
327 self.temporal_patterns_config = Some(config);
328 self
329 }
330
331 pub fn with_country_pack_temporal(
339 mut self,
340 config: TemporalPatternsConfig,
341 seed: u64,
342 pack: &CountryPack,
343 ) -> Self {
344 if config.business_days.enabled {
346 let calendar = HolidayCalendar::from_country_pack(pack, self.start_date.year());
347 self.business_day_calculator = Some(BusinessDayCalculator::new(calendar));
348 }
349
350 if config.processing_lags.enabled {
352 let lag_config = Self::convert_processing_lag_config(&config.processing_lags);
353 self.processing_lag_calculator =
354 Some(ProcessingLagCalculator::with_config(seed, lag_config));
355 }
356
357 let model = config.period_end.model.as_deref().unwrap_or("flat");
359 if model != "flat"
360 || config
361 .period_end
362 .month_end
363 .as_ref()
364 .is_some_and(|m| m.peak_multiplier.unwrap_or(1.0) != 1.0)
365 {
366 let dynamics = Self::convert_period_end_config(&config.period_end);
367 self.temporal_sampler.set_period_end_dynamics(dynamics);
368 }
369
370 self.temporal_patterns_config = Some(config);
371 self
372 }
373
374 fn convert_processing_lag_config(
376 schema: &datasynth_config::schema::ProcessingLagSchemaConfig,
377 ) -> ProcessingLagConfig {
378 let mut config = ProcessingLagConfig {
379 enabled: schema.enabled,
380 ..Default::default()
381 };
382
383 let convert_lag = |lag: &datasynth_config::schema::LagDistributionSchemaConfig| {
385 let mut dist = LagDistribution::log_normal(lag.mu, lag.sigma);
386 if let Some(min) = lag.min_hours {
387 dist.min_lag_hours = min;
388 }
389 if let Some(max) = lag.max_hours {
390 dist.max_lag_hours = max;
391 }
392 dist
393 };
394
395 if let Some(ref lag) = schema.sales_order_lag {
397 config
398 .event_lags
399 .insert(EventType::SalesOrder, convert_lag(lag));
400 }
401 if let Some(ref lag) = schema.purchase_order_lag {
402 config
403 .event_lags
404 .insert(EventType::PurchaseOrder, convert_lag(lag));
405 }
406 if let Some(ref lag) = schema.goods_receipt_lag {
407 config
408 .event_lags
409 .insert(EventType::GoodsReceipt, convert_lag(lag));
410 }
411 if let Some(ref lag) = schema.invoice_receipt_lag {
412 config
413 .event_lags
414 .insert(EventType::InvoiceReceipt, convert_lag(lag));
415 }
416 if let Some(ref lag) = schema.invoice_issue_lag {
417 config
418 .event_lags
419 .insert(EventType::InvoiceIssue, convert_lag(lag));
420 }
421 if let Some(ref lag) = schema.payment_lag {
422 config
423 .event_lags
424 .insert(EventType::Payment, convert_lag(lag));
425 }
426 if let Some(ref lag) = schema.journal_entry_lag {
427 config
428 .event_lags
429 .insert(EventType::JournalEntry, convert_lag(lag));
430 }
431
432 if let Some(ref cross_day) = schema.cross_day_posting {
434 config.cross_day = CrossDayConfig {
435 enabled: cross_day.enabled,
436 probability_by_hour: cross_day.probability_by_hour.clone(),
437 ..Default::default()
438 };
439 }
440
441 config
442 }
443
444 fn convert_period_end_config(
446 schema: &datasynth_config::schema::PeriodEndSchemaConfig,
447 ) -> PeriodEndDynamics {
448 let model_type = schema.model.as_deref().unwrap_or("exponential");
449
450 let convert_period =
452 |period: Option<&datasynth_config::schema::PeriodEndModelSchemaConfig>,
453 default_peak: f64|
454 -> PeriodEndConfig {
455 if let Some(p) = period {
456 let model = match model_type {
457 "flat" => PeriodEndModel::FlatMultiplier {
458 multiplier: p.peak_multiplier.unwrap_or(default_peak),
459 },
460 "extended_crunch" => PeriodEndModel::ExtendedCrunch {
461 start_day: p.start_day.unwrap_or(-10),
462 sustained_high_days: p.sustained_high_days.unwrap_or(3),
463 peak_multiplier: p.peak_multiplier.unwrap_or(default_peak),
464 ramp_up_days: 3, },
466 _ => PeriodEndModel::ExponentialAcceleration {
467 start_day: p.start_day.unwrap_or(-10),
468 base_multiplier: p.base_multiplier.unwrap_or(1.0),
469 peak_multiplier: p.peak_multiplier.unwrap_or(default_peak),
470 decay_rate: p.decay_rate.unwrap_or(0.3),
471 },
472 };
473 PeriodEndConfig {
474 enabled: true,
475 model,
476 additional_multiplier: p.additional_multiplier.unwrap_or(1.0),
477 }
478 } else {
479 PeriodEndConfig {
480 enabled: true,
481 model: PeriodEndModel::ExponentialAcceleration {
482 start_day: -10,
483 base_multiplier: 1.0,
484 peak_multiplier: default_peak,
485 decay_rate: 0.3,
486 },
487 additional_multiplier: 1.0,
488 }
489 }
490 };
491
492 PeriodEndDynamics::new(
493 convert_period(schema.month_end.as_ref(), 2.0),
494 convert_period(schema.quarter_end.as_ref(), 3.5),
495 convert_period(schema.year_end.as_ref(), 5.0),
496 )
497 }
498
499 fn parse_region(region_str: &str) -> Region {
501 match region_str.to_uppercase().as_str() {
502 "US" => Region::US,
503 "DE" => Region::DE,
504 "GB" => Region::GB,
505 "CN" => Region::CN,
506 "JP" => Region::JP,
507 "IN" => Region::IN,
508 "BR" => Region::BR,
509 "MX" => Region::MX,
510 "AU" => Region::AU,
511 "SG" => Region::SG,
512 "KR" => Region::KR,
513 "FR" => Region::FR,
514 "IT" => Region::IT,
515 "ES" => Region::ES,
516 "CA" => Region::CA,
517 _ => Region::US,
518 }
519 }
520
521 pub fn set_company_selector(&mut self, selector: WeightedCompanySelector) {
523 self.company_selector = selector;
524 }
525
526 pub fn company_selector(&self) -> &WeightedCompanySelector {
528 &self.company_selector
529 }
530
531 pub fn set_fraud_config(&mut self, config: FraudConfig) {
533 self.fraud_config = config;
534 }
535
536 pub fn with_vendors(mut self, vendors: &[Vendor]) -> Self {
541 if !vendors.is_empty() {
542 self.vendor_pool = VendorPool::from_vendors(vendors.to_vec());
543 self.using_real_master_data = true;
544 }
545 self
546 }
547
548 pub fn with_customers(mut self, customers: &[Customer]) -> Self {
553 if !customers.is_empty() {
554 self.customer_pool = CustomerPool::from_customers(customers.to_vec());
555 self.using_real_master_data = true;
556 }
557 self
558 }
559
560 pub fn with_materials(mut self, materials: &[Material]) -> Self {
564 if !materials.is_empty() {
565 self.material_pool = Some(MaterialPool::from_materials(materials.to_vec()));
566 self.using_real_master_data = true;
567 }
568 self
569 }
570
571 pub fn with_master_data(
576 self,
577 vendors: &[Vendor],
578 customers: &[Customer],
579 materials: &[Material],
580 ) -> Self {
581 self.with_vendors(vendors)
582 .with_customers(customers)
583 .with_materials(materials)
584 }
585
586 pub fn with_country_pack_names(mut self, pack: &CountryPack) -> Self {
593 let name_gen =
594 datasynth_core::templates::MultiCultureNameGenerator::from_country_pack(pack);
595 let config = UserGeneratorConfig {
596 culture_distribution: Vec::new(),
599 email_domain: name_gen.email_domain().to_string(),
600 generate_realistic_names: true,
601 };
602 let mut user_gen = UserGenerator::with_name_generator(self.seed + 100, config, name_gen);
603 self.user_pool = Some(user_gen.generate_standard(&self.companies));
604 self
605 }
606
607 pub fn is_using_real_master_data(&self) -> bool {
609 self.using_real_master_data
610 }
611
612 fn determine_fraud(&mut self) -> Option<FraudType> {
614 if !self.fraud_config.enabled {
615 return None;
616 }
617
618 if self.rng.random::<f64>() >= self.fraud_config.fraud_rate {
620 return None;
621 }
622
623 Some(self.select_fraud_type())
625 }
626
627 fn select_fraud_type(&mut self) -> FraudType {
629 let dist = &self.fraud_config.fraud_type_distribution;
630 let roll: f64 = self.rng.random();
631
632 let mut cumulative = 0.0;
633
634 cumulative += dist.suspense_account_abuse;
635 if roll < cumulative {
636 return FraudType::SuspenseAccountAbuse;
637 }
638
639 cumulative += dist.fictitious_transaction;
640 if roll < cumulative {
641 return FraudType::FictitiousTransaction;
642 }
643
644 cumulative += dist.revenue_manipulation;
645 if roll < cumulative {
646 return FraudType::RevenueManipulation;
647 }
648
649 cumulative += dist.expense_capitalization;
650 if roll < cumulative {
651 return FraudType::ExpenseCapitalization;
652 }
653
654 cumulative += dist.split_transaction;
655 if roll < cumulative {
656 return FraudType::SplitTransaction;
657 }
658
659 cumulative += dist.timing_anomaly;
660 if roll < cumulative {
661 return FraudType::TimingAnomaly;
662 }
663
664 cumulative += dist.unauthorized_access;
665 if roll < cumulative {
666 return FraudType::UnauthorizedAccess;
667 }
668
669 FraudType::DuplicatePayment
671 }
672
673 fn fraud_type_to_amount_pattern(&self, fraud_type: FraudType) -> FraudAmountPattern {
675 match fraud_type {
676 FraudType::SplitTransaction | FraudType::JustBelowThreshold => {
677 FraudAmountPattern::ThresholdAdjacent
678 }
679 FraudType::FictitiousTransaction
680 | FraudType::FictitiousEntry
681 | FraudType::SuspenseAccountAbuse
682 | FraudType::RoundDollarManipulation => FraudAmountPattern::ObviousRoundNumbers,
683 FraudType::RevenueManipulation
684 | FraudType::ExpenseCapitalization
685 | FraudType::ImproperCapitalization
686 | FraudType::ReserveManipulation
687 | FraudType::UnauthorizedAccess
688 | FraudType::PrematureRevenue
689 | FraudType::UnderstatedLiabilities
690 | FraudType::OverstatedAssets
691 | FraudType::ChannelStuffing => FraudAmountPattern::StatisticallyImprobable,
692 FraudType::DuplicatePayment
693 | FraudType::TimingAnomaly
694 | FraudType::SelfApproval
695 | FraudType::ExceededApprovalLimit
696 | FraudType::SegregationOfDutiesViolation
697 | FraudType::UnauthorizedApproval
698 | FraudType::CollusiveApproval
699 | FraudType::FictitiousVendor
700 | FraudType::ShellCompanyPayment
701 | FraudType::Kickback
702 | FraudType::KickbackScheme
703 | FraudType::InvoiceManipulation
704 | FraudType::AssetMisappropriation
705 | FraudType::InventoryTheft
706 | FraudType::GhostEmployee => FraudAmountPattern::Normal,
707 FraudType::ImproperRevenueRecognition
709 | FraudType::ImproperPoAllocation
710 | FraudType::VariableConsiderationManipulation
711 | FraudType::ContractModificationMisstatement => {
712 FraudAmountPattern::StatisticallyImprobable
713 }
714 FraudType::LeaseClassificationManipulation
716 | FraudType::OffBalanceSheetLease
717 | FraudType::LeaseLiabilityUnderstatement
718 | FraudType::RouAssetMisstatement => FraudAmountPattern::StatisticallyImprobable,
719 FraudType::FairValueHierarchyManipulation
721 | FraudType::Level3InputManipulation
722 | FraudType::ValuationTechniqueManipulation => {
723 FraudAmountPattern::StatisticallyImprobable
724 }
725 FraudType::DelayedImpairment
727 | FraudType::ImpairmentTestAvoidance
728 | FraudType::CashFlowProjectionManipulation
729 | FraudType::ImproperImpairmentReversal => FraudAmountPattern::StatisticallyImprobable,
730 FraudType::BidRigging
732 | FraudType::PhantomVendorContract
733 | FraudType::ConflictOfInterestSourcing => FraudAmountPattern::Normal,
734 FraudType::SplitContractThreshold => FraudAmountPattern::ThresholdAdjacent,
735 FraudType::GhostEmployeePayroll
737 | FraudType::PayrollInflation
738 | FraudType::DuplicateExpenseReport
739 | FraudType::FictitiousExpense => FraudAmountPattern::Normal,
740 FraudType::SplitExpenseToAvoidApproval => FraudAmountPattern::ThresholdAdjacent,
741 FraudType::RevenueTimingManipulation => FraudAmountPattern::StatisticallyImprobable,
743 FraudType::QuotePriceOverride => FraudAmountPattern::Normal,
744 }
745 }
746
747 #[inline]
749 fn generate_deterministic_uuid(&self) -> uuid::Uuid {
750 self.uuid_factory.next()
751 }
752
753 const COST_CENTER_POOL: &'static [&'static str] =
755 &["CC1000", "CC2000", "CC3000", "CC4000", "CC5000"];
756
757 fn enrich_line_items(&self, entry: &mut JournalEntry) {
763 let posting_date = entry.header.posting_date;
764 let company_code = &entry.header.company_code;
765 let header_text = entry.header.header_text.clone();
766 let business_process = entry.header.business_process;
767
768 let doc_id_bytes = entry.header.document_id.as_bytes();
770 let mut cc_seed: usize = 0;
771 for &b in doc_id_bytes {
772 cc_seed = cc_seed.wrapping_add(b as usize);
773 }
774
775 for (i, line) in entry.lines.iter_mut().enumerate() {
776 if line.account_description.is_none() {
778 line.account_description = self
779 .coa
780 .get_account(&line.gl_account)
781 .map(|a| a.short_description.clone());
782 }
783
784 if line.cost_center.is_none() {
786 let first_char = line.gl_account.chars().next().unwrap_or('0');
787 if first_char == '5' || first_char == '6' {
788 let idx = cc_seed.wrapping_add(i) % Self::COST_CENTER_POOL.len();
789 line.cost_center = Some(Self::COST_CENTER_POOL[idx].to_string());
790 }
791 }
792
793 if line.profit_center.is_none() {
795 let suffix = match business_process {
796 Some(BusinessProcess::P2P) => "-P2P",
797 Some(BusinessProcess::O2C) => "-O2C",
798 Some(BusinessProcess::R2R) => "-R2R",
799 Some(BusinessProcess::H2R) => "-H2R",
800 _ => "",
801 };
802 line.profit_center = Some(format!("PC-{company_code}{suffix}"));
803 }
804
805 if line.line_text.is_none() {
807 line.line_text = header_text.clone();
808 }
809
810 if line.value_date.is_none()
812 && (line.gl_account.starts_with("1100") || line.gl_account.starts_with("2000"))
813 {
814 line.value_date = Some(posting_date);
815 }
816
817 if line.assignment.is_none() {
819 if line.gl_account.starts_with("2000") {
820 if let Some(ref ht) = header_text {
822 if let Some(vendor_part) = ht.rsplit(" - ").next() {
824 if vendor_part.starts_with("V-")
825 || vendor_part.starts_with("VENDOR")
826 || vendor_part.starts_with("Vendor")
827 {
828 line.assignment = Some(vendor_part.to_string());
829 }
830 }
831 }
832 } else if line.gl_account.starts_with("1100") {
833 if let Some(ref ht) = header_text {
835 if let Some(customer_part) = ht.rsplit(" - ").next() {
836 if customer_part.starts_with("C-")
837 || customer_part.starts_with("CUST")
838 || customer_part.starts_with("Customer")
839 {
840 line.assignment = Some(customer_part.to_string());
841 }
842 }
843 }
844 }
845 }
846 }
847 }
848
849 pub fn generate(&mut self) -> JournalEntry {
851 debug!(
852 count = self.count,
853 companies = self.companies.len(),
854 start_date = %self.start_date,
855 end_date = %self.end_date,
856 "Generating journal entry"
857 );
858
859 if let Some(ref state) = self.batch_state {
861 if state.remaining > 0 {
862 return self.generate_batched_entry();
863 }
864 }
865
866 self.count += 1;
867
868 let document_id = self.generate_deterministic_uuid();
870
871 let mut posting_date = self
873 .temporal_sampler
874 .sample_date(self.start_date, self.end_date);
875
876 if let Some(ref calc) = self.business_day_calculator {
878 if !calc.is_business_day(posting_date) {
879 posting_date = calc.next_business_day(posting_date, false);
881 if posting_date > self.end_date {
883 posting_date = calc.prev_business_day(self.end_date, true);
884 }
885 }
886 }
887
888 let company_code = self.company_selector.select(&mut self.rng).to_string();
890
891 let line_spec = self.line_sampler.sample();
893
894 let source = self.select_source();
896 let is_automated = matches!(
897 source,
898 TransactionSource::Automated | TransactionSource::Recurring
899 );
900
901 let business_process = self.select_business_process();
903
904 let fraud_type = self.determine_fraud();
906 let is_fraud = fraud_type.is_some();
907
908 let time = self.temporal_sampler.sample_time(!is_automated);
910 let created_at = posting_date.and_time(time).and_utc();
911
912 let (created_by, user_persona) = self.select_user(is_automated);
914
915 let mut header =
917 JournalEntryHeader::with_deterministic_id(company_code, posting_date, document_id);
918 header.created_at = created_at;
919 header.source = source;
920 header.created_by = created_by;
921 header.user_persona = user_persona;
922 header.business_process = Some(business_process);
923 header.document_type = Self::document_type_for_process(business_process).to_string();
924 header.is_fraud = is_fraud;
925 header.fraud_type = fraud_type;
926
927 let mut context =
929 DescriptionContext::with_period(posting_date.month(), posting_date.year());
930
931 match business_process {
933 BusinessProcess::P2P => {
934 if let Some(vendor) = self.vendor_pool.random_vendor(&mut self.rng) {
935 context.vendor_name = Some(vendor.name.clone());
936 }
937 }
938 BusinessProcess::O2C => {
939 if let Some(customer) = self.customer_pool.random_customer(&mut self.rng) {
940 context.customer_name = Some(customer.name.clone());
941 }
942 }
943 _ => {}
944 }
945
946 if self.template_config.descriptions.generate_header_text {
948 header.header_text = Some(self.description_generator.generate_header_text(
949 business_process,
950 &context,
951 &mut self.rng,
952 ));
953 }
954
955 if self.template_config.references.generate_references {
957 header.reference = Some(
958 self.reference_generator
959 .generate_for_process_year(business_process, posting_date.year()),
960 );
961 }
962
963 let mut entry = JournalEntry::new(header);
965
966 let base_amount = if let Some(ft) = fraud_type {
968 let pattern = self.fraud_type_to_amount_pattern(ft);
969 self.amount_sampler.sample_fraud(pattern)
970 } else {
971 self.amount_sampler.sample()
972 };
973
974 let drift_adjusted_amount = {
976 let drift = self.get_drift_adjustments(posting_date);
977 if drift.amount_mean_multiplier != 1.0 {
978 let multiplier = drift.amount_mean_multiplier * drift.seasonal_factor;
980 let adjusted = base_amount.to_f64().unwrap_or(1.0) * multiplier;
981 Decimal::from_f64_retain(adjusted).unwrap_or(base_amount)
982 } else {
983 base_amount
984 }
985 };
986
987 let total_amount = if is_automated {
989 drift_adjusted_amount } else {
991 self.apply_human_variation(drift_adjusted_amount)
992 };
993
994 let debit_amounts = self
996 .amount_sampler
997 .sample_summing_to(line_spec.debit_count, total_amount);
998 for (i, amount) in debit_amounts.into_iter().enumerate() {
999 let account_number = self.select_debit_account().account_number.clone();
1000 let mut line = JournalEntryLine::debit(
1001 entry.header.document_id,
1002 (i + 1) as u32,
1003 account_number.clone(),
1004 amount,
1005 );
1006
1007 if self.template_config.descriptions.generate_line_text {
1009 line.line_text = Some(self.description_generator.generate_line_text(
1010 &account_number,
1011 &context,
1012 &mut self.rng,
1013 ));
1014 }
1015
1016 entry.add_line(line);
1017 }
1018
1019 let credit_amounts = self
1021 .amount_sampler
1022 .sample_summing_to(line_spec.credit_count, total_amount);
1023 for (i, amount) in credit_amounts.into_iter().enumerate() {
1024 let account_number = self.select_credit_account().account_number.clone();
1025 let mut line = JournalEntryLine::credit(
1026 entry.header.document_id,
1027 (line_spec.debit_count + i + 1) as u32,
1028 account_number.clone(),
1029 amount,
1030 );
1031
1032 if self.template_config.descriptions.generate_line_text {
1034 line.line_text = Some(self.description_generator.generate_line_text(
1035 &account_number,
1036 &context,
1037 &mut self.rng,
1038 ));
1039 }
1040
1041 entry.add_line(line);
1042 }
1043
1044 self.enrich_line_items(&mut entry);
1046
1047 if self.persona_errors_enabled && !is_automated {
1049 self.maybe_inject_persona_error(&mut entry);
1050 }
1051
1052 if self.approval_enabled {
1054 self.maybe_apply_approval_workflow(&mut entry, posting_date);
1055 }
1056
1057 self.maybe_start_batch(&entry);
1059
1060 entry
1061 }
1062
1063 pub fn with_persona_errors(mut self, enabled: bool) -> Self {
1068 self.persona_errors_enabled = enabled;
1069 self
1070 }
1071
1072 pub fn with_fraud_config(mut self, config: FraudConfig) -> Self {
1077 self.fraud_config = config;
1078 self
1079 }
1080
1081 pub fn persona_errors_enabled(&self) -> bool {
1083 self.persona_errors_enabled
1084 }
1085
1086 pub fn with_batching(mut self, enabled: bool) -> Self {
1091 if !enabled {
1092 self.batch_state = None;
1093 }
1094 self
1095 }
1096
1097 pub fn batching_enabled(&self) -> bool {
1099 true
1101 }
1102
1103 fn maybe_start_batch(&mut self, entry: &JournalEntry) {
1108 if entry.header.source == TransactionSource::Automated || entry.header.is_fraud {
1110 return;
1111 }
1112
1113 if self.rng.random::<f64>() > 0.15 {
1115 return;
1116 }
1117
1118 let base_account = entry
1120 .lines
1121 .first()
1122 .map(|l| l.gl_account.clone())
1123 .unwrap_or_default();
1124
1125 let base_amount = entry.total_debit();
1126
1127 self.batch_state = Some(BatchState {
1128 base_account_number: base_account,
1129 base_amount,
1130 base_business_process: entry.header.business_process,
1131 base_posting_date: entry.header.posting_date,
1132 remaining: self.rng.random_range(2..7), });
1134 }
1135
1136 fn generate_batched_entry(&mut self) -> JournalEntry {
1144 use rust_decimal::Decimal;
1145
1146 if let Some(ref mut state) = self.batch_state {
1148 state.remaining = state.remaining.saturating_sub(1);
1149 }
1150
1151 let Some(batch) = self.batch_state.clone() else {
1152 tracing::warn!(
1155 "generate_batched_entry called without batch_state; generating standard entry"
1156 );
1157 self.batch_state = None;
1158 return self.generate();
1159 };
1160
1161 let posting_date = batch.base_posting_date;
1163
1164 self.count += 1;
1165 let document_id = self.generate_deterministic_uuid();
1166
1167 let company_code = self.company_selector.select(&mut self.rng).to_string();
1169
1170 let _line_spec = LineItemSpec {
1172 total_count: 2,
1173 debit_count: 1,
1174 credit_count: 1,
1175 split_type: DebitCreditSplit::Equal,
1176 };
1177
1178 let source = TransactionSource::Manual;
1180
1181 let business_process = batch.base_business_process.unwrap_or(BusinessProcess::R2R);
1183
1184 let time = self.temporal_sampler.sample_time(true);
1186 let created_at = posting_date.and_time(time).and_utc();
1187
1188 let (created_by, user_persona) = self.select_user(false);
1190
1191 let mut header =
1193 JournalEntryHeader::with_deterministic_id(company_code, posting_date, document_id);
1194 header.created_at = created_at;
1195 header.source = source;
1196 header.created_by = created_by;
1197 header.user_persona = user_persona;
1198 header.business_process = Some(business_process);
1199 header.document_type = Self::document_type_for_process(business_process).to_string();
1200
1201 let variation = self.rng.random_range(-0.15..0.15);
1203 let varied_amount =
1204 batch.base_amount * (Decimal::ONE + Decimal::try_from(variation).unwrap_or_default());
1205 let total_amount = varied_amount.round_dp(2).max(Decimal::from(1));
1206
1207 let mut entry = JournalEntry::new(header);
1209
1210 let debit_line = JournalEntryLine::debit(
1212 entry.header.document_id,
1213 1,
1214 batch.base_account_number.clone(),
1215 total_amount,
1216 );
1217 entry.add_line(debit_line);
1218
1219 let credit_account = self.select_credit_account().account_number.clone();
1221 let credit_line =
1222 JournalEntryLine::credit(entry.header.document_id, 2, credit_account, total_amount);
1223 entry.add_line(credit_line);
1224
1225 self.enrich_line_items(&mut entry);
1227
1228 if self.persona_errors_enabled {
1230 self.maybe_inject_persona_error(&mut entry);
1231 }
1232
1233 if self.approval_enabled {
1235 self.maybe_apply_approval_workflow(&mut entry, posting_date);
1236 }
1237
1238 if batch.remaining <= 1 {
1240 self.batch_state = None;
1241 }
1242
1243 entry
1244 }
1245
1246 fn maybe_inject_persona_error(&mut self, entry: &mut JournalEntry) {
1248 let persona_str = &entry.header.user_persona;
1250 let persona = match persona_str.to_lowercase().as_str() {
1251 s if s.contains("junior") => UserPersona::JuniorAccountant,
1252 s if s.contains("senior") => UserPersona::SeniorAccountant,
1253 s if s.contains("controller") => UserPersona::Controller,
1254 s if s.contains("manager") => UserPersona::Manager,
1255 s if s.contains("executive") => UserPersona::Executive,
1256 _ => return, };
1258
1259 let base_error_rate = persona.error_rate();
1261
1262 let adjusted_rate = self.apply_stress_factors(base_error_rate, entry.header.posting_date);
1264
1265 if self.rng.random::<f64>() >= adjusted_rate {
1267 return; }
1269
1270 self.inject_human_error(entry, persona);
1272 }
1273
1274 fn apply_stress_factors(&self, base_rate: f64, posting_date: chrono::NaiveDate) -> f64 {
1283 use chrono::Datelike;
1284
1285 let mut rate = base_rate;
1286 let day = posting_date.day();
1287 let month = posting_date.month();
1288
1289 if month == 12 && day >= 28 {
1291 rate *= 2.0;
1292 return rate.min(0.5); }
1294
1295 if matches!(month, 3 | 6 | 9 | 12) && day >= 28 {
1297 rate *= 1.75; return rate.min(0.4);
1299 }
1300
1301 if day >= 28 {
1303 rate *= 1.5; }
1305
1306 let weekday = posting_date.weekday();
1308 match weekday {
1309 chrono::Weekday::Mon => {
1310 rate *= 1.2;
1312 }
1313 chrono::Weekday::Fri => {
1314 rate *= 1.3;
1316 }
1317 _ => {}
1318 }
1319
1320 rate.min(0.4)
1322 }
1323
1324 fn apply_human_variation(&mut self, amount: rust_decimal::Decimal) -> rust_decimal::Decimal {
1333 use rust_decimal::Decimal;
1334
1335 if amount < Decimal::from(10) {
1337 return amount;
1338 }
1339
1340 if self.rng.random::<f64>() > 0.70 {
1342 return amount;
1343 }
1344
1345 let variation_type: u8 = self.rng.random_range(0..4);
1347
1348 match variation_type {
1349 0 => {
1350 let variation_pct = self.rng.random_range(-0.02..0.02);
1352 let variation = amount * Decimal::try_from(variation_pct).unwrap_or_default();
1353 (amount + variation).round_dp(2)
1354 }
1355 1 => {
1356 let ten = Decimal::from(10);
1358 (amount / ten).round() * ten
1359 }
1360 2 => {
1361 if amount >= Decimal::from(500) {
1363 let hundred = Decimal::from(100);
1364 (amount / hundred).round() * hundred
1365 } else {
1366 amount
1367 }
1368 }
1369 3 => {
1370 let cents = Decimal::new(self.rng.random_range(-100..100), 2);
1372 (amount + cents).max(Decimal::ZERO).round_dp(2)
1373 }
1374 _ => amount,
1375 }
1376 }
1377
1378 fn rebalance_entry(entry: &mut JournalEntry, modified_was_debit: bool, impact: Decimal) {
1384 let balancing_idx = entry.lines.iter().position(|l| {
1386 if modified_was_debit {
1387 l.credit_amount > Decimal::ZERO
1388 } else {
1389 l.debit_amount > Decimal::ZERO
1390 }
1391 });
1392
1393 if let Some(idx) = balancing_idx {
1394 if modified_was_debit {
1395 entry.lines[idx].credit_amount += impact;
1396 } else {
1397 entry.lines[idx].debit_amount += impact;
1398 }
1399 }
1400 }
1401
1402 fn inject_human_error(&mut self, entry: &mut JournalEntry, persona: UserPersona) {
1407 use rust_decimal::Decimal;
1408
1409 let error_type: u8 = match persona {
1411 UserPersona::JuniorAccountant => {
1412 self.rng.random_range(0..5)
1414 }
1415 UserPersona::SeniorAccountant => {
1416 self.rng.random_range(0..3)
1418 }
1419 UserPersona::Controller | UserPersona::Manager => {
1420 self.rng.random_range(3..5)
1422 }
1423 _ => return,
1424 };
1425
1426 match error_type {
1427 0 => {
1428 if let Some(line) = entry.lines.get_mut(0) {
1430 let is_debit = line.debit_amount > Decimal::ZERO;
1431 let original_amount = if is_debit {
1432 line.debit_amount
1433 } else {
1434 line.credit_amount
1435 };
1436
1437 let s = original_amount.to_string();
1439 if s.len() >= 2 {
1440 let chars: Vec<char> = s.chars().collect();
1441 let pos = self.rng.random_range(0..chars.len().saturating_sub(1));
1442 if chars[pos].is_ascii_digit()
1443 && chars.get(pos + 1).is_some_and(char::is_ascii_digit)
1444 {
1445 let mut new_chars = chars;
1446 new_chars.swap(pos, pos + 1);
1447 if let Ok(new_amount) =
1448 new_chars.into_iter().collect::<String>().parse::<Decimal>()
1449 {
1450 let impact = new_amount - original_amount;
1451
1452 if is_debit {
1454 entry.lines[0].debit_amount = new_amount;
1455 } else {
1456 entry.lines[0].credit_amount = new_amount;
1457 }
1458
1459 Self::rebalance_entry(entry, is_debit, impact);
1461
1462 entry.header.header_text = Some(
1463 entry.header.header_text.clone().unwrap_or_default()
1464 + " [HUMAN_ERROR:TRANSPOSITION]",
1465 );
1466 }
1467 }
1468 }
1469 }
1470 }
1471 1 => {
1472 if let Some(line) = entry.lines.get_mut(0) {
1474 let is_debit = line.debit_amount > Decimal::ZERO;
1475 let original_amount = if is_debit {
1476 line.debit_amount
1477 } else {
1478 line.credit_amount
1479 };
1480
1481 let new_amount = original_amount * Decimal::new(10, 0);
1482 let impact = new_amount - original_amount;
1483
1484 if is_debit {
1486 entry.lines[0].debit_amount = new_amount;
1487 } else {
1488 entry.lines[0].credit_amount = new_amount;
1489 }
1490
1491 Self::rebalance_entry(entry, is_debit, impact);
1493
1494 entry.header.header_text = Some(
1495 entry.header.header_text.clone().unwrap_or_default()
1496 + " [HUMAN_ERROR:DECIMAL_SHIFT]",
1497 );
1498 }
1499 }
1500 2 => {
1501 if let Some(ref mut text) = entry.header.header_text {
1503 let typos = ["teh", "adn", "wiht", "taht", "recieve"];
1504 let correct = ["the", "and", "with", "that", "receive"];
1505 let idx = self.rng.random_range(0..typos.len());
1506 if text.to_lowercase().contains(correct[idx]) {
1507 *text = text.replace(correct[idx], typos[idx]);
1508 *text = format!("{text} [HUMAN_ERROR:TYPO]");
1509 }
1510 }
1511 }
1512 3 => {
1513 if let Some(line) = entry.lines.get_mut(0) {
1515 let is_debit = line.debit_amount > Decimal::ZERO;
1516 let original_amount = if is_debit {
1517 line.debit_amount
1518 } else {
1519 line.credit_amount
1520 };
1521
1522 let new_amount =
1523 (original_amount / Decimal::new(100, 0)).round() * Decimal::new(100, 0);
1524 let impact = new_amount - original_amount;
1525
1526 if is_debit {
1528 entry.lines[0].debit_amount = new_amount;
1529 } else {
1530 entry.lines[0].credit_amount = new_amount;
1531 }
1532
1533 Self::rebalance_entry(entry, is_debit, impact);
1535
1536 entry.header.header_text = Some(
1537 entry.header.header_text.clone().unwrap_or_default()
1538 + " [HUMAN_ERROR:ROUNDED]",
1539 );
1540 }
1541 }
1542 4 => {
1543 if entry.header.document_date == entry.header.posting_date {
1546 let days_late = self.rng.random_range(5..15);
1547 entry.header.document_date =
1548 entry.header.posting_date - chrono::Duration::days(days_late);
1549 entry.header.header_text = Some(
1550 entry.header.header_text.clone().unwrap_or_default()
1551 + " [HUMAN_ERROR:LATE_POSTING]",
1552 );
1553 }
1554 }
1555 _ => {}
1556 }
1557 }
1558
1559 fn maybe_apply_approval_workflow(
1564 &mut self,
1565 entry: &mut JournalEntry,
1566 _posting_date: NaiveDate,
1567 ) {
1568 use rust_decimal::Decimal;
1569
1570 let amount = entry.total_debit();
1571
1572 if amount <= self.approval_threshold {
1574 let workflow = ApprovalWorkflow::auto_approved(
1576 entry.header.created_by.clone(),
1577 entry.header.user_persona.clone(),
1578 amount,
1579 entry.header.created_at,
1580 );
1581 entry.header.approval_workflow = Some(workflow);
1582 return;
1583 }
1584
1585 entry.header.sox_relevant = true;
1587
1588 let required_levels = if amount > Decimal::new(100000, 0) {
1590 3 } else if amount > Decimal::new(50000, 0) {
1592 2 } else {
1594 1 };
1596
1597 let mut workflow = ApprovalWorkflow::new(
1599 entry.header.created_by.clone(),
1600 entry.header.user_persona.clone(),
1601 amount,
1602 );
1603 workflow.required_levels = required_levels;
1604
1605 let submit_time = entry.header.created_at;
1607 let submit_action = ApprovalAction::new(
1608 entry.header.created_by.clone(),
1609 entry.header.user_persona.clone(),
1610 self.parse_persona(&entry.header.user_persona),
1611 ApprovalActionType::Submit,
1612 0,
1613 )
1614 .with_timestamp(submit_time);
1615
1616 workflow.actions.push(submit_action);
1617 workflow.status = ApprovalStatus::Pending;
1618 workflow.submitted_at = Some(submit_time);
1619
1620 let mut current_time = submit_time;
1622 for level in 1..=required_levels {
1623 let delay_hours = self.rng.random_range(1..4);
1625 current_time += chrono::Duration::hours(delay_hours);
1626
1627 while current_time.weekday() == chrono::Weekday::Sat
1629 || current_time.weekday() == chrono::Weekday::Sun
1630 {
1631 current_time += chrono::Duration::days(1);
1632 }
1633
1634 let (approver_id, approver_role) = self.select_approver(level);
1636
1637 let approve_action = ApprovalAction::new(
1638 approver_id.clone(),
1639 approver_role.to_string(),
1640 approver_role,
1641 ApprovalActionType::Approve,
1642 level,
1643 )
1644 .with_timestamp(current_time);
1645
1646 workflow.actions.push(approve_action);
1647 workflow.current_level = level;
1648 }
1649
1650 workflow.status = ApprovalStatus::Approved;
1652 workflow.approved_at = Some(current_time);
1653
1654 entry.header.approval_workflow = Some(workflow);
1655 }
1656
1657 fn select_approver(&mut self, level: u8) -> (String, UserPersona) {
1659 let persona = match level {
1660 1 => UserPersona::Manager,
1661 2 => UserPersona::Controller,
1662 _ => UserPersona::Executive,
1663 };
1664
1665 if let Some(ref pool) = self.user_pool {
1667 if let Some(user) = pool.get_random_user(persona, &mut self.rng) {
1668 return (user.user_id.clone(), persona);
1669 }
1670 }
1671
1672 let approver_id = match persona {
1674 UserPersona::Manager => format!("MGR{:04}", self.rng.random_range(1..100)),
1675 UserPersona::Controller => format!("CTRL{:04}", self.rng.random_range(1..20)),
1676 UserPersona::Executive => format!("EXEC{:04}", self.rng.random_range(1..10)),
1677 _ => format!("USR{:04}", self.rng.random_range(1..1000)),
1678 };
1679
1680 (approver_id, persona)
1681 }
1682
1683 fn parse_persona(&self, persona_str: &str) -> UserPersona {
1685 match persona_str.to_lowercase().as_str() {
1686 s if s.contains("junior") => UserPersona::JuniorAccountant,
1687 s if s.contains("senior") => UserPersona::SeniorAccountant,
1688 s if s.contains("controller") => UserPersona::Controller,
1689 s if s.contains("manager") => UserPersona::Manager,
1690 s if s.contains("executive") => UserPersona::Executive,
1691 s if s.contains("automated") || s.contains("system") => UserPersona::AutomatedSystem,
1692 _ => UserPersona::JuniorAccountant, }
1694 }
1695
1696 pub fn with_approval(mut self, enabled: bool) -> Self {
1698 self.approval_enabled = enabled;
1699 self
1700 }
1701
1702 pub fn with_approval_threshold(mut self, threshold: rust_decimal::Decimal) -> Self {
1704 self.approval_threshold = threshold;
1705 self
1706 }
1707
1708 pub fn with_drift_controller(mut self, controller: DriftController) -> Self {
1714 self.drift_controller = Some(controller);
1715 self
1716 }
1717
1718 pub fn with_drift_config(mut self, config: DriftConfig, seed: u64) -> Self {
1723 if config.enabled {
1724 let total_periods = self.calculate_total_periods();
1725 self.drift_controller = Some(DriftController::new(config, seed, total_periods));
1726 }
1727 self
1728 }
1729
1730 fn calculate_total_periods(&self) -> u32 {
1732 let start_year = self.start_date.year();
1733 let start_month = self.start_date.month();
1734 let end_year = self.end_date.year();
1735 let end_month = self.end_date.month();
1736
1737 ((end_year - start_year) * 12 + (end_month as i32 - start_month as i32) + 1).max(1) as u32
1738 }
1739
1740 fn date_to_period(&self, date: NaiveDate) -> u32 {
1742 let start_year = self.start_date.year();
1743 let start_month = self.start_date.month() as i32;
1744 let date_year = date.year();
1745 let date_month = date.month() as i32;
1746
1747 ((date_year - start_year) * 12 + (date_month - start_month)).max(0) as u32
1748 }
1749
1750 fn get_drift_adjustments(&self, date: NaiveDate) -> DriftAdjustments {
1752 if let Some(ref controller) = self.drift_controller {
1753 let period = self.date_to_period(date);
1754 controller.compute_adjustments(period)
1755 } else {
1756 DriftAdjustments::none()
1757 }
1758 }
1759
1760 #[inline]
1762 fn select_user(&mut self, is_automated: bool) -> (String, String) {
1763 if let Some(ref pool) = self.user_pool {
1764 let persona = if is_automated {
1765 UserPersona::AutomatedSystem
1766 } else {
1767 let roll: f64 = self.rng.random();
1769 if roll < 0.4 {
1770 UserPersona::JuniorAccountant
1771 } else if roll < 0.7 {
1772 UserPersona::SeniorAccountant
1773 } else if roll < 0.85 {
1774 UserPersona::Controller
1775 } else {
1776 UserPersona::Manager
1777 }
1778 };
1779
1780 if let Some(user) = pool.get_random_user(persona, &mut self.rng) {
1781 return (user.user_id.clone(), user.persona.to_string());
1782 }
1783 }
1784
1785 if is_automated {
1787 (
1788 format!("BATCH{:04}", self.rng.random_range(1..=20)),
1789 "automated_system".to_string(),
1790 )
1791 } else {
1792 (
1793 format!("USER{:04}", self.rng.random_range(1..=40)),
1794 "senior_accountant".to_string(),
1795 )
1796 }
1797 }
1798
1799 #[inline]
1801 fn select_source(&mut self) -> TransactionSource {
1802 let roll: f64 = self.rng.random();
1803 let dist = &self.config.source_distribution;
1804
1805 if roll < dist.manual {
1806 TransactionSource::Manual
1807 } else if roll < dist.manual + dist.automated {
1808 TransactionSource::Automated
1809 } else if roll < dist.manual + dist.automated + dist.recurring {
1810 TransactionSource::Recurring
1811 } else {
1812 TransactionSource::Adjustment
1813 }
1814 }
1815
1816 #[inline]
1818 fn document_type_for_process(process: BusinessProcess) -> &'static str {
1827 match process {
1828 BusinessProcess::P2P => "KR",
1829 BusinessProcess::O2C => "DR",
1830 BusinessProcess::R2R => "SA",
1831 BusinessProcess::H2R => "HR",
1832 BusinessProcess::A2R => "AA",
1833 _ => "SA",
1834 }
1835 }
1836
1837 fn select_business_process(&mut self) -> BusinessProcess {
1838 let roll: f64 = self.rng.random();
1839
1840 if roll < 0.35 {
1842 BusinessProcess::O2C
1843 } else if roll < 0.65 {
1844 BusinessProcess::P2P
1845 } else if roll < 0.85 {
1846 BusinessProcess::R2R
1847 } else if roll < 0.95 {
1848 BusinessProcess::H2R
1849 } else {
1850 BusinessProcess::A2R
1851 }
1852 }
1853
1854 #[inline]
1855 fn select_debit_account(&mut self) -> &GLAccount {
1856 let accounts = self.coa.get_accounts_by_type(AccountType::Asset);
1857 let expense_accounts = self.coa.get_accounts_by_type(AccountType::Expense);
1858
1859 let all: Vec<_> = if self.rng.random::<f64>() < 0.6 {
1861 accounts
1862 } else {
1863 expense_accounts
1864 };
1865
1866 all.choose(&mut self.rng).copied().unwrap_or_else(|| {
1867 tracing::warn!(
1868 "Account selection returned empty list, falling back to first COA account"
1869 );
1870 &self.coa.accounts[0]
1871 })
1872 }
1873
1874 #[inline]
1875 fn select_credit_account(&mut self) -> &GLAccount {
1876 let liability_accounts = self.coa.get_accounts_by_type(AccountType::Liability);
1877 let revenue_accounts = self.coa.get_accounts_by_type(AccountType::Revenue);
1878
1879 let all: Vec<_> = if self.rng.random::<f64>() < 0.6 {
1881 liability_accounts
1882 } else {
1883 revenue_accounts
1884 };
1885
1886 all.choose(&mut self.rng).copied().unwrap_or_else(|| {
1887 tracing::warn!(
1888 "Account selection returned empty list, falling back to first COA account"
1889 );
1890 &self.coa.accounts[0]
1891 })
1892 }
1893}
1894
1895impl Generator for JournalEntryGenerator {
1896 type Item = JournalEntry;
1897 type Config = (
1898 TransactionConfig,
1899 Arc<ChartOfAccounts>,
1900 Vec<String>,
1901 NaiveDate,
1902 NaiveDate,
1903 );
1904
1905 fn new(config: Self::Config, seed: u64) -> Self {
1906 Self::new_with_params(config.0, config.1, config.2, config.3, config.4, seed)
1907 }
1908
1909 fn generate_one(&mut self) -> Self::Item {
1910 self.generate()
1911 }
1912
1913 fn reset(&mut self) {
1914 self.rng = seeded_rng(self.seed, 0);
1915 self.line_sampler.reset(self.seed + 1);
1916 self.amount_sampler.reset(self.seed + 2);
1917 self.temporal_sampler.reset(self.seed + 3);
1918 self.count = 0;
1919 self.uuid_factory.reset();
1920
1921 let mut ref_gen = ReferenceGenerator::new(
1923 self.start_date.year(),
1924 self.companies
1925 .first()
1926 .map(std::string::String::as_str)
1927 .unwrap_or("1000"),
1928 );
1929 ref_gen.set_prefix(
1930 ReferenceType::Invoice,
1931 &self.template_config.references.invoice_prefix,
1932 );
1933 ref_gen.set_prefix(
1934 ReferenceType::PurchaseOrder,
1935 &self.template_config.references.po_prefix,
1936 );
1937 ref_gen.set_prefix(
1938 ReferenceType::SalesOrder,
1939 &self.template_config.references.so_prefix,
1940 );
1941 self.reference_generator = ref_gen;
1942 }
1943
1944 fn count(&self) -> u64 {
1945 self.count
1946 }
1947
1948 fn seed(&self) -> u64 {
1949 self.seed
1950 }
1951}
1952
1953use datasynth_core::traits::ParallelGenerator;
1954
1955impl ParallelGenerator for JournalEntryGenerator {
1956 fn split(self, parts: usize) -> Vec<Self> {
1962 let parts = parts.max(1);
1963 (0..parts)
1964 .map(|i| {
1965 let sub_seed = self
1967 .seed
1968 .wrapping_add((i as u64).wrapping_mul(0x9E3779B97F4A7C15));
1969
1970 let mut gen = JournalEntryGenerator::new_with_full_config(
1971 self.config.clone(),
1972 Arc::clone(&self.coa),
1973 self.companies.clone(),
1974 self.start_date,
1975 self.end_date,
1976 sub_seed,
1977 self.template_config.clone(),
1978 self.user_pool.clone(),
1979 );
1980
1981 gen.company_selector = self.company_selector.clone();
1983 gen.vendor_pool = self.vendor_pool.clone();
1984 gen.customer_pool = self.customer_pool.clone();
1985 gen.material_pool = self.material_pool.clone();
1986 gen.using_real_master_data = self.using_real_master_data;
1987 gen.fraud_config = self.fraud_config.clone();
1988 gen.persona_errors_enabled = self.persona_errors_enabled;
1989 gen.approval_enabled = self.approval_enabled;
1990 gen.approval_threshold = self.approval_threshold;
1991
1992 gen.uuid_factory = DeterministicUuidFactory::for_partition(
1994 sub_seed,
1995 GeneratorType::JournalEntry,
1996 i as u8,
1997 );
1998
1999 if let Some(ref config) = self.temporal_patterns_config {
2001 gen.temporal_patterns_config = Some(config.clone());
2002 if config.business_days.enabled {
2004 if let Some(ref bdc) = self.business_day_calculator {
2005 gen.business_day_calculator = Some(bdc.clone());
2006 }
2007 }
2008 if config.processing_lags.enabled {
2010 let lag_config =
2011 Self::convert_processing_lag_config(&config.processing_lags);
2012 gen.processing_lag_calculator =
2013 Some(ProcessingLagCalculator::with_config(sub_seed, lag_config));
2014 }
2015 }
2016
2017 if let Some(ref dc) = self.drift_controller {
2019 gen.drift_controller = Some(dc.clone());
2020 }
2021
2022 gen
2023 })
2024 .collect()
2025 }
2026}
2027
2028#[cfg(test)]
2029#[allow(clippy::unwrap_used)]
2030mod tests {
2031 use super::*;
2032 use crate::ChartOfAccountsGenerator;
2033
2034 #[test]
2035 fn test_generate_balanced_entries() {
2036 let mut coa_gen =
2037 ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2038 let coa = Arc::new(coa_gen.generate());
2039
2040 let mut je_gen = JournalEntryGenerator::new_with_params(
2041 TransactionConfig::default(),
2042 coa,
2043 vec!["1000".to_string()],
2044 NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2045 NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2046 42,
2047 );
2048
2049 let mut balanced_count = 0;
2050 for _ in 0..100 {
2051 let entry = je_gen.generate();
2052
2053 let has_human_error = entry
2055 .header
2056 .header_text
2057 .as_ref()
2058 .map(|t| t.contains("[HUMAN_ERROR:"))
2059 .unwrap_or(false);
2060
2061 if !has_human_error {
2062 assert!(
2063 entry.is_balanced(),
2064 "Entry {:?} is not balanced",
2065 entry.header.document_id
2066 );
2067 balanced_count += 1;
2068 }
2069 assert!(entry.line_count() >= 2, "Entry has fewer than 2 lines");
2070 }
2071
2072 assert!(
2074 balanced_count >= 80,
2075 "Expected at least 80 balanced entries, got {}",
2076 balanced_count
2077 );
2078 }
2079
2080 #[test]
2081 fn test_deterministic_generation() {
2082 let mut coa_gen =
2083 ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2084 let coa = Arc::new(coa_gen.generate());
2085
2086 let mut gen1 = JournalEntryGenerator::new_with_params(
2087 TransactionConfig::default(),
2088 Arc::clone(&coa),
2089 vec!["1000".to_string()],
2090 NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2091 NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2092 42,
2093 );
2094
2095 let mut gen2 = JournalEntryGenerator::new_with_params(
2096 TransactionConfig::default(),
2097 coa,
2098 vec!["1000".to_string()],
2099 NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2100 NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2101 42,
2102 );
2103
2104 for _ in 0..50 {
2105 let e1 = gen1.generate();
2106 let e2 = gen2.generate();
2107 assert_eq!(e1.header.document_id, e2.header.document_id);
2108 assert_eq!(e1.total_debit(), e2.total_debit());
2109 }
2110 }
2111
2112 #[test]
2113 fn test_templates_generate_descriptions() {
2114 let mut coa_gen =
2115 ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2116 let coa = Arc::new(coa_gen.generate());
2117
2118 let template_config = TemplateConfig {
2120 names: datasynth_config::schema::NameTemplateConfig {
2121 generate_realistic_names: true,
2122 email_domain: "test.com".to_string(),
2123 culture_distribution: datasynth_config::schema::CultureDistribution::default(),
2124 },
2125 descriptions: datasynth_config::schema::DescriptionTemplateConfig {
2126 generate_header_text: true,
2127 generate_line_text: true,
2128 },
2129 references: datasynth_config::schema::ReferenceTemplateConfig {
2130 generate_references: true,
2131 invoice_prefix: "TEST-INV".to_string(),
2132 po_prefix: "TEST-PO".to_string(),
2133 so_prefix: "TEST-SO".to_string(),
2134 },
2135 };
2136
2137 let mut je_gen = JournalEntryGenerator::new_with_full_config(
2138 TransactionConfig::default(),
2139 coa,
2140 vec!["1000".to_string()],
2141 NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2142 NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2143 42,
2144 template_config,
2145 None,
2146 )
2147 .with_persona_errors(false); for _ in 0..10 {
2150 let entry = je_gen.generate();
2151
2152 assert!(
2154 entry.header.header_text.is_some(),
2155 "Header text should be populated"
2156 );
2157
2158 assert!(
2160 entry.header.reference.is_some(),
2161 "Reference should be populated"
2162 );
2163
2164 assert!(
2166 entry.header.business_process.is_some(),
2167 "Business process should be set"
2168 );
2169
2170 for line in &entry.lines {
2172 assert!(line.line_text.is_some(), "Line text should be populated");
2173 }
2174
2175 assert!(entry.is_balanced());
2177 }
2178 }
2179
2180 #[test]
2181 fn test_user_pool_integration() {
2182 let mut coa_gen =
2183 ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2184 let coa = Arc::new(coa_gen.generate());
2185
2186 let companies = vec!["1000".to_string()];
2187
2188 let mut user_gen = crate::UserGenerator::new(42);
2190 let user_pool = user_gen.generate_standard(&companies);
2191
2192 let mut je_gen = JournalEntryGenerator::new_with_full_config(
2193 TransactionConfig::default(),
2194 coa,
2195 companies,
2196 NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2197 NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2198 42,
2199 TemplateConfig::default(),
2200 Some(user_pool),
2201 );
2202
2203 for _ in 0..20 {
2205 let entry = je_gen.generate();
2206
2207 assert!(!entry.header.created_by.is_empty());
2210 }
2211 }
2212
2213 #[test]
2214 fn test_master_data_connection() {
2215 let mut coa_gen =
2216 ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2217 let coa = Arc::new(coa_gen.generate());
2218
2219 let vendors = vec![
2221 Vendor::new("V-TEST-001", "Test Vendor Alpha", VendorType::Supplier),
2222 Vendor::new("V-TEST-002", "Test Vendor Beta", VendorType::Technology),
2223 ];
2224
2225 let customers = vec![
2227 Customer::new("C-TEST-001", "Test Customer One", CustomerType::Corporate),
2228 Customer::new(
2229 "C-TEST-002",
2230 "Test Customer Two",
2231 CustomerType::SmallBusiness,
2232 ),
2233 ];
2234
2235 let materials = vec![Material::new(
2237 "MAT-TEST-001",
2238 "Test Material A",
2239 MaterialType::RawMaterial,
2240 )];
2241
2242 let generator = JournalEntryGenerator::new_with_params(
2244 TransactionConfig::default(),
2245 coa,
2246 vec!["1000".to_string()],
2247 NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2248 NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2249 42,
2250 );
2251
2252 assert!(!generator.is_using_real_master_data());
2254
2255 let generator_with_data = generator
2257 .with_vendors(&vendors)
2258 .with_customers(&customers)
2259 .with_materials(&materials);
2260
2261 assert!(generator_with_data.is_using_real_master_data());
2263 }
2264
2265 #[test]
2266 fn test_with_master_data_convenience_method() {
2267 let mut coa_gen =
2268 ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2269 let coa = Arc::new(coa_gen.generate());
2270
2271 let vendors = vec![Vendor::new("V-001", "Vendor One", VendorType::Supplier)];
2272 let customers = vec![Customer::new(
2273 "C-001",
2274 "Customer One",
2275 CustomerType::Corporate,
2276 )];
2277 let materials = vec![Material::new(
2278 "MAT-001",
2279 "Material One",
2280 MaterialType::RawMaterial,
2281 )];
2282
2283 let generator = JournalEntryGenerator::new_with_params(
2284 TransactionConfig::default(),
2285 coa,
2286 vec!["1000".to_string()],
2287 NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2288 NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2289 42,
2290 )
2291 .with_master_data(&vendors, &customers, &materials);
2292
2293 assert!(generator.is_using_real_master_data());
2294 }
2295
2296 #[test]
2297 fn test_stress_factors_increase_error_rate() {
2298 let mut coa_gen =
2299 ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2300 let coa = Arc::new(coa_gen.generate());
2301
2302 let generator = JournalEntryGenerator::new_with_params(
2303 TransactionConfig::default(),
2304 coa,
2305 vec!["1000".to_string()],
2306 NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2307 NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2308 42,
2309 );
2310
2311 let base_rate = 0.1;
2312
2313 let regular_day = NaiveDate::from_ymd_opt(2024, 6, 15).unwrap(); let regular_rate = generator.apply_stress_factors(base_rate, regular_day);
2316 assert!(
2317 (regular_rate - base_rate).abs() < 0.01,
2318 "Regular day should have minimal stress factor adjustment"
2319 );
2320
2321 let month_end = NaiveDate::from_ymd_opt(2024, 6, 29).unwrap(); let month_end_rate = generator.apply_stress_factors(base_rate, month_end);
2324 assert!(
2325 month_end_rate > regular_rate,
2326 "Month end should have higher error rate than regular day"
2327 );
2328
2329 let year_end = NaiveDate::from_ymd_opt(2024, 12, 30).unwrap(); let year_end_rate = generator.apply_stress_factors(base_rate, year_end);
2332 assert!(
2333 year_end_rate > month_end_rate,
2334 "Year end should have highest error rate"
2335 );
2336
2337 let friday = NaiveDate::from_ymd_opt(2024, 6, 14).unwrap(); let friday_rate = generator.apply_stress_factors(base_rate, friday);
2340 assert!(
2341 friday_rate > regular_rate,
2342 "Friday should have higher error rate than mid-week"
2343 );
2344
2345 let monday = NaiveDate::from_ymd_opt(2024, 6, 17).unwrap(); let monday_rate = generator.apply_stress_factors(base_rate, monday);
2348 assert!(
2349 monday_rate > regular_rate,
2350 "Monday should have higher error rate than mid-week"
2351 );
2352 }
2353
2354 #[test]
2355 fn test_batching_produces_similar_entries() {
2356 let mut coa_gen =
2357 ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2358 let coa = Arc::new(coa_gen.generate());
2359
2360 let mut je_gen = JournalEntryGenerator::new_with_params(
2362 TransactionConfig::default(),
2363 coa,
2364 vec!["1000".to_string()],
2365 NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2366 NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2367 123,
2368 )
2369 .with_persona_errors(false); let entries: Vec<JournalEntry> = (0..200).map(|_| je_gen.generate()).collect();
2373
2374 for entry in &entries {
2376 assert!(
2377 entry.is_balanced(),
2378 "All entries including batched should be balanced"
2379 );
2380 }
2381
2382 let mut date_counts: std::collections::HashMap<NaiveDate, usize> =
2384 std::collections::HashMap::new();
2385 for entry in &entries {
2386 *date_counts.entry(entry.header.posting_date).or_insert(0) += 1;
2387 }
2388
2389 let dates_with_multiple = date_counts.values().filter(|&&c| c > 1).count();
2391 assert!(
2392 dates_with_multiple > 0,
2393 "With batching, should see some dates with multiple entries"
2394 );
2395 }
2396
2397 #[test]
2398 fn test_temporal_patterns_business_days() {
2399 use datasynth_config::schema::{
2400 BusinessDaySchemaConfig, CalendarSchemaConfig, TemporalPatternsConfig,
2401 };
2402
2403 let mut coa_gen =
2404 ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2405 let coa = Arc::new(coa_gen.generate());
2406
2407 let temporal_config = TemporalPatternsConfig {
2409 enabled: true,
2410 business_days: BusinessDaySchemaConfig {
2411 enabled: true,
2412 ..Default::default()
2413 },
2414 calendars: CalendarSchemaConfig {
2415 regions: vec!["US".to_string()],
2416 custom_holidays: vec![],
2417 },
2418 ..Default::default()
2419 };
2420
2421 let mut je_gen = JournalEntryGenerator::new_with_params(
2422 TransactionConfig::default(),
2423 coa,
2424 vec!["1000".to_string()],
2425 NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2426 NaiveDate::from_ymd_opt(2024, 3, 31).unwrap(), 42,
2428 )
2429 .with_temporal_patterns(temporal_config, 42)
2430 .with_persona_errors(false);
2431
2432 let entries: Vec<JournalEntry> = (0..100).map(|_| je_gen.generate()).collect();
2434
2435 for entry in &entries {
2436 let weekday = entry.header.posting_date.weekday();
2437 assert!(
2438 weekday != chrono::Weekday::Sat && weekday != chrono::Weekday::Sun,
2439 "Posting date {:?} should not be a weekend",
2440 entry.header.posting_date
2441 );
2442 }
2443 }
2444
2445 #[test]
2446 fn test_default_generation_filters_weekends() {
2447 let mut coa_gen =
2451 ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2452 let coa = Arc::new(coa_gen.generate());
2453
2454 let mut je_gen = JournalEntryGenerator::new_with_params(
2455 TransactionConfig::default(),
2456 coa,
2457 vec!["1000".to_string()],
2458 NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2459 NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2460 42,
2461 )
2462 .with_persona_errors(false);
2463
2464 let total = 500;
2465 let entries: Vec<JournalEntry> = (0..total).map(|_| je_gen.generate()).collect();
2466
2467 let weekend_count = entries
2468 .iter()
2469 .filter(|e| {
2470 let wd = e.header.posting_date.weekday();
2471 wd == chrono::Weekday::Sat || wd == chrono::Weekday::Sun
2472 })
2473 .count();
2474
2475 let weekend_pct = weekend_count as f64 / total as f64;
2476 assert!(
2477 weekend_pct < 0.05,
2478 "Expected weekend entries <5% of total without temporal_patterns enabled, \
2479 but got {:.1}% ({}/{})",
2480 weekend_pct * 100.0,
2481 weekend_count,
2482 total
2483 );
2484 }
2485
/// Checks that generated entries carry a spread of document types instead of
/// collapsing onto the single code "SA".
///
/// Draws 200 entries from a generator seeded with 99 (built with
/// `with_persona_errors(false)` and `with_batching(false)`), then requires more
/// than three distinct `header.document_type` values and "SA" on fewer than half
/// of the entries.
#[test]
fn test_document_type_derived_from_business_process() {
    let mut chart_builder =
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
    let chart = Arc::new(chart_builder.generate());

    let period_start = NaiveDate::from_ymd_opt(2024, 1, 1).unwrap();
    let period_end = NaiveDate::from_ymd_opt(2024, 12, 31).unwrap();
    let mut generator = JournalEntryGenerator::new_with_params(
        TransactionConfig::default(),
        chart,
        vec!["1000".to_string()],
        period_start,
        period_end,
        99,
    )
    .with_persona_errors(false)
    .with_batching(false);

    // Materialise the whole sample first; both statistics are derived from it below.
    let sample_size = 200;
    let sample: Vec<JournalEntry> = (0..sample_size).map(|_| generator.generate()).collect();

    let distinct_types: std::collections::HashSet<_> = sample
        .iter()
        .map(|entry| entry.header.document_type.clone())
        .collect();
    let sa_hits = sample
        .iter()
        .filter(|entry| {
            let doc_type = &entry.header.document_type;
            doc_type == "SA"
        })
        .count();

    // Variety check: more than three different document types must show up.
    assert!(
        distinct_types.len() > 3,
        "Expected >3 distinct document types, got {} ({:?})",
        distinct_types.len(),
        distinct_types,
    );

    // Dominance check: "SA" must stay below half of the sample.
    let sa_share = sa_hits as f64 / sample_size as f64;
    assert!(
        sa_share < 0.50,
        "Expected SA <50%, got {:.1}% ({}/{})",
        sa_share * 100.0,
        sa_hits,
        sample_size,
    );
}
2534
/// Checks that nearly every generated line item has `account_description` set.
///
/// Generates 200 entries (seed 42, `with_persona_errors(false)`), tallies all
/// line items and those whose `account_description` is `Some`, and requires the
/// ratio to exceed 95%.
#[test]
fn test_enrich_line_items_account_description() {
    let mut chart_builder =
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
    let chart = Arc::new(chart_builder.generate());

    let period_start = NaiveDate::from_ymd_opt(2024, 1, 1).unwrap();
    let period_end = NaiveDate::from_ymd_opt(2024, 12, 31).unwrap();
    let mut generator = JournalEntryGenerator::new_with_params(
        TransactionConfig::default(),
        chart,
        vec!["1000".to_string()],
        period_start,
        period_end,
        42,
    )
    .with_persona_errors(false);

    let sample_size = 200;
    let mut line_total = 0_usize;
    let mut described = 0_usize;

    // Single pass: count every line and, separately, the ones carrying a description.
    for _ in 0..sample_size {
        let entry = generator.generate();
        line_total += entry.lines.len();
        for line in &entry.lines {
            if line.account_description.is_some() {
                described += 1;
            }
        }
    }

    let described_share = described as f64 / line_total as f64;
    assert!(
        described_share > 0.95,
        "Expected >95% of lines to have account_description, got {:.1}% ({}/{})",
        described_share * 100.0,
        described,
        line_total,
    );
}
2571
/// Checks that most line items on accounts starting with '5' or '6' have a
/// `cost_center`.
///
/// Generates 300 entries (seed 42, `with_persona_errors(false)`), selects lines
/// whose `gl_account` begins with '5' or '6', and requires more than 80% of them
/// to have `cost_center` set. If no such line is produced the test passes
/// without asserting anything.
#[test]
fn test_enrich_line_items_cost_center_for_expense_accounts() {
    let mut chart_builder =
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
    let chart = Arc::new(chart_builder.generate());

    let period_start = NaiveDate::from_ymd_opt(2024, 1, 1).unwrap();
    let period_end = NaiveDate::from_ymd_opt(2024, 12, 31).unwrap();
    let mut generator = JournalEntryGenerator::new_with_params(
        TransactionConfig::default(),
        chart,
        vec!["1000".to_string()],
        period_start,
        period_end,
        42,
    )
    .with_persona_errors(false);

    let sample_size = 300;
    let mut expense_total = 0_usize;
    let mut expense_with_cc = 0_usize;

    for _ in 0..sample_size {
        let entry = generator.generate();
        for line in &entry.lines {
            // An empty account string has no first character and is not selected.
            let leading = line.gl_account.chars().next();
            if !matches!(leading, Some('5') | Some('6')) {
                continue;
            }
            expense_total += 1;
            if line.cost_center.is_some() {
                expense_with_cc += 1;
            }
        }
    }

    // Nothing to measure: leave the test green rather than dividing by zero.
    if expense_total == 0 {
        return;
    }

    let cc_share = expense_with_cc as f64 / expense_total as f64;
    assert!(
        cc_share > 0.80,
        "Expected >80% of expense lines to have cost_center, got {:.1}% ({}/{})",
        cc_share * 100.0,
        expense_with_cc,
        expense_total,
    );
}
2616
/// Checks that nearly every generated line item has both `profit_center` and
/// `line_text` set.
///
/// Generates 100 entries (seed 42, `with_persona_errors(false)`) and requires
/// that more than 95% of all line items have `profit_center` set and,
/// independently, more than 95% have `line_text` set.
#[test]
fn test_enrich_line_items_profit_center_and_line_text() {
    let mut chart_builder =
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
    let chart = Arc::new(chart_builder.generate());

    let period_start = NaiveDate::from_ymd_opt(2024, 1, 1).unwrap();
    let period_end = NaiveDate::from_ymd_opt(2024, 12, 31).unwrap();
    let mut generator = JournalEntryGenerator::new_with_params(
        TransactionConfig::default(),
        chart,
        vec!["1000".to_string()],
        period_start,
        period_end,
        42,
    )
    .with_persona_errors(false);

    let sample_size = 100;
    let sample: Vec<JournalEntry> = (0..sample_size).map(|_| generator.generate()).collect();

    // Flatten once; both ratios share the same denominator.
    let all_lines: Vec<_> = sample.iter().flat_map(|entry| &entry.lines).collect();
    let line_total = all_lines.len();

    let pc_hits = all_lines
        .iter()
        .filter(|line| line.profit_center.is_some())
        .count();
    let text_hits = all_lines
        .iter()
        .filter(|line| line.line_text.is_some())
        .count();

    // Profit-center coverage is checked first, as the earlier failure to report.
    let pc_share = pc_hits as f64 / line_total as f64;
    assert!(
        pc_share > 0.95,
        "Expected >95% of lines to have profit_center, got {:.1}% ({}/{})",
        pc_share * 100.0,
        pc_hits,
        line_total,
    );

    let text_share = text_hits as f64 / line_total as f64;
    assert!(
        text_share > 0.95,
        "Expected >95% of lines to have line_text, got {:.1}% ({}/{})",
        text_share * 100.0,
        text_hits,
        line_total,
    );
}
2668}