1use chrono::{Datelike, NaiveDate};
4use datasynth_core::utils::seeded_rng;
5use rand::prelude::*;
6use rand_chacha::ChaCha8Rng;
7use rust_decimal::prelude::*;
8use rust_decimal::Decimal;
9use std::sync::Arc;
10
11use tracing::debug;
12
13use datasynth_config::schema::{
14 FraudConfig, GeneratorConfig, TemplateConfig, TemporalPatternsConfig, TransactionConfig,
15};
16use datasynth_core::distributions::{
17 BusinessDayCalculator, CrossDayConfig, DriftAdjustments, DriftConfig, DriftController,
18 EventType, LagDistribution, PeriodEndConfig, PeriodEndDynamics, PeriodEndModel,
19 ProcessingLagCalculator, ProcessingLagConfig, *,
20};
21use datasynth_core::models::*;
22use datasynth_core::templates::{
23 descriptions::DescriptionContext, DescriptionGenerator, ReferenceGenerator, ReferenceType,
24};
25use datasynth_core::traits::Generator;
26use datasynth_core::uuid_factory::{DeterministicUuidFactory, GeneratorType};
27use datasynth_core::CountryPack;
28
29use crate::company_selector::WeightedCompanySelector;
30use crate::user_generator::{UserGenerator, UserGeneratorConfig};
31
32pub struct JournalEntryGenerator {
34 rng: ChaCha8Rng,
35 seed: u64,
36 config: TransactionConfig,
37 coa: Arc<ChartOfAccounts>,
38 companies: Vec<String>,
39 company_selector: WeightedCompanySelector,
40 line_sampler: LineItemSampler,
41 amount_sampler: AmountSampler,
42 temporal_sampler: TemporalSampler,
43 start_date: NaiveDate,
44 end_date: NaiveDate,
45 count: u64,
46 uuid_factory: DeterministicUuidFactory,
47 user_pool: Option<UserPool>,
49 description_generator: DescriptionGenerator,
50 reference_generator: ReferenceGenerator,
51 template_config: TemplateConfig,
52 vendor_pool: VendorPool,
53 customer_pool: CustomerPool,
54 material_pool: Option<MaterialPool>,
56 using_real_master_data: bool,
58 fraud_config: FraudConfig,
60 persona_errors_enabled: bool,
62 approval_enabled: bool,
64 approval_threshold: rust_decimal::Decimal,
65 batch_state: Option<BatchState>,
67 drift_controller: Option<DriftController>,
69 business_day_calculator: Option<BusinessDayCalculator>,
71 processing_lag_calculator: Option<ProcessingLagCalculator>,
72 temporal_patterns_config: Option<TemporalPatternsConfig>,
73}
74
75#[derive(Clone)]
80struct BatchState {
81 base_account_number: String,
83 base_amount: rust_decimal::Decimal,
84 base_business_process: Option<BusinessProcess>,
85 base_posting_date: NaiveDate,
86 remaining: u8,
88}
89
90impl JournalEntryGenerator {
91 pub fn new_with_params(
93 config: TransactionConfig,
94 coa: Arc<ChartOfAccounts>,
95 companies: Vec<String>,
96 start_date: NaiveDate,
97 end_date: NaiveDate,
98 seed: u64,
99 ) -> Self {
100 Self::new_with_full_config(
101 config,
102 coa,
103 companies,
104 start_date,
105 end_date,
106 seed,
107 TemplateConfig::default(),
108 None,
109 )
110 }
111
112 #[allow(clippy::too_many_arguments)]
114 pub fn new_with_full_config(
115 config: TransactionConfig,
116 coa: Arc<ChartOfAccounts>,
117 companies: Vec<String>,
118 start_date: NaiveDate,
119 end_date: NaiveDate,
120 seed: u64,
121 template_config: TemplateConfig,
122 user_pool: Option<UserPool>,
123 ) -> Self {
124 let user_pool = user_pool.or_else(|| {
126 if template_config.names.generate_realistic_names {
127 let user_gen_config = UserGeneratorConfig {
128 culture_distribution: vec![
129 (
130 datasynth_core::templates::NameCulture::WesternUs,
131 template_config.names.culture_distribution.western_us,
132 ),
133 (
134 datasynth_core::templates::NameCulture::Hispanic,
135 template_config.names.culture_distribution.hispanic,
136 ),
137 (
138 datasynth_core::templates::NameCulture::German,
139 template_config.names.culture_distribution.german,
140 ),
141 (
142 datasynth_core::templates::NameCulture::French,
143 template_config.names.culture_distribution.french,
144 ),
145 (
146 datasynth_core::templates::NameCulture::Chinese,
147 template_config.names.culture_distribution.chinese,
148 ),
149 (
150 datasynth_core::templates::NameCulture::Japanese,
151 template_config.names.culture_distribution.japanese,
152 ),
153 (
154 datasynth_core::templates::NameCulture::Indian,
155 template_config.names.culture_distribution.indian,
156 ),
157 ],
158 email_domain: template_config.names.email_domain.clone(),
159 generate_realistic_names: true,
160 };
161 let mut user_gen = UserGenerator::with_config(seed + 100, user_gen_config);
162 Some(user_gen.generate_standard(&companies))
163 } else {
164 None
165 }
166 });
167
168 let mut ref_gen = ReferenceGenerator::new(
170 start_date.year(),
171 companies.first().map(|s| s.as_str()).unwrap_or("1000"),
172 );
173 ref_gen.set_prefix(
174 ReferenceType::Invoice,
175 &template_config.references.invoice_prefix,
176 );
177 ref_gen.set_prefix(
178 ReferenceType::PurchaseOrder,
179 &template_config.references.po_prefix,
180 );
181 ref_gen.set_prefix(
182 ReferenceType::SalesOrder,
183 &template_config.references.so_prefix,
184 );
185
186 let company_selector = WeightedCompanySelector::uniform(companies.clone());
188
189 Self {
190 rng: seeded_rng(seed, 0),
191 seed,
192 config: config.clone(),
193 coa,
194 companies,
195 company_selector,
196 line_sampler: LineItemSampler::with_config(
197 seed + 1,
198 config.line_item_distribution.clone(),
199 config.even_odd_distribution.clone(),
200 config.debit_credit_distribution.clone(),
201 ),
202 amount_sampler: AmountSampler::with_config(seed + 2, config.amounts.clone()),
203 temporal_sampler: TemporalSampler::with_config(
204 seed + 3,
205 config.seasonality.clone(),
206 WorkingHoursConfig::default(),
207 Vec::new(),
208 ),
209 start_date,
210 end_date,
211 count: 0,
212 uuid_factory: DeterministicUuidFactory::new(seed, GeneratorType::JournalEntry),
213 user_pool,
214 description_generator: DescriptionGenerator::new(),
215 reference_generator: ref_gen,
216 template_config,
217 vendor_pool: VendorPool::standard(),
218 customer_pool: CustomerPool::standard(),
219 material_pool: None,
220 using_real_master_data: false,
221 fraud_config: FraudConfig::default(),
222 persona_errors_enabled: true, approval_enabled: true, approval_threshold: rust_decimal::Decimal::new(10000, 0), batch_state: None,
226 drift_controller: None,
227 business_day_calculator: Some(BusinessDayCalculator::new(HolidayCalendar::new(
230 Region::US,
231 start_date.year(),
232 ))),
233 processing_lag_calculator: None,
234 temporal_patterns_config: None,
235 }
236 }
237
238 pub fn from_generator_config(
243 full_config: &GeneratorConfig,
244 coa: Arc<ChartOfAccounts>,
245 start_date: NaiveDate,
246 end_date: NaiveDate,
247 seed: u64,
248 ) -> Self {
249 let companies: Vec<String> = full_config
250 .companies
251 .iter()
252 .map(|c| c.code.clone())
253 .collect();
254
255 let company_selector = WeightedCompanySelector::from_configs(&full_config.companies);
257
258 let mut generator = Self::new_with_full_config(
259 full_config.transactions.clone(),
260 coa,
261 companies,
262 start_date,
263 end_date,
264 seed,
265 full_config.templates.clone(),
266 None,
267 );
268
269 generator.company_selector = company_selector;
271
272 generator.fraud_config = full_config.fraud.clone();
274
275 let temporal_config = &full_config.temporal_patterns;
277 if temporal_config.enabled {
278 generator = generator.with_temporal_patterns(temporal_config.clone(), seed);
279 }
280
281 generator
282 }
283
284 pub fn with_temporal_patterns(mut self, config: TemporalPatternsConfig, seed: u64) -> Self {
291 if config.business_days.enabled {
293 let region = config
294 .calendars
295 .regions
296 .first()
297 .map(|r| Self::parse_region(r))
298 .unwrap_or(Region::US);
299
300 let calendar = HolidayCalendar::new(region, self.start_date.year());
301 self.business_day_calculator = Some(BusinessDayCalculator::new(calendar));
302 }
303
304 if config.processing_lags.enabled {
306 let lag_config = Self::convert_processing_lag_config(&config.processing_lags);
307 self.processing_lag_calculator =
308 Some(ProcessingLagCalculator::with_config(seed, lag_config));
309 }
310
311 let model = config.period_end.model.as_deref().unwrap_or("flat");
313 if model != "flat"
314 || config
315 .period_end
316 .month_end
317 .as_ref()
318 .is_some_and(|m| m.peak_multiplier.unwrap_or(1.0) != 1.0)
319 {
320 let dynamics = Self::convert_period_end_config(&config.period_end);
321 self.temporal_sampler.set_period_end_dynamics(dynamics);
322 }
323
324 self.temporal_patterns_config = Some(config);
325 self
326 }
327
328 pub fn with_country_pack_temporal(
336 mut self,
337 config: TemporalPatternsConfig,
338 seed: u64,
339 pack: &CountryPack,
340 ) -> Self {
341 if config.business_days.enabled {
343 let calendar = HolidayCalendar::from_country_pack(pack, self.start_date.year());
344 self.business_day_calculator = Some(BusinessDayCalculator::new(calendar));
345 }
346
347 if config.processing_lags.enabled {
349 let lag_config = Self::convert_processing_lag_config(&config.processing_lags);
350 self.processing_lag_calculator =
351 Some(ProcessingLagCalculator::with_config(seed, lag_config));
352 }
353
354 let model = config.period_end.model.as_deref().unwrap_or("flat");
356 if model != "flat"
357 || config
358 .period_end
359 .month_end
360 .as_ref()
361 .is_some_and(|m| m.peak_multiplier.unwrap_or(1.0) != 1.0)
362 {
363 let dynamics = Self::convert_period_end_config(&config.period_end);
364 self.temporal_sampler.set_period_end_dynamics(dynamics);
365 }
366
367 self.temporal_patterns_config = Some(config);
368 self
369 }
370
371 fn convert_processing_lag_config(
373 schema: &datasynth_config::schema::ProcessingLagSchemaConfig,
374 ) -> ProcessingLagConfig {
375 let mut config = ProcessingLagConfig {
376 enabled: schema.enabled,
377 ..Default::default()
378 };
379
380 let convert_lag = |lag: &datasynth_config::schema::LagDistributionSchemaConfig| {
382 let mut dist = LagDistribution::log_normal(lag.mu, lag.sigma);
383 if let Some(min) = lag.min_hours {
384 dist.min_lag_hours = min;
385 }
386 if let Some(max) = lag.max_hours {
387 dist.max_lag_hours = max;
388 }
389 dist
390 };
391
392 if let Some(ref lag) = schema.sales_order_lag {
394 config
395 .event_lags
396 .insert(EventType::SalesOrder, convert_lag(lag));
397 }
398 if let Some(ref lag) = schema.purchase_order_lag {
399 config
400 .event_lags
401 .insert(EventType::PurchaseOrder, convert_lag(lag));
402 }
403 if let Some(ref lag) = schema.goods_receipt_lag {
404 config
405 .event_lags
406 .insert(EventType::GoodsReceipt, convert_lag(lag));
407 }
408 if let Some(ref lag) = schema.invoice_receipt_lag {
409 config
410 .event_lags
411 .insert(EventType::InvoiceReceipt, convert_lag(lag));
412 }
413 if let Some(ref lag) = schema.invoice_issue_lag {
414 config
415 .event_lags
416 .insert(EventType::InvoiceIssue, convert_lag(lag));
417 }
418 if let Some(ref lag) = schema.payment_lag {
419 config
420 .event_lags
421 .insert(EventType::Payment, convert_lag(lag));
422 }
423 if let Some(ref lag) = schema.journal_entry_lag {
424 config
425 .event_lags
426 .insert(EventType::JournalEntry, convert_lag(lag));
427 }
428
429 if let Some(ref cross_day) = schema.cross_day_posting {
431 config.cross_day = CrossDayConfig {
432 enabled: cross_day.enabled,
433 probability_by_hour: cross_day.probability_by_hour.clone(),
434 ..Default::default()
435 };
436 }
437
438 config
439 }
440
441 fn convert_period_end_config(
443 schema: &datasynth_config::schema::PeriodEndSchemaConfig,
444 ) -> PeriodEndDynamics {
445 let model_type = schema.model.as_deref().unwrap_or("exponential");
446
447 let convert_period =
449 |period: Option<&datasynth_config::schema::PeriodEndModelSchemaConfig>,
450 default_peak: f64|
451 -> PeriodEndConfig {
452 if let Some(p) = period {
453 let model = match model_type {
454 "flat" => PeriodEndModel::FlatMultiplier {
455 multiplier: p.peak_multiplier.unwrap_or(default_peak),
456 },
457 "extended_crunch" => PeriodEndModel::ExtendedCrunch {
458 start_day: p.start_day.unwrap_or(-10),
459 sustained_high_days: p.sustained_high_days.unwrap_or(3),
460 peak_multiplier: p.peak_multiplier.unwrap_or(default_peak),
461 ramp_up_days: 3, },
463 _ => PeriodEndModel::ExponentialAcceleration {
464 start_day: p.start_day.unwrap_or(-10),
465 base_multiplier: p.base_multiplier.unwrap_or(1.0),
466 peak_multiplier: p.peak_multiplier.unwrap_or(default_peak),
467 decay_rate: p.decay_rate.unwrap_or(0.3),
468 },
469 };
470 PeriodEndConfig {
471 enabled: true,
472 model,
473 additional_multiplier: p.additional_multiplier.unwrap_or(1.0),
474 }
475 } else {
476 PeriodEndConfig {
477 enabled: true,
478 model: PeriodEndModel::ExponentialAcceleration {
479 start_day: -10,
480 base_multiplier: 1.0,
481 peak_multiplier: default_peak,
482 decay_rate: 0.3,
483 },
484 additional_multiplier: 1.0,
485 }
486 }
487 };
488
489 PeriodEndDynamics::new(
490 convert_period(schema.month_end.as_ref(), 2.0),
491 convert_period(schema.quarter_end.as_ref(), 3.5),
492 convert_period(schema.year_end.as_ref(), 5.0),
493 )
494 }
495
496 fn parse_region(region_str: &str) -> Region {
498 match region_str.to_uppercase().as_str() {
499 "US" => Region::US,
500 "DE" => Region::DE,
501 "GB" => Region::GB,
502 "CN" => Region::CN,
503 "JP" => Region::JP,
504 "IN" => Region::IN,
505 "BR" => Region::BR,
506 "MX" => Region::MX,
507 "AU" => Region::AU,
508 "SG" => Region::SG,
509 "KR" => Region::KR,
510 "FR" => Region::FR,
511 "IT" => Region::IT,
512 "ES" => Region::ES,
513 "CA" => Region::CA,
514 _ => Region::US,
515 }
516 }
517
    /// Replaces the selector used to pick the company code of each generated entry.
    pub fn set_company_selector(&mut self, selector: WeightedCompanySelector) {
        self.company_selector = selector;
    }
522
    /// Returns the selector currently used to pick company codes.
    pub fn company_selector(&self) -> &WeightedCompanySelector {
        &self.company_selector
    }
527
    /// Replaces the fraud configuration (enable flag, rate and type distribution).
    pub fn set_fraud_config(&mut self, config: FraudConfig) {
        self.fraud_config = config;
    }
532
533 pub fn with_vendors(mut self, vendors: &[Vendor]) -> Self {
538 if !vendors.is_empty() {
539 self.vendor_pool = VendorPool::from_vendors(vendors.to_vec());
540 self.using_real_master_data = true;
541 }
542 self
543 }
544
545 pub fn with_customers(mut self, customers: &[Customer]) -> Self {
550 if !customers.is_empty() {
551 self.customer_pool = CustomerPool::from_customers(customers.to_vec());
552 self.using_real_master_data = true;
553 }
554 self
555 }
556
557 pub fn with_materials(mut self, materials: &[Material]) -> Self {
561 if !materials.is_empty() {
562 self.material_pool = Some(MaterialPool::from_materials(materials.to_vec()));
563 self.using_real_master_data = true;
564 }
565 self
566 }
567
568 pub fn with_master_data(
573 self,
574 vendors: &[Vendor],
575 customers: &[Customer],
576 materials: &[Material],
577 ) -> Self {
578 self.with_vendors(vendors)
579 .with_customers(customers)
580 .with_materials(materials)
581 }
582
583 pub fn with_country_pack_names(mut self, pack: &CountryPack) -> Self {
590 let name_gen =
591 datasynth_core::templates::MultiCultureNameGenerator::from_country_pack(pack);
592 let config = UserGeneratorConfig {
593 culture_distribution: Vec::new(),
596 email_domain: name_gen.email_domain().to_string(),
597 generate_realistic_names: true,
598 };
599 let mut user_gen = UserGenerator::with_name_generator(self.seed + 100, config, name_gen);
600 self.user_pool = Some(user_gen.generate_standard(&self.companies));
601 self
602 }
603
    /// Returns `true` once non-empty vendors, customers or materials have been supplied
    /// through the `with_*` master-data builders.
    pub fn is_using_real_master_data(&self) -> bool {
        self.using_real_master_data
    }
608
609 fn determine_fraud(&mut self) -> Option<FraudType> {
611 if !self.fraud_config.enabled {
612 return None;
613 }
614
615 if self.rng.random::<f64>() >= self.fraud_config.fraud_rate {
617 return None;
618 }
619
620 Some(self.select_fraud_type())
622 }
623
624 fn select_fraud_type(&mut self) -> FraudType {
626 let dist = &self.fraud_config.fraud_type_distribution;
627 let roll: f64 = self.rng.random();
628
629 let mut cumulative = 0.0;
630
631 cumulative += dist.suspense_account_abuse;
632 if roll < cumulative {
633 return FraudType::SuspenseAccountAbuse;
634 }
635
636 cumulative += dist.fictitious_transaction;
637 if roll < cumulative {
638 return FraudType::FictitiousTransaction;
639 }
640
641 cumulative += dist.revenue_manipulation;
642 if roll < cumulative {
643 return FraudType::RevenueManipulation;
644 }
645
646 cumulative += dist.expense_capitalization;
647 if roll < cumulative {
648 return FraudType::ExpenseCapitalization;
649 }
650
651 cumulative += dist.split_transaction;
652 if roll < cumulative {
653 return FraudType::SplitTransaction;
654 }
655
656 cumulative += dist.timing_anomaly;
657 if roll < cumulative {
658 return FraudType::TimingAnomaly;
659 }
660
661 cumulative += dist.unauthorized_access;
662 if roll < cumulative {
663 return FraudType::UnauthorizedAccess;
664 }
665
666 FraudType::DuplicatePayment
668 }
669
670 fn fraud_type_to_amount_pattern(&self, fraud_type: FraudType) -> FraudAmountPattern {
672 match fraud_type {
673 FraudType::SplitTransaction | FraudType::JustBelowThreshold => {
674 FraudAmountPattern::ThresholdAdjacent
675 }
676 FraudType::FictitiousTransaction
677 | FraudType::FictitiousEntry
678 | FraudType::SuspenseAccountAbuse
679 | FraudType::RoundDollarManipulation => FraudAmountPattern::ObviousRoundNumbers,
680 FraudType::RevenueManipulation
681 | FraudType::ExpenseCapitalization
682 | FraudType::ImproperCapitalization
683 | FraudType::ReserveManipulation
684 | FraudType::UnauthorizedAccess
685 | FraudType::PrematureRevenue
686 | FraudType::UnderstatedLiabilities
687 | FraudType::OverstatedAssets
688 | FraudType::ChannelStuffing => FraudAmountPattern::StatisticallyImprobable,
689 FraudType::DuplicatePayment
690 | FraudType::TimingAnomaly
691 | FraudType::SelfApproval
692 | FraudType::ExceededApprovalLimit
693 | FraudType::SegregationOfDutiesViolation
694 | FraudType::UnauthorizedApproval
695 | FraudType::CollusiveApproval
696 | FraudType::FictitiousVendor
697 | FraudType::ShellCompanyPayment
698 | FraudType::Kickback
699 | FraudType::KickbackScheme
700 | FraudType::InvoiceManipulation
701 | FraudType::AssetMisappropriation
702 | FraudType::InventoryTheft
703 | FraudType::GhostEmployee => FraudAmountPattern::Normal,
704 FraudType::ImproperRevenueRecognition
706 | FraudType::ImproperPoAllocation
707 | FraudType::VariableConsiderationManipulation
708 | FraudType::ContractModificationMisstatement => {
709 FraudAmountPattern::StatisticallyImprobable
710 }
711 FraudType::LeaseClassificationManipulation
713 | FraudType::OffBalanceSheetLease
714 | FraudType::LeaseLiabilityUnderstatement
715 | FraudType::RouAssetMisstatement => FraudAmountPattern::StatisticallyImprobable,
716 FraudType::FairValueHierarchyManipulation
718 | FraudType::Level3InputManipulation
719 | FraudType::ValuationTechniqueManipulation => {
720 FraudAmountPattern::StatisticallyImprobable
721 }
722 FraudType::DelayedImpairment
724 | FraudType::ImpairmentTestAvoidance
725 | FraudType::CashFlowProjectionManipulation
726 | FraudType::ImproperImpairmentReversal => FraudAmountPattern::StatisticallyImprobable,
727 FraudType::BidRigging
729 | FraudType::PhantomVendorContract
730 | FraudType::ConflictOfInterestSourcing => FraudAmountPattern::Normal,
731 FraudType::SplitContractThreshold => FraudAmountPattern::ThresholdAdjacent,
732 FraudType::GhostEmployeePayroll
734 | FraudType::PayrollInflation
735 | FraudType::DuplicateExpenseReport
736 | FraudType::FictitiousExpense => FraudAmountPattern::Normal,
737 FraudType::SplitExpenseToAvoidApproval => FraudAmountPattern::ThresholdAdjacent,
738 FraudType::RevenueTimingManipulation => FraudAmountPattern::StatisticallyImprobable,
740 FraudType::QuotePriceOverride => FraudAmountPattern::Normal,
741 }
742 }
743
    /// Returns the next document id from the generator's deterministic UUID factory.
    #[inline]
    fn generate_deterministic_uuid(&self) -> uuid::Uuid {
        self.uuid_factory.next()
    }
749
    /// Fixed set of cost-center codes that `enrich_line_items` assigns to lines whose
    /// GL account starts with `5` or `6`.
    const COST_CENTER_POOL: &'static [&'static str] =
        &["CC1000", "CC2000", "CC3000", "CC4000", "CC5000"];
753
754 fn enrich_line_items(&self, entry: &mut JournalEntry) {
760 let posting_date = entry.header.posting_date;
761 let company_code = &entry.header.company_code;
762 let header_text = entry.header.header_text.clone();
763 let business_process = entry.header.business_process;
764
765 let doc_id_bytes = entry.header.document_id.as_bytes();
767 let mut cc_seed: usize = 0;
768 for &b in doc_id_bytes {
769 cc_seed = cc_seed.wrapping_add(b as usize);
770 }
771
772 for (i, line) in entry.lines.iter_mut().enumerate() {
773 if line.account_description.is_none() {
775 line.account_description = self
776 .coa
777 .get_account(&line.gl_account)
778 .map(|a| a.short_description.clone());
779 }
780
781 if line.cost_center.is_none() {
783 let first_char = line.gl_account.chars().next().unwrap_or('0');
784 if first_char == '5' || first_char == '6' {
785 let idx = cc_seed.wrapping_add(i) % Self::COST_CENTER_POOL.len();
786 line.cost_center = Some(Self::COST_CENTER_POOL[idx].to_string());
787 }
788 }
789
790 if line.profit_center.is_none() {
792 let suffix = match business_process {
793 Some(BusinessProcess::P2P) => "-P2P",
794 Some(BusinessProcess::O2C) => "-O2C",
795 Some(BusinessProcess::R2R) => "-R2R",
796 Some(BusinessProcess::H2R) => "-H2R",
797 _ => "",
798 };
799 line.profit_center = Some(format!("PC-{}{}", company_code, suffix));
800 }
801
802 if line.line_text.is_none() {
804 line.line_text = header_text.clone();
805 }
806
807 if line.value_date.is_none()
809 && (line.gl_account.starts_with("1100") || line.gl_account.starts_with("2000"))
810 {
811 line.value_date = Some(posting_date);
812 }
813
814 if line.assignment.is_none() {
816 if line.gl_account.starts_with("2000") {
817 if let Some(ref ht) = header_text {
819 if let Some(vendor_part) = ht.rsplit(" - ").next() {
821 if vendor_part.starts_with("V-")
822 || vendor_part.starts_with("VENDOR")
823 || vendor_part.starts_with("Vendor")
824 {
825 line.assignment = Some(vendor_part.to_string());
826 }
827 }
828 }
829 } else if line.gl_account.starts_with("1100") {
830 if let Some(ref ht) = header_text {
832 if let Some(customer_part) = ht.rsplit(" - ").next() {
833 if customer_part.starts_with("C-")
834 || customer_part.starts_with("CUST")
835 || customer_part.starts_with("Customer")
836 {
837 line.assignment = Some(customer_part.to_string());
838 }
839 }
840 }
841 }
842 }
843 }
844 }
845
    /// Generates the next journal entry.
    ///
    /// If a batch is in progress (see `maybe_start_batch`), the call is delegated to
    /// `generate_batched_entry`. Otherwise a fresh entry is built: posting date, company,
    /// line layout, source, business process and fraud decision are sampled; a header is
    /// assembled; debit and credit lines are created so that each side sums to the same
    /// total; optional fields are enriched; and finally persona errors, the approval
    /// workflow and batching are applied.
    ///
    /// The order of the sampling calls below determines the random stream consumed, so
    /// reordering them changes the generated data for a given seed.
    pub fn generate(&mut self) -> JournalEntry {
        debug!(
            count = self.count,
            companies = self.companies.len(),
            start_date = %self.start_date,
            end_date = %self.end_date,
            "Generating journal entry"
        );

        // Continue an in-progress batch before generating anything new.
        if let Some(ref state) = self.batch_state {
            if state.remaining > 0 {
                return self.generate_batched_entry();
            }
        }

        self.count += 1;

        let document_id = self.generate_deterministic_uuid();

        let mut posting_date = self
            .temporal_sampler
            .sample_date(self.start_date, self.end_date);

        // Move the posting date onto a business day; if moving forward leaves the
        // configured range, step back from the end date instead.
        // NOTE(review): the meaning of the boolean arguments is defined by
        // `BusinessDayCalculator` and is not visible here.
        if let Some(ref calc) = self.business_day_calculator {
            if !calc.is_business_day(posting_date) {
                posting_date = calc.next_business_day(posting_date, false);
                if posting_date > self.end_date {
                    posting_date = calc.prev_business_day(self.end_date, true);
                }
            }
        }

        let company_code = self.company_selector.select(&mut self.rng).to_string();

        let line_spec = self.line_sampler.sample();

        let source = self.select_source();
        // Recurring entries are treated like automated ones for timing, user selection,
        // amount variation and error injection.
        let is_automated = matches!(
            source,
            TransactionSource::Automated | TransactionSource::Recurring
        );

        let business_process = self.select_business_process();

        let fraud_type = self.determine_fraud();
        let is_fraud = fraud_type.is_some();

        let time = self.temporal_sampler.sample_time(!is_automated);
        let created_at = posting_date.and_time(time).and_utc();

        let (created_by, user_persona) = self.select_user(is_automated);

        let mut header =
            JournalEntryHeader::with_deterministic_id(company_code, posting_date, document_id);
        header.created_at = created_at;
        header.source = source;
        header.created_by = created_by;
        header.user_persona = user_persona;
        header.business_process = Some(business_process);
        header.document_type = Self::document_type_for_process(business_process).to_string();
        header.is_fraud = is_fraud;
        header.fraud_type = fraud_type;

        let mut context =
            DescriptionContext::with_period(posting_date.month(), posting_date.year());

        // Give the description templates a counterparty name where the process has one.
        match business_process {
            BusinessProcess::P2P => {
                if let Some(vendor) = self.vendor_pool.random_vendor(&mut self.rng) {
                    context.vendor_name = Some(vendor.name.clone());
                }
            }
            BusinessProcess::O2C => {
                if let Some(customer) = self.customer_pool.random_customer(&mut self.rng) {
                    context.customer_name = Some(customer.name.clone());
                }
            }
            _ => {}
        }

        if self.template_config.descriptions.generate_header_text {
            header.header_text = Some(self.description_generator.generate_header_text(
                business_process,
                &context,
                &mut self.rng,
            ));
        }

        if self.template_config.references.generate_references {
            header.reference = Some(
                self.reference_generator
                    .generate_for_process_year(business_process, posting_date.year()),
            );
        }

        let mut entry = JournalEntry::new(header);

        // Fraudulent entries draw their amount from a fraud-specific pattern.
        let base_amount = if let Some(ft) = fraud_type {
            let pattern = self.fraud_type_to_amount_pattern(ft);
            self.amount_sampler.sample_fraud(pattern)
        } else {
            self.amount_sampler.sample()
        };

        // Apply drift only when the mean multiplier is not exactly 1.0; in that case the
        // seasonal factor is applied as well.
        // NOTE(review): a seasonal factor on its own (mean multiplier == 1.0) is ignored
        // here — confirm that this is intended.
        let drift_adjusted_amount = {
            let drift = self.get_drift_adjustments(posting_date);
            if drift.amount_mean_multiplier != 1.0 {
                let multiplier = drift.amount_mean_multiplier * drift.seasonal_factor;
                let adjusted = base_amount.to_f64().unwrap_or(1.0) * multiplier;
                Decimal::from_f64_retain(adjusted).unwrap_or(base_amount)
            } else {
                base_amount
            }
        };

        // Only non-automated entries get human-style amount variation.
        let total_amount = if is_automated {
            drift_adjusted_amount
        } else {
            self.apply_human_variation(drift_adjusted_amount)
        };

        // Debit side: line numbers start at 1.
        let debit_amounts = self
            .amount_sampler
            .sample_summing_to(line_spec.debit_count, total_amount);
        for (i, amount) in debit_amounts.into_iter().enumerate() {
            let account_number = self.select_debit_account().account_number.clone();
            let mut line = JournalEntryLine::debit(
                entry.header.document_id,
                (i + 1) as u32,
                account_number.clone(),
                amount,
            );

            if self.template_config.descriptions.generate_line_text {
                line.line_text = Some(self.description_generator.generate_line_text(
                    &account_number,
                    &context,
                    &mut self.rng,
                ));
            }

            entry.add_line(line);
        }

        // Credit side: line numbers continue after the debit lines and the amounts are
        // sampled to sum to the same total as the debit side.
        let credit_amounts = self
            .amount_sampler
            .sample_summing_to(line_spec.credit_count, total_amount);
        for (i, amount) in credit_amounts.into_iter().enumerate() {
            let account_number = self.select_credit_account().account_number.clone();
            let mut line = JournalEntryLine::credit(
                entry.header.document_id,
                (line_spec.debit_count + i + 1) as u32,
                account_number.clone(),
                amount,
            );

            if self.template_config.descriptions.generate_line_text {
                line.line_text = Some(self.description_generator.generate_line_text(
                    &account_number,
                    &context,
                    &mut self.rng,
                ));
            }

            entry.add_line(line);
        }

        self.enrich_line_items(&mut entry);

        if self.persona_errors_enabled && !is_automated {
            self.maybe_inject_persona_error(&mut entry);
        }

        if self.approval_enabled {
            self.maybe_apply_approval_workflow(&mut entry, posting_date);
        }

        // Possibly mark this entry as the start of a batch of similar entries.
        self.maybe_start_batch(&entry);

        entry
    }
1059
    /// Enables or disables persona-based error injection and returns the generator.
    pub fn with_persona_errors(mut self, enabled: bool) -> Self {
        self.persona_errors_enabled = enabled;
        self
    }
1068
    /// Builder-style variant of `set_fraud_config`: replaces the fraud configuration and
    /// returns the generator.
    pub fn with_fraud_config(mut self, config: FraudConfig) -> Self {
        self.fraud_config = config;
        self
    }
1077
    /// Returns whether persona-based error injection is enabled.
    pub fn persona_errors_enabled(&self) -> bool {
        self.persona_errors_enabled
    }
1082
    /// Clears any in-progress batch when `enabled` is `false`; does nothing otherwise.
    ///
    /// NOTE(review): no persistent flag is stored, and `generate` calls
    /// `maybe_start_batch` unconditionally, so new batches can still start after
    /// `with_batching(false)`. Confirm whether a lasting on/off switch was intended.
    pub fn with_batching(mut self, enabled: bool) -> Self {
        if !enabled {
            self.batch_state = None;
        }
        self
    }
1093
    /// Reports whether batching is enabled.
    ///
    /// NOTE(review): always returns `true`, regardless of any earlier `with_batching`
    /// call — confirm that this is intended.
    pub fn batching_enabled(&self) -> bool {
        true
    }
1099
1100 fn maybe_start_batch(&mut self, entry: &JournalEntry) {
1105 if entry.header.source == TransactionSource::Automated || entry.header.is_fraud {
1107 return;
1108 }
1109
1110 if self.rng.random::<f64>() > 0.15 {
1112 return;
1113 }
1114
1115 let base_account = entry
1117 .lines
1118 .first()
1119 .map(|l| l.gl_account.clone())
1120 .unwrap_or_default();
1121
1122 let base_amount = entry.total_debit();
1123
1124 self.batch_state = Some(BatchState {
1125 base_account_number: base_account,
1126 base_amount,
1127 base_business_process: entry.header.business_process,
1128 base_posting_date: entry.header.posting_date,
1129 remaining: self.rng.random_range(2..7), });
1131 }
1132
1133 fn generate_batched_entry(&mut self) -> JournalEntry {
1141 use rust_decimal::Decimal;
1142
1143 if let Some(ref mut state) = self.batch_state {
1145 state.remaining = state.remaining.saturating_sub(1);
1146 }
1147
1148 let Some(batch) = self.batch_state.clone() else {
1149 tracing::warn!(
1152 "generate_batched_entry called without batch_state; generating standard entry"
1153 );
1154 self.batch_state = None;
1155 return self.generate();
1156 };
1157
1158 let posting_date = batch.base_posting_date;
1160
1161 self.count += 1;
1162 let document_id = self.generate_deterministic_uuid();
1163
1164 let company_code = self.company_selector.select(&mut self.rng).to_string();
1166
1167 let _line_spec = LineItemSpec {
1169 total_count: 2,
1170 debit_count: 1,
1171 credit_count: 1,
1172 split_type: DebitCreditSplit::Equal,
1173 };
1174
1175 let source = TransactionSource::Manual;
1177
1178 let business_process = batch.base_business_process.unwrap_or(BusinessProcess::R2R);
1180
1181 let time = self.temporal_sampler.sample_time(true);
1183 let created_at = posting_date.and_time(time).and_utc();
1184
1185 let (created_by, user_persona) = self.select_user(false);
1187
1188 let mut header =
1190 JournalEntryHeader::with_deterministic_id(company_code, posting_date, document_id);
1191 header.created_at = created_at;
1192 header.source = source;
1193 header.created_by = created_by;
1194 header.user_persona = user_persona;
1195 header.business_process = Some(business_process);
1196 header.document_type = Self::document_type_for_process(business_process).to_string();
1197
1198 let variation = self.rng.random_range(-0.15..0.15);
1200 let varied_amount =
1201 batch.base_amount * (Decimal::ONE + Decimal::try_from(variation).unwrap_or_default());
1202 let total_amount = varied_amount.round_dp(2).max(Decimal::from(1));
1203
1204 let mut entry = JournalEntry::new(header);
1206
1207 let debit_line = JournalEntryLine::debit(
1209 entry.header.document_id,
1210 1,
1211 batch.base_account_number.clone(),
1212 total_amount,
1213 );
1214 entry.add_line(debit_line);
1215
1216 let credit_account = self.select_credit_account().account_number.clone();
1218 let credit_line =
1219 JournalEntryLine::credit(entry.header.document_id, 2, credit_account, total_amount);
1220 entry.add_line(credit_line);
1221
1222 self.enrich_line_items(&mut entry);
1224
1225 if self.persona_errors_enabled {
1227 self.maybe_inject_persona_error(&mut entry);
1228 }
1229
1230 if self.approval_enabled {
1232 self.maybe_apply_approval_workflow(&mut entry, posting_date);
1233 }
1234
1235 if batch.remaining <= 1 {
1237 self.batch_state = None;
1238 }
1239
1240 entry
1241 }
1242
1243 fn maybe_inject_persona_error(&mut self, entry: &mut JournalEntry) {
1245 let persona_str = &entry.header.user_persona;
1247 let persona = match persona_str.to_lowercase().as_str() {
1248 s if s.contains("junior") => UserPersona::JuniorAccountant,
1249 s if s.contains("senior") => UserPersona::SeniorAccountant,
1250 s if s.contains("controller") => UserPersona::Controller,
1251 s if s.contains("manager") => UserPersona::Manager,
1252 s if s.contains("executive") => UserPersona::Executive,
1253 _ => return, };
1255
1256 let base_error_rate = persona.error_rate();
1258
1259 let adjusted_rate = self.apply_stress_factors(base_error_rate, entry.header.posting_date);
1261
1262 if self.rng.random::<f64>() >= adjusted_rate {
1264 return; }
1266
1267 self.inject_human_error(entry, persona);
1269 }
1270
1271 fn apply_stress_factors(&self, base_rate: f64, posting_date: chrono::NaiveDate) -> f64 {
1280 use chrono::Datelike;
1281
1282 let mut rate = base_rate;
1283 let day = posting_date.day();
1284 let month = posting_date.month();
1285
1286 if month == 12 && day >= 28 {
1288 rate *= 2.0;
1289 return rate.min(0.5); }
1291
1292 if matches!(month, 3 | 6 | 9 | 12) && day >= 28 {
1294 rate *= 1.75; return rate.min(0.4);
1296 }
1297
1298 if day >= 28 {
1300 rate *= 1.5; }
1302
1303 let weekday = posting_date.weekday();
1305 match weekday {
1306 chrono::Weekday::Mon => {
1307 rate *= 1.2;
1309 }
1310 chrono::Weekday::Fri => {
1311 rate *= 1.3;
1313 }
1314 _ => {}
1315 }
1316
1317 rate.min(0.4)
1319 }
1320
1321 fn apply_human_variation(&mut self, amount: rust_decimal::Decimal) -> rust_decimal::Decimal {
1330 use rust_decimal::Decimal;
1331
1332 if amount < Decimal::from(10) {
1334 return amount;
1335 }
1336
1337 if self.rng.random::<f64>() > 0.70 {
1339 return amount;
1340 }
1341
1342 let variation_type: u8 = self.rng.random_range(0..4);
1344
1345 match variation_type {
1346 0 => {
1347 let variation_pct = self.rng.random_range(-0.02..0.02);
1349 let variation = amount * Decimal::try_from(variation_pct).unwrap_or_default();
1350 (amount + variation).round_dp(2)
1351 }
1352 1 => {
1353 let ten = Decimal::from(10);
1355 (amount / ten).round() * ten
1356 }
1357 2 => {
1358 if amount >= Decimal::from(500) {
1360 let hundred = Decimal::from(100);
1361 (amount / hundred).round() * hundred
1362 } else {
1363 amount
1364 }
1365 }
1366 3 => {
1367 let cents = Decimal::new(self.rng.random_range(-100..100), 2);
1369 (amount + cents).max(Decimal::ZERO).round_dp(2)
1370 }
1371 _ => amount,
1372 }
1373 }
1374
1375 fn rebalance_entry(entry: &mut JournalEntry, modified_was_debit: bool, impact: Decimal) {
1381 let balancing_idx = entry.lines.iter().position(|l| {
1383 if modified_was_debit {
1384 l.credit_amount > Decimal::ZERO
1385 } else {
1386 l.debit_amount > Decimal::ZERO
1387 }
1388 });
1389
1390 if let Some(idx) = balancing_idx {
1391 if modified_was_debit {
1392 entry.lines[idx].credit_amount += impact;
1393 } else {
1394 entry.lines[idx].debit_amount += impact;
1395 }
1396 }
1397 }
1398
1399 fn inject_human_error(&mut self, entry: &mut JournalEntry, persona: UserPersona) {
1404 use rust_decimal::Decimal;
1405
1406 let error_type: u8 = match persona {
1408 UserPersona::JuniorAccountant => {
1409 self.rng.random_range(0..5)
1411 }
1412 UserPersona::SeniorAccountant => {
1413 self.rng.random_range(0..3)
1415 }
1416 UserPersona::Controller | UserPersona::Manager => {
1417 self.rng.random_range(3..5)
1419 }
1420 _ => return,
1421 };
1422
1423 match error_type {
1424 0 => {
1425 if let Some(line) = entry.lines.get_mut(0) {
1427 let is_debit = line.debit_amount > Decimal::ZERO;
1428 let original_amount = if is_debit {
1429 line.debit_amount
1430 } else {
1431 line.credit_amount
1432 };
1433
1434 let s = original_amount.to_string();
1436 if s.len() >= 2 {
1437 let chars: Vec<char> = s.chars().collect();
1438 let pos = self.rng.random_range(0..chars.len().saturating_sub(1));
1439 if chars[pos].is_ascii_digit()
1440 && chars.get(pos + 1).is_some_and(|c| c.is_ascii_digit())
1441 {
1442 let mut new_chars = chars;
1443 new_chars.swap(pos, pos + 1);
1444 if let Ok(new_amount) =
1445 new_chars.into_iter().collect::<String>().parse::<Decimal>()
1446 {
1447 let impact = new_amount - original_amount;
1448
1449 if is_debit {
1451 entry.lines[0].debit_amount = new_amount;
1452 } else {
1453 entry.lines[0].credit_amount = new_amount;
1454 }
1455
1456 Self::rebalance_entry(entry, is_debit, impact);
1458
1459 entry.header.header_text = Some(
1460 entry.header.header_text.clone().unwrap_or_default()
1461 + " [HUMAN_ERROR:TRANSPOSITION]",
1462 );
1463 }
1464 }
1465 }
1466 }
1467 }
1468 1 => {
1469 if let Some(line) = entry.lines.get_mut(0) {
1471 let is_debit = line.debit_amount > Decimal::ZERO;
1472 let original_amount = if is_debit {
1473 line.debit_amount
1474 } else {
1475 line.credit_amount
1476 };
1477
1478 let new_amount = original_amount * Decimal::new(10, 0);
1479 let impact = new_amount - original_amount;
1480
1481 if is_debit {
1483 entry.lines[0].debit_amount = new_amount;
1484 } else {
1485 entry.lines[0].credit_amount = new_amount;
1486 }
1487
1488 Self::rebalance_entry(entry, is_debit, impact);
1490
1491 entry.header.header_text = Some(
1492 entry.header.header_text.clone().unwrap_or_default()
1493 + " [HUMAN_ERROR:DECIMAL_SHIFT]",
1494 );
1495 }
1496 }
1497 2 => {
1498 if let Some(ref mut text) = entry.header.header_text {
1500 let typos = ["teh", "adn", "wiht", "taht", "recieve"];
1501 let correct = ["the", "and", "with", "that", "receive"];
1502 let idx = self.rng.random_range(0..typos.len());
1503 if text.to_lowercase().contains(correct[idx]) {
1504 *text = text.replace(correct[idx], typos[idx]);
1505 *text = format!("{} [HUMAN_ERROR:TYPO]", text);
1506 }
1507 }
1508 }
1509 3 => {
1510 if let Some(line) = entry.lines.get_mut(0) {
1512 let is_debit = line.debit_amount > Decimal::ZERO;
1513 let original_amount = if is_debit {
1514 line.debit_amount
1515 } else {
1516 line.credit_amount
1517 };
1518
1519 let new_amount =
1520 (original_amount / Decimal::new(100, 0)).round() * Decimal::new(100, 0);
1521 let impact = new_amount - original_amount;
1522
1523 if is_debit {
1525 entry.lines[0].debit_amount = new_amount;
1526 } else {
1527 entry.lines[0].credit_amount = new_amount;
1528 }
1529
1530 Self::rebalance_entry(entry, is_debit, impact);
1532
1533 entry.header.header_text = Some(
1534 entry.header.header_text.clone().unwrap_or_default()
1535 + " [HUMAN_ERROR:ROUNDED]",
1536 );
1537 }
1538 }
1539 4 => {
1540 if entry.header.document_date == entry.header.posting_date {
1543 let days_late = self.rng.random_range(5..15);
1544 entry.header.document_date =
1545 entry.header.posting_date - chrono::Duration::days(days_late);
1546 entry.header.header_text = Some(
1547 entry.header.header_text.clone().unwrap_or_default()
1548 + " [HUMAN_ERROR:LATE_POSTING]",
1549 );
1550 }
1551 }
1552 _ => {}
1553 }
1554 }
1555
1556 fn maybe_apply_approval_workflow(
1561 &mut self,
1562 entry: &mut JournalEntry,
1563 _posting_date: NaiveDate,
1564 ) {
1565 use rust_decimal::Decimal;
1566
1567 let amount = entry.total_debit();
1568
1569 if amount <= self.approval_threshold {
1571 let workflow = ApprovalWorkflow::auto_approved(
1573 entry.header.created_by.clone(),
1574 entry.header.user_persona.clone(),
1575 amount,
1576 entry.header.created_at,
1577 );
1578 entry.header.approval_workflow = Some(workflow);
1579 return;
1580 }
1581
1582 entry.header.sox_relevant = true;
1584
1585 let required_levels = if amount > Decimal::new(100000, 0) {
1587 3 } else if amount > Decimal::new(50000, 0) {
1589 2 } else {
1591 1 };
1593
1594 let mut workflow = ApprovalWorkflow::new(
1596 entry.header.created_by.clone(),
1597 entry.header.user_persona.clone(),
1598 amount,
1599 );
1600 workflow.required_levels = required_levels;
1601
1602 let submit_time = entry.header.created_at;
1604 let submit_action = ApprovalAction::new(
1605 entry.header.created_by.clone(),
1606 entry.header.user_persona.clone(),
1607 self.parse_persona(&entry.header.user_persona),
1608 ApprovalActionType::Submit,
1609 0,
1610 )
1611 .with_timestamp(submit_time);
1612
1613 workflow.actions.push(submit_action);
1614 workflow.status = ApprovalStatus::Pending;
1615 workflow.submitted_at = Some(submit_time);
1616
1617 let mut current_time = submit_time;
1619 for level in 1..=required_levels {
1620 let delay_hours = self.rng.random_range(1..4);
1622 current_time += chrono::Duration::hours(delay_hours);
1623
1624 while current_time.weekday() == chrono::Weekday::Sat
1626 || current_time.weekday() == chrono::Weekday::Sun
1627 {
1628 current_time += chrono::Duration::days(1);
1629 }
1630
1631 let (approver_id, approver_role) = self.select_approver(level);
1633
1634 let approve_action = ApprovalAction::new(
1635 approver_id.clone(),
1636 approver_role.to_string(),
1637 approver_role,
1638 ApprovalActionType::Approve,
1639 level,
1640 )
1641 .with_timestamp(current_time);
1642
1643 workflow.actions.push(approve_action);
1644 workflow.current_level = level;
1645 }
1646
1647 workflow.status = ApprovalStatus::Approved;
1649 workflow.approved_at = Some(current_time);
1650
1651 entry.header.approval_workflow = Some(workflow);
1652 }
1653
1654 fn select_approver(&mut self, level: u8) -> (String, UserPersona) {
1656 let persona = match level {
1657 1 => UserPersona::Manager,
1658 2 => UserPersona::Controller,
1659 _ => UserPersona::Executive,
1660 };
1661
1662 if let Some(ref pool) = self.user_pool {
1664 if let Some(user) = pool.get_random_user(persona, &mut self.rng) {
1665 return (user.user_id.clone(), persona);
1666 }
1667 }
1668
1669 let approver_id = match persona {
1671 UserPersona::Manager => format!("MGR{:04}", self.rng.random_range(1..100)),
1672 UserPersona::Controller => format!("CTRL{:04}", self.rng.random_range(1..20)),
1673 UserPersona::Executive => format!("EXEC{:04}", self.rng.random_range(1..10)),
1674 _ => format!("USR{:04}", self.rng.random_range(1..1000)),
1675 };
1676
1677 (approver_id, persona)
1678 }
1679
1680 fn parse_persona(&self, persona_str: &str) -> UserPersona {
1682 match persona_str.to_lowercase().as_str() {
1683 s if s.contains("junior") => UserPersona::JuniorAccountant,
1684 s if s.contains("senior") => UserPersona::SeniorAccountant,
1685 s if s.contains("controller") => UserPersona::Controller,
1686 s if s.contains("manager") => UserPersona::Manager,
1687 s if s.contains("executive") => UserPersona::Executive,
1688 s if s.contains("automated") || s.contains("system") => UserPersona::AutomatedSystem,
1689 _ => UserPersona::JuniorAccountant, }
1691 }
1692
1693 pub fn with_approval(mut self, enabled: bool) -> Self {
1695 self.approval_enabled = enabled;
1696 self
1697 }
1698
1699 pub fn with_approval_threshold(mut self, threshold: rust_decimal::Decimal) -> Self {
1701 self.approval_threshold = threshold;
1702 self
1703 }
1704
1705 pub fn with_drift_controller(mut self, controller: DriftController) -> Self {
1711 self.drift_controller = Some(controller);
1712 self
1713 }
1714
1715 pub fn with_drift_config(mut self, config: DriftConfig, seed: u64) -> Self {
1720 if config.enabled {
1721 let total_periods = self.calculate_total_periods();
1722 self.drift_controller = Some(DriftController::new(config, seed, total_periods));
1723 }
1724 self
1725 }
1726
1727 fn calculate_total_periods(&self) -> u32 {
1729 let start_year = self.start_date.year();
1730 let start_month = self.start_date.month();
1731 let end_year = self.end_date.year();
1732 let end_month = self.end_date.month();
1733
1734 ((end_year - start_year) * 12 + (end_month as i32 - start_month as i32) + 1).max(1) as u32
1735 }
1736
1737 fn date_to_period(&self, date: NaiveDate) -> u32 {
1739 let start_year = self.start_date.year();
1740 let start_month = self.start_date.month() as i32;
1741 let date_year = date.year();
1742 let date_month = date.month() as i32;
1743
1744 ((date_year - start_year) * 12 + (date_month - start_month)).max(0) as u32
1745 }
1746
1747 fn get_drift_adjustments(&self, date: NaiveDate) -> DriftAdjustments {
1749 if let Some(ref controller) = self.drift_controller {
1750 let period = self.date_to_period(date);
1751 controller.compute_adjustments(period)
1752 } else {
1753 DriftAdjustments::none()
1754 }
1755 }
1756
1757 #[inline]
1759 fn select_user(&mut self, is_automated: bool) -> (String, String) {
1760 if let Some(ref pool) = self.user_pool {
1761 let persona = if is_automated {
1762 UserPersona::AutomatedSystem
1763 } else {
1764 let roll: f64 = self.rng.random();
1766 if roll < 0.4 {
1767 UserPersona::JuniorAccountant
1768 } else if roll < 0.7 {
1769 UserPersona::SeniorAccountant
1770 } else if roll < 0.85 {
1771 UserPersona::Controller
1772 } else {
1773 UserPersona::Manager
1774 }
1775 };
1776
1777 if let Some(user) = pool.get_random_user(persona, &mut self.rng) {
1778 return (user.user_id.clone(), user.persona.to_string());
1779 }
1780 }
1781
1782 if is_automated {
1784 (
1785 format!("BATCH{:04}", self.rng.random_range(1..=20)),
1786 "automated_system".to_string(),
1787 )
1788 } else {
1789 (
1790 format!("USER{:04}", self.rng.random_range(1..=40)),
1791 "senior_accountant".to_string(),
1792 )
1793 }
1794 }
1795
1796 #[inline]
1798 fn select_source(&mut self) -> TransactionSource {
1799 let roll: f64 = self.rng.random();
1800 let dist = &self.config.source_distribution;
1801
1802 if roll < dist.manual {
1803 TransactionSource::Manual
1804 } else if roll < dist.manual + dist.automated {
1805 TransactionSource::Automated
1806 } else if roll < dist.manual + dist.automated + dist.recurring {
1807 TransactionSource::Recurring
1808 } else {
1809 TransactionSource::Adjustment
1810 }
1811 }
1812
1813 #[inline]
1815 fn document_type_for_process(process: BusinessProcess) -> &'static str {
1824 match process {
1825 BusinessProcess::P2P => "KR",
1826 BusinessProcess::O2C => "DR",
1827 BusinessProcess::R2R => "SA",
1828 BusinessProcess::H2R => "HR",
1829 BusinessProcess::A2R => "AA",
1830 _ => "SA",
1831 }
1832 }
1833
1834 fn select_business_process(&mut self) -> BusinessProcess {
1835 let roll: f64 = self.rng.random();
1836
1837 if roll < 0.35 {
1839 BusinessProcess::O2C
1840 } else if roll < 0.65 {
1841 BusinessProcess::P2P
1842 } else if roll < 0.85 {
1843 BusinessProcess::R2R
1844 } else if roll < 0.95 {
1845 BusinessProcess::H2R
1846 } else {
1847 BusinessProcess::A2R
1848 }
1849 }
1850
1851 #[inline]
1852 fn select_debit_account(&mut self) -> &GLAccount {
1853 let accounts = self.coa.get_accounts_by_type(AccountType::Asset);
1854 let expense_accounts = self.coa.get_accounts_by_type(AccountType::Expense);
1855
1856 let all: Vec<_> = if self.rng.random::<f64>() < 0.6 {
1858 accounts
1859 } else {
1860 expense_accounts
1861 };
1862
1863 all.choose(&mut self.rng).copied().unwrap_or_else(|| {
1864 tracing::warn!(
1865 "Account selection returned empty list, falling back to first COA account"
1866 );
1867 &self.coa.accounts[0]
1868 })
1869 }
1870
1871 #[inline]
1872 fn select_credit_account(&mut self) -> &GLAccount {
1873 let liability_accounts = self.coa.get_accounts_by_type(AccountType::Liability);
1874 let revenue_accounts = self.coa.get_accounts_by_type(AccountType::Revenue);
1875
1876 let all: Vec<_> = if self.rng.random::<f64>() < 0.6 {
1878 liability_accounts
1879 } else {
1880 revenue_accounts
1881 };
1882
1883 all.choose(&mut self.rng).copied().unwrap_or_else(|| {
1884 tracing::warn!(
1885 "Account selection returned empty list, falling back to first COA account"
1886 );
1887 &self.coa.accounts[0]
1888 })
1889 }
1890}
1891
1892impl Generator for JournalEntryGenerator {
1893 type Item = JournalEntry;
1894 type Config = (
1895 TransactionConfig,
1896 Arc<ChartOfAccounts>,
1897 Vec<String>,
1898 NaiveDate,
1899 NaiveDate,
1900 );
1901
1902 fn new(config: Self::Config, seed: u64) -> Self {
1903 Self::new_with_params(config.0, config.1, config.2, config.3, config.4, seed)
1904 }
1905
1906 fn generate_one(&mut self) -> Self::Item {
1907 self.generate()
1908 }
1909
1910 fn reset(&mut self) {
1911 self.rng = seeded_rng(self.seed, 0);
1912 self.line_sampler.reset(self.seed + 1);
1913 self.amount_sampler.reset(self.seed + 2);
1914 self.temporal_sampler.reset(self.seed + 3);
1915 self.count = 0;
1916 self.uuid_factory.reset();
1917
1918 let mut ref_gen = ReferenceGenerator::new(
1920 self.start_date.year(),
1921 self.companies.first().map(|s| s.as_str()).unwrap_or("1000"),
1922 );
1923 ref_gen.set_prefix(
1924 ReferenceType::Invoice,
1925 &self.template_config.references.invoice_prefix,
1926 );
1927 ref_gen.set_prefix(
1928 ReferenceType::PurchaseOrder,
1929 &self.template_config.references.po_prefix,
1930 );
1931 ref_gen.set_prefix(
1932 ReferenceType::SalesOrder,
1933 &self.template_config.references.so_prefix,
1934 );
1935 self.reference_generator = ref_gen;
1936 }
1937
1938 fn count(&self) -> u64 {
1939 self.count
1940 }
1941
1942 fn seed(&self) -> u64 {
1943 self.seed
1944 }
1945}
1946
1947use datasynth_core::traits::ParallelGenerator;
1948
1949impl ParallelGenerator for JournalEntryGenerator {
1950 fn split(self, parts: usize) -> Vec<Self> {
1956 let parts = parts.max(1);
1957 (0..parts)
1958 .map(|i| {
1959 let sub_seed = self
1961 .seed
1962 .wrapping_add((i as u64).wrapping_mul(0x9E3779B97F4A7C15));
1963
1964 let mut gen = JournalEntryGenerator::new_with_full_config(
1965 self.config.clone(),
1966 Arc::clone(&self.coa),
1967 self.companies.clone(),
1968 self.start_date,
1969 self.end_date,
1970 sub_seed,
1971 self.template_config.clone(),
1972 self.user_pool.clone(),
1973 );
1974
1975 gen.company_selector = self.company_selector.clone();
1977 gen.vendor_pool = self.vendor_pool.clone();
1978 gen.customer_pool = self.customer_pool.clone();
1979 gen.material_pool = self.material_pool.clone();
1980 gen.using_real_master_data = self.using_real_master_data;
1981 gen.fraud_config = self.fraud_config.clone();
1982 gen.persona_errors_enabled = self.persona_errors_enabled;
1983 gen.approval_enabled = self.approval_enabled;
1984 gen.approval_threshold = self.approval_threshold;
1985
1986 gen.uuid_factory = DeterministicUuidFactory::for_partition(
1988 sub_seed,
1989 GeneratorType::JournalEntry,
1990 i as u8,
1991 );
1992
1993 if let Some(ref config) = self.temporal_patterns_config {
1995 gen.temporal_patterns_config = Some(config.clone());
1996 if config.business_days.enabled {
1998 if let Some(ref bdc) = self.business_day_calculator {
1999 gen.business_day_calculator = Some(bdc.clone());
2000 }
2001 }
2002 if config.processing_lags.enabled {
2004 let lag_config =
2005 Self::convert_processing_lag_config(&config.processing_lags);
2006 gen.processing_lag_calculator =
2007 Some(ProcessingLagCalculator::with_config(sub_seed, lag_config));
2008 }
2009 }
2010
2011 if let Some(ref dc) = self.drift_controller {
2013 gen.drift_controller = Some(dc.clone());
2014 }
2015
2016 gen
2017 })
2018 .collect()
2019 }
2020}
2021
2022#[cfg(test)]
2023#[allow(clippy::unwrap_used)]
2024mod tests {
2025 use super::*;
2026 use crate::ChartOfAccountsGenerator;
2027
2028 #[test]
2029 fn test_generate_balanced_entries() {
2030 let mut coa_gen =
2031 ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2032 let coa = Arc::new(coa_gen.generate());
2033
2034 let mut je_gen = JournalEntryGenerator::new_with_params(
2035 TransactionConfig::default(),
2036 coa,
2037 vec!["1000".to_string()],
2038 NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2039 NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2040 42,
2041 );
2042
2043 let mut balanced_count = 0;
2044 for _ in 0..100 {
2045 let entry = je_gen.generate();
2046
2047 let has_human_error = entry
2049 .header
2050 .header_text
2051 .as_ref()
2052 .map(|t| t.contains("[HUMAN_ERROR:"))
2053 .unwrap_or(false);
2054
2055 if !has_human_error {
2056 assert!(
2057 entry.is_balanced(),
2058 "Entry {:?} is not balanced",
2059 entry.header.document_id
2060 );
2061 balanced_count += 1;
2062 }
2063 assert!(entry.line_count() >= 2, "Entry has fewer than 2 lines");
2064 }
2065
2066 assert!(
2068 balanced_count >= 80,
2069 "Expected at least 80 balanced entries, got {}",
2070 balanced_count
2071 );
2072 }
2073
2074 #[test]
2075 fn test_deterministic_generation() {
2076 let mut coa_gen =
2077 ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2078 let coa = Arc::new(coa_gen.generate());
2079
2080 let mut gen1 = JournalEntryGenerator::new_with_params(
2081 TransactionConfig::default(),
2082 Arc::clone(&coa),
2083 vec!["1000".to_string()],
2084 NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2085 NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2086 42,
2087 );
2088
2089 let mut gen2 = JournalEntryGenerator::new_with_params(
2090 TransactionConfig::default(),
2091 coa,
2092 vec!["1000".to_string()],
2093 NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2094 NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2095 42,
2096 );
2097
2098 for _ in 0..50 {
2099 let e1 = gen1.generate();
2100 let e2 = gen2.generate();
2101 assert_eq!(e1.header.document_id, e2.header.document_id);
2102 assert_eq!(e1.total_debit(), e2.total_debit());
2103 }
2104 }
2105
2106 #[test]
2107 fn test_templates_generate_descriptions() {
2108 let mut coa_gen =
2109 ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2110 let coa = Arc::new(coa_gen.generate());
2111
2112 let template_config = TemplateConfig {
2114 names: datasynth_config::schema::NameTemplateConfig {
2115 generate_realistic_names: true,
2116 email_domain: "test.com".to_string(),
2117 culture_distribution: datasynth_config::schema::CultureDistribution::default(),
2118 },
2119 descriptions: datasynth_config::schema::DescriptionTemplateConfig {
2120 generate_header_text: true,
2121 generate_line_text: true,
2122 },
2123 references: datasynth_config::schema::ReferenceTemplateConfig {
2124 generate_references: true,
2125 invoice_prefix: "TEST-INV".to_string(),
2126 po_prefix: "TEST-PO".to_string(),
2127 so_prefix: "TEST-SO".to_string(),
2128 },
2129 };
2130
2131 let mut je_gen = JournalEntryGenerator::new_with_full_config(
2132 TransactionConfig::default(),
2133 coa,
2134 vec!["1000".to_string()],
2135 NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2136 NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2137 42,
2138 template_config,
2139 None,
2140 )
2141 .with_persona_errors(false); for _ in 0..10 {
2144 let entry = je_gen.generate();
2145
2146 assert!(
2148 entry.header.header_text.is_some(),
2149 "Header text should be populated"
2150 );
2151
2152 assert!(
2154 entry.header.reference.is_some(),
2155 "Reference should be populated"
2156 );
2157
2158 assert!(
2160 entry.header.business_process.is_some(),
2161 "Business process should be set"
2162 );
2163
2164 for line in &entry.lines {
2166 assert!(line.line_text.is_some(), "Line text should be populated");
2167 }
2168
2169 assert!(entry.is_balanced());
2171 }
2172 }
2173
2174 #[test]
2175 fn test_user_pool_integration() {
2176 let mut coa_gen =
2177 ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2178 let coa = Arc::new(coa_gen.generate());
2179
2180 let companies = vec!["1000".to_string()];
2181
2182 let mut user_gen = crate::UserGenerator::new(42);
2184 let user_pool = user_gen.generate_standard(&companies);
2185
2186 let mut je_gen = JournalEntryGenerator::new_with_full_config(
2187 TransactionConfig::default(),
2188 coa,
2189 companies,
2190 NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2191 NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2192 42,
2193 TemplateConfig::default(),
2194 Some(user_pool),
2195 );
2196
2197 for _ in 0..20 {
2199 let entry = je_gen.generate();
2200
2201 assert!(!entry.header.created_by.is_empty());
2204 }
2205 }
2206
2207 #[test]
2208 fn test_master_data_connection() {
2209 let mut coa_gen =
2210 ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2211 let coa = Arc::new(coa_gen.generate());
2212
2213 let vendors = vec![
2215 Vendor::new("V-TEST-001", "Test Vendor Alpha", VendorType::Supplier),
2216 Vendor::new("V-TEST-002", "Test Vendor Beta", VendorType::Technology),
2217 ];
2218
2219 let customers = vec![
2221 Customer::new("C-TEST-001", "Test Customer One", CustomerType::Corporate),
2222 Customer::new(
2223 "C-TEST-002",
2224 "Test Customer Two",
2225 CustomerType::SmallBusiness,
2226 ),
2227 ];
2228
2229 let materials = vec![Material::new(
2231 "MAT-TEST-001",
2232 "Test Material A",
2233 MaterialType::RawMaterial,
2234 )];
2235
2236 let generator = JournalEntryGenerator::new_with_params(
2238 TransactionConfig::default(),
2239 coa,
2240 vec!["1000".to_string()],
2241 NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2242 NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2243 42,
2244 );
2245
2246 assert!(!generator.is_using_real_master_data());
2248
2249 let generator_with_data = generator
2251 .with_vendors(&vendors)
2252 .with_customers(&customers)
2253 .with_materials(&materials);
2254
2255 assert!(generator_with_data.is_using_real_master_data());
2257 }
2258
2259 #[test]
2260 fn test_with_master_data_convenience_method() {
2261 let mut coa_gen =
2262 ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2263 let coa = Arc::new(coa_gen.generate());
2264
2265 let vendors = vec![Vendor::new("V-001", "Vendor One", VendorType::Supplier)];
2266 let customers = vec![Customer::new(
2267 "C-001",
2268 "Customer One",
2269 CustomerType::Corporate,
2270 )];
2271 let materials = vec![Material::new(
2272 "MAT-001",
2273 "Material One",
2274 MaterialType::RawMaterial,
2275 )];
2276
2277 let generator = JournalEntryGenerator::new_with_params(
2278 TransactionConfig::default(),
2279 coa,
2280 vec!["1000".to_string()],
2281 NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2282 NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2283 42,
2284 )
2285 .with_master_data(&vendors, &customers, &materials);
2286
2287 assert!(generator.is_using_real_master_data());
2288 }
2289
2290 #[test]
2291 fn test_stress_factors_increase_error_rate() {
2292 let mut coa_gen =
2293 ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2294 let coa = Arc::new(coa_gen.generate());
2295
2296 let generator = JournalEntryGenerator::new_with_params(
2297 TransactionConfig::default(),
2298 coa,
2299 vec!["1000".to_string()],
2300 NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2301 NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2302 42,
2303 );
2304
2305 let base_rate = 0.1;
2306
2307 let regular_day = NaiveDate::from_ymd_opt(2024, 6, 15).unwrap(); let regular_rate = generator.apply_stress_factors(base_rate, regular_day);
2310 assert!(
2311 (regular_rate - base_rate).abs() < 0.01,
2312 "Regular day should have minimal stress factor adjustment"
2313 );
2314
2315 let month_end = NaiveDate::from_ymd_opt(2024, 6, 29).unwrap(); let month_end_rate = generator.apply_stress_factors(base_rate, month_end);
2318 assert!(
2319 month_end_rate > regular_rate,
2320 "Month end should have higher error rate than regular day"
2321 );
2322
2323 let year_end = NaiveDate::from_ymd_opt(2024, 12, 30).unwrap(); let year_end_rate = generator.apply_stress_factors(base_rate, year_end);
2326 assert!(
2327 year_end_rate > month_end_rate,
2328 "Year end should have highest error rate"
2329 );
2330
2331 let friday = NaiveDate::from_ymd_opt(2024, 6, 14).unwrap(); let friday_rate = generator.apply_stress_factors(base_rate, friday);
2334 assert!(
2335 friday_rate > regular_rate,
2336 "Friday should have higher error rate than mid-week"
2337 );
2338
2339 let monday = NaiveDate::from_ymd_opt(2024, 6, 17).unwrap(); let monday_rate = generator.apply_stress_factors(base_rate, monday);
2342 assert!(
2343 monday_rate > regular_rate,
2344 "Monday should have higher error rate than mid-week"
2345 );
2346 }
2347
2348 #[test]
2349 fn test_batching_produces_similar_entries() {
2350 let mut coa_gen =
2351 ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2352 let coa = Arc::new(coa_gen.generate());
2353
2354 let mut je_gen = JournalEntryGenerator::new_with_params(
2356 TransactionConfig::default(),
2357 coa,
2358 vec!["1000".to_string()],
2359 NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2360 NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2361 123,
2362 )
2363 .with_persona_errors(false); let entries: Vec<JournalEntry> = (0..200).map(|_| je_gen.generate()).collect();
2367
2368 for entry in &entries {
2370 assert!(
2371 entry.is_balanced(),
2372 "All entries including batched should be balanced"
2373 );
2374 }
2375
2376 let mut date_counts: std::collections::HashMap<NaiveDate, usize> =
2378 std::collections::HashMap::new();
2379 for entry in &entries {
2380 *date_counts.entry(entry.header.posting_date).or_insert(0) += 1;
2381 }
2382
2383 let dates_with_multiple = date_counts.values().filter(|&&c| c > 1).count();
2385 assert!(
2386 dates_with_multiple > 0,
2387 "With batching, should see some dates with multiple entries"
2388 );
2389 }
2390
2391 #[test]
2392 fn test_temporal_patterns_business_days() {
2393 use datasynth_config::schema::{
2394 BusinessDaySchemaConfig, CalendarSchemaConfig, TemporalPatternsConfig,
2395 };
2396
2397 let mut coa_gen =
2398 ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2399 let coa = Arc::new(coa_gen.generate());
2400
2401 let temporal_config = TemporalPatternsConfig {
2403 enabled: true,
2404 business_days: BusinessDaySchemaConfig {
2405 enabled: true,
2406 ..Default::default()
2407 },
2408 calendars: CalendarSchemaConfig {
2409 regions: vec!["US".to_string()],
2410 custom_holidays: vec![],
2411 },
2412 ..Default::default()
2413 };
2414
2415 let mut je_gen = JournalEntryGenerator::new_with_params(
2416 TransactionConfig::default(),
2417 coa,
2418 vec!["1000".to_string()],
2419 NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2420 NaiveDate::from_ymd_opt(2024, 3, 31).unwrap(), 42,
2422 )
2423 .with_temporal_patterns(temporal_config, 42)
2424 .with_persona_errors(false);
2425
2426 let entries: Vec<JournalEntry> = (0..100).map(|_| je_gen.generate()).collect();
2428
2429 for entry in &entries {
2430 let weekday = entry.header.posting_date.weekday();
2431 assert!(
2432 weekday != chrono::Weekday::Sat && weekday != chrono::Weekday::Sun,
2433 "Posting date {:?} should not be a weekend",
2434 entry.header.posting_date
2435 );
2436 }
2437 }
2438
2439 #[test]
2440 fn test_default_generation_filters_weekends() {
2441 let mut coa_gen =
2445 ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
2446 let coa = Arc::new(coa_gen.generate());
2447
2448 let mut je_gen = JournalEntryGenerator::new_with_params(
2449 TransactionConfig::default(),
2450 coa,
2451 vec!["1000".to_string()],
2452 NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
2453 NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2454 42,
2455 )
2456 .with_persona_errors(false);
2457
2458 let total = 500;
2459 let entries: Vec<JournalEntry> = (0..total).map(|_| je_gen.generate()).collect();
2460
2461 let weekend_count = entries
2462 .iter()
2463 .filter(|e| {
2464 let wd = e.header.posting_date.weekday();
2465 wd == chrono::Weekday::Sat || wd == chrono::Weekday::Sun
2466 })
2467 .count();
2468
2469 let weekend_pct = weekend_count as f64 / total as f64;
2470 assert!(
2471 weekend_pct < 0.05,
2472 "Expected weekend entries <5% of total without temporal_patterns enabled, \
2473 but got {:.1}% ({}/{})",
2474 weekend_pct * 100.0,
2475 weekend_count,
2476 total
2477 );
2478 }
2479
/// Verifies that generated entries carry a varied mix of document types
/// instead of collapsing onto the single code "SA".
///
/// Generates 200 entries (seed 99, persona errors and batching disabled) for
/// company "1000" over calendar year 2024, then requires more than three
/// distinct `document_type` values and an "SA" share below 50%.
#[test]
fn test_document_type_derived_from_business_process() {
    let mut coa_gen =
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
    let coa = Arc::new(coa_gen.generate());

    let period_start = NaiveDate::from_ymd_opt(2024, 1, 1).unwrap();
    let period_end = NaiveDate::from_ymd_opt(2024, 12, 31).unwrap();
    let mut je_gen = JournalEntryGenerator::new_with_params(
        TransactionConfig::default(),
        coa,
        vec!["1000".to_string()],
        period_start,
        period_end,
        99,
    )
    .with_persona_errors(false)
    .with_batching(false);

    let total = 200;
    // Materialise the whole sample first so both statistics read the same data.
    let entries: Vec<_> = (0..total).map(|_| je_gen.generate()).collect();

    // Distinct document types seen across the sample.
    let doc_types: std::collections::HashSet<_> = entries
        .iter()
        .map(|entry| entry.header.document_type.clone())
        .collect();

    // Number of entries that carry the "SA" document type.
    let sa_count = entries
        .iter()
        .filter(|entry| {
            let dt = &entry.header.document_type;
            dt == "SA"
        })
        .count();

    // The sample must show real variety in document types.
    assert!(
        doc_types.len() > 3,
        "Expected >3 distinct document types, got {} ({:?})",
        doc_types.len(),
        doc_types,
    );

    // "SA" must not account for half or more of the sample.
    let sa_pct = sa_count as f64 / total as f64;
    assert!(
        sa_pct < 0.50,
        "Expected SA <50%, got {:.1}% ({}/{})",
        sa_pct * 100.0,
        sa_count,
        total,
    );
}
2528
/// Verifies that nearly every generated line item has an account description.
///
/// Generates 200 entries (seed 42, persona errors disabled) for company
/// "1000" over calendar year 2024 and requires that more than 95% of all
/// lines have `account_description` set.
#[test]
fn test_enrich_line_items_account_description() {
    let mut coa_gen =
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
    let coa = Arc::new(coa_gen.generate());

    let period_start = NaiveDate::from_ymd_opt(2024, 1, 1).unwrap();
    let period_end = NaiveDate::from_ymd_opt(2024, 12, 31).unwrap();
    let mut je_gen = JournalEntryGenerator::new_with_params(
        TransactionConfig::default(),
        coa,
        vec!["1000".to_string()],
        period_start,
        period_end,
        42,
    )
    .with_persona_errors(false);

    let total = 200;
    let entries: Vec<JournalEntry> = (0..total).map(|_| je_gen.generate()).collect();

    // One pass over every line: count all lines and those with a description.
    let (total_lines, lines_with_desc) = entries
        .iter()
        .flat_map(|entry| entry.lines.iter())
        .fold((0_usize, 0_usize), |(seen, described), line| {
            let has_desc = usize::from(line.account_description.is_some());
            (seen + 1, described + has_desc)
        });

    let desc_pct = lines_with_desc as f64 / total_lines as f64;
    assert!(
        desc_pct > 0.95,
        "Expected >95% of lines to have account_description, got {:.1}% ({}/{})",
        desc_pct * 100.0,
        lines_with_desc,
        total_lines,
    );
}
2565
/// Verifies that most expense-account lines are assigned a cost center.
///
/// Generates 300 entries (seed 42, persona errors disabled) for company
/// "1000" over calendar year 2024, selects the lines whose `gl_account`
/// starts with '5' or '6', and requires that more than 80% of them have
/// `cost_center` set.
///
/// NOTE(review): when the sample contains no such lines the test returns
/// without asserting anything, i.e. it passes vacuously.
#[test]
fn test_enrich_line_items_cost_center_for_expense_accounts() {
    let mut coa_gen =
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
    let coa = Arc::new(coa_gen.generate());

    let period_start = NaiveDate::from_ymd_opt(2024, 1, 1).unwrap();
    let period_end = NaiveDate::from_ymd_opt(2024, 12, 31).unwrap();
    let mut je_gen = JournalEntryGenerator::new_with_params(
        TransactionConfig::default(),
        coa,
        vec!["1000".to_string()],
        period_start,
        period_end,
        42,
    )
    .with_persona_errors(false);

    let total = 300;
    let entries: Vec<JournalEntry> = (0..total).map(|_| je_gen.generate()).collect();

    // Keep only lines whose account number begins with '5' or '6'; an empty
    // account string falls back to '0' and is therefore excluded.
    let expense_lines: Vec<&JournalEntryLine> = entries
        .iter()
        .flat_map(|entry| entry.lines.iter())
        .filter(|line| matches!(line.gl_account.chars().next().unwrap_or('0'), '5' | '6'))
        .collect();

    // Nothing to measure: skip the ratio check rather than divide by zero.
    if expense_lines.is_empty() {
        return;
    }

    let with_cc = expense_lines
        .iter()
        .filter(|line| line.cost_center.is_some())
        .count();
    let cc_pct = with_cc as f64 / expense_lines.len() as f64;
    assert!(
        cc_pct > 0.80,
        "Expected >80% of expense lines to have cost_center, got {:.1}% ({}/{})",
        cc_pct * 100.0,
        with_cc,
        expense_lines.len(),
    );
}
2610
/// Verifies that nearly every generated line item has both a profit center
/// and a line text.
///
/// Generates 100 entries (seed 42, persona errors disabled) for company
/// "1000" over calendar year 2024 and requires that more than 95% of all
/// lines have `profit_center` set and, separately, more than 95% have
/// `line_text` set.
#[test]
fn test_enrich_line_items_profit_center_and_line_text() {
    let mut coa_gen =
        ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
    let coa = Arc::new(coa_gen.generate());

    let period_start = NaiveDate::from_ymd_opt(2024, 1, 1).unwrap();
    let period_end = NaiveDate::from_ymd_opt(2024, 12, 31).unwrap();
    let mut je_gen = JournalEntryGenerator::new_with_params(
        TransactionConfig::default(),
        coa,
        vec!["1000".to_string()],
        period_start,
        period_end,
        42,
    )
    .with_persona_errors(false);

    let total = 100;
    let entries: Vec<JournalEntry> = (0..total).map(|_| je_gen.generate()).collect();

    // Flatten once; both coverage checks below read the same list of lines.
    let all_lines: Vec<_> = entries
        .iter()
        .flat_map(|entry| entry.lines.iter())
        .collect();
    let total_lines: usize = all_lines.len();

    // Profit-center coverage is checked first.
    let with_pc = all_lines
        .iter()
        .filter(|line| line.profit_center.is_some())
        .count();
    let pc_pct = with_pc as f64 / total_lines as f64;
    assert!(
        pc_pct > 0.95,
        "Expected >95% of lines to have profit_center, got {:.1}% ({}/{})",
        pc_pct * 100.0,
        with_pc,
        total_lines,
    );

    // Line-text coverage is checked second, against the same denominator.
    let with_text = all_lines
        .iter()
        .filter(|line| line.line_text.is_some())
        .count();
    let text_pct = with_text as f64 / total_lines as f64;
    assert!(
        text_pct > 0.95,
        "Expected >95% of lines to have line_text, got {:.1}% ({}/{})",
        text_pct * 100.0,
        with_text,
        total_lines,
    );
}
2662}