1use std::collections::HashMap;
15use std::path::PathBuf;
16use std::sync::Arc;
17
18use chrono::{Datelike, NaiveDate};
19use indicatif::{MultiProgress, ProgressBar, ProgressStyle};
20use tracing::{debug, info, warn};
21
22use datasynth_banking::{
23 models::{BankAccount, BankTransaction, BankingCustomer},
24 BankingOrchestratorBuilder,
25};
26use datasynth_config::schema::GeneratorConfig;
27use datasynth_core::error::{SynthError, SynthResult};
28use datasynth_core::models::audit::{
29 AuditEngagement, AuditEvidence, AuditFinding, ProfessionalJudgment, RiskAssessment, Workpaper,
30};
31use datasynth_core::models::subledger::ap::APInvoice;
32use datasynth_core::models::subledger::ar::ARInvoice;
33use datasynth_core::models::*;
34use datasynth_core::{DegradationActions, DegradationLevel, ResourceGuard, ResourceGuardBuilder};
35use datasynth_generators::{
36 AnomalyInjector,
38 AnomalyInjectorConfig,
39 AssetGenerator,
40 AuditEngagementGenerator,
42 BalanceTrackerConfig,
43 ChartOfAccountsGenerator,
45 CustomerGenerator,
46 DataQualityConfig,
47 DataQualityInjector,
49 DataQualityStats,
50 DocumentFlowJeConfig,
52 DocumentFlowJeGenerator,
53 DocumentFlowLinker,
55 EmployeeGenerator,
56 EvidenceGenerator,
57 FindingGenerator,
58 JournalEntryGenerator,
59 JudgmentGenerator,
60 LatePaymentDistribution,
61 MaterialGenerator,
62 O2CDocumentChain,
63 O2CGenerator,
64 O2CGeneratorConfig,
65 O2CPaymentBehavior,
66 P2PDocumentChain,
67 P2PGenerator,
69 P2PGeneratorConfig,
70 P2PPaymentBehavior,
71 RiskAssessmentGenerator,
72 RunningBalanceTracker,
74 ValidationError,
75 VendorGenerator,
77 WorkpaperGenerator,
78};
79use datasynth_ocpm::{
80 EventLogMetadata, O2cDocuments, OcpmEventGenerator, OcpmEventLog, OcpmGeneratorConfig,
81 P2pDocuments,
82};
83
84use datasynth_config::schema::{O2CFlowConfig, P2PFlowConfig};
85use datasynth_core::models::documents::PaymentMethod;
86
87fn convert_p2p_config(schema_config: &P2PFlowConfig) -> P2PGeneratorConfig {
93 let payment_behavior = &schema_config.payment_behavior;
94 let late_dist = &payment_behavior.late_payment_days_distribution;
95
96 P2PGeneratorConfig {
97 three_way_match_rate: schema_config.three_way_match_rate,
98 partial_delivery_rate: schema_config.partial_delivery_rate,
99 over_delivery_rate: 0.02, price_variance_rate: schema_config.price_variance_rate,
101 max_price_variance_percent: schema_config.max_price_variance_percent,
102 avg_days_po_to_gr: schema_config.average_po_to_gr_days,
103 avg_days_gr_to_invoice: schema_config.average_gr_to_invoice_days,
104 avg_days_invoice_to_payment: schema_config.average_invoice_to_payment_days,
105 payment_method_distribution: vec![
106 (PaymentMethod::BankTransfer, 0.60),
107 (PaymentMethod::Check, 0.25),
108 (PaymentMethod::Wire, 0.10),
109 (PaymentMethod::CreditCard, 0.05),
110 ],
111 early_payment_discount_rate: 0.30, payment_behavior: P2PPaymentBehavior {
113 late_payment_rate: payment_behavior.late_payment_rate,
114 late_payment_distribution: LatePaymentDistribution {
115 slightly_late_1_to_7: late_dist.slightly_late_1_to_7,
116 late_8_to_14: late_dist.late_8_to_14,
117 very_late_15_to_30: late_dist.very_late_15_to_30,
118 severely_late_31_to_60: late_dist.severely_late_31_to_60,
119 extremely_late_over_60: late_dist.extremely_late_over_60,
120 },
121 partial_payment_rate: payment_behavior.partial_payment_rate,
122 payment_correction_rate: payment_behavior.payment_correction_rate,
123 },
124 }
125}
126
127fn convert_o2c_config(schema_config: &O2CFlowConfig) -> O2CGeneratorConfig {
129 let payment_behavior = &schema_config.payment_behavior;
130
131 O2CGeneratorConfig {
132 credit_check_failure_rate: schema_config.credit_check_failure_rate,
133 partial_shipment_rate: schema_config.partial_shipment_rate,
134 avg_days_so_to_delivery: schema_config.average_so_to_delivery_days,
135 avg_days_delivery_to_invoice: schema_config.average_delivery_to_invoice_days,
136 avg_days_invoice_to_payment: schema_config.average_invoice_to_receipt_days,
137 late_payment_rate: 0.15, bad_debt_rate: schema_config.bad_debt_rate,
139 returns_rate: schema_config.return_rate,
140 cash_discount_take_rate: schema_config.cash_discount.taken_rate,
141 payment_method_distribution: vec![
142 (PaymentMethod::BankTransfer, 0.50),
143 (PaymentMethod::Check, 0.30),
144 (PaymentMethod::Wire, 0.15),
145 (PaymentMethod::CreditCard, 0.05),
146 ],
147 payment_behavior: O2CPaymentBehavior {
148 partial_payment_rate: payment_behavior.partial_payments.rate,
149 short_payment_rate: payment_behavior.short_payments.rate,
150 max_short_percent: payment_behavior.short_payments.max_short_percent,
151 on_account_rate: payment_behavior.on_account_payments.rate,
152 payment_correction_rate: payment_behavior.payment_corrections.rate,
153 avg_days_until_remainder: payment_behavior.partial_payments.avg_days_until_remainder,
154 },
155 }
156}
157
/// Switches and sizing knobs that control which generation phases run and how
/// much data each of them is asked to produce.
#[derive(Clone, Debug)]
pub struct PhaseConfig {
    // --- Phase toggles ---
    /// Run the master data phase (vendors, customers, materials, assets, employees).
    pub generate_master_data: bool,
    /// Run the document flow phase (P2P and O2C chains).
    pub generate_document_flows: bool,
    /// Run the OCPM event generation phase.
    pub generate_ocpm_events: bool,
    /// Run the standalone journal entry phase.
    pub generate_journal_entries: bool,
    /// Run the anomaly injection phase.
    pub inject_anomalies: bool,
    /// Run the data quality injection phase.
    pub inject_data_quality: bool,
    /// Run the balance validation phase.
    pub validate_balances: bool,
    /// Whether progress reporting is requested.
    pub show_progress: bool,

    // --- Master data sizing (per company) ---
    /// Number of vendors requested per company.
    pub vendors_per_company: usize,
    /// Number of customers requested per company.
    pub customers_per_company: usize,
    /// Number of materials requested per company.
    pub materials_per_company: usize,
    /// Number of fixed assets requested per company.
    pub assets_per_company: usize,
    /// Number of employees per company.
    // NOTE(review): confirm where this value is consumed.
    pub employees_per_company: usize,

    // --- Document flow sizing ---
    /// Upper bound on the number of P2P chains to generate.
    pub p2p_chains: usize,
    /// Upper bound on the number of O2C chains to generate.
    pub o2c_chains: usize,

    // --- Audit ---
    /// Run the audit data phase.
    pub generate_audit: bool,
    /// Number of audit engagements.
    pub audit_engagements: usize,
    /// Workpapers per engagement.
    pub workpapers_per_engagement: usize,
    /// Evidence items per workpaper.
    pub evidence_per_workpaper: usize,
    /// Risk assessments per engagement.
    pub risks_per_engagement: usize,
    /// Findings per engagement.
    pub findings_per_engagement: usize,
    /// Professional judgments per engagement.
    pub judgments_per_engagement: usize,

    // --- Banking ---
    /// Run the banking data phase.
    pub generate_banking: bool,
}
208
209impl Default for PhaseConfig {
210 fn default() -> Self {
211 Self {
212 generate_master_data: true,
213 generate_document_flows: true,
214 generate_ocpm_events: false, generate_journal_entries: true,
216 inject_anomalies: false,
217 inject_data_quality: false, validate_balances: true,
219 show_progress: true,
220 vendors_per_company: 50,
221 customers_per_company: 100,
222 materials_per_company: 200,
223 assets_per_company: 50,
224 employees_per_company: 100,
225 p2p_chains: 100,
226 o2c_chains: 100,
227 generate_audit: false, audit_engagements: 5,
229 workpapers_per_engagement: 20,
230 evidence_per_workpaper: 5,
231 risks_per_engagement: 15,
232 findings_per_engagement: 8,
233 judgments_per_engagement: 10,
234 generate_banking: false, }
236 }
237}
238
239#[derive(Debug, Clone, Default)]
241pub struct MasterDataSnapshot {
242 pub vendors: Vec<Vendor>,
244 pub customers: Vec<Customer>,
246 pub materials: Vec<Material>,
248 pub assets: Vec<FixedAsset>,
250 pub employees: Vec<Employee>,
252}
253
254#[derive(Debug, Clone, Default)]
256pub struct DocumentFlowSnapshot {
257 pub p2p_chains: Vec<P2PDocumentChain>,
259 pub o2c_chains: Vec<O2CDocumentChain>,
261 pub purchase_orders: Vec<documents::PurchaseOrder>,
263 pub goods_receipts: Vec<documents::GoodsReceipt>,
265 pub vendor_invoices: Vec<documents::VendorInvoice>,
267 pub sales_orders: Vec<documents::SalesOrder>,
269 pub deliveries: Vec<documents::Delivery>,
271 pub customer_invoices: Vec<documents::CustomerInvoice>,
273 pub payments: Vec<documents::Payment>,
275}
276
277#[derive(Debug, Clone, Default)]
279pub struct SubledgerSnapshot {
280 pub ap_invoices: Vec<APInvoice>,
282 pub ar_invoices: Vec<ARInvoice>,
284}
285
286#[derive(Debug, Clone, Default)]
288pub struct OcpmSnapshot {
289 pub event_log: Option<OcpmEventLog>,
291 pub event_count: usize,
293 pub object_count: usize,
295 pub case_count: usize,
297}
298
299#[derive(Debug, Clone, Default)]
301pub struct AuditSnapshot {
302 pub engagements: Vec<AuditEngagement>,
304 pub workpapers: Vec<Workpaper>,
306 pub evidence: Vec<AuditEvidence>,
308 pub risk_assessments: Vec<RiskAssessment>,
310 pub findings: Vec<AuditFinding>,
312 pub judgments: Vec<ProfessionalJudgment>,
314}
315
316#[derive(Debug, Clone, Default)]
318pub struct BankingSnapshot {
319 pub customers: Vec<BankingCustomer>,
321 pub accounts: Vec<BankAccount>,
323 pub transactions: Vec<BankTransaction>,
325 pub suspicious_count: usize,
327 pub scenario_count: usize,
329}
330
331#[derive(Debug, Clone, Default)]
333pub struct AnomalyLabels {
334 pub labels: Vec<LabeledAnomaly>,
336 pub summary: Option<AnomalySummary>,
338 pub by_type: HashMap<String, usize>,
340}
341
342#[derive(Debug, Clone, Default)]
344pub struct BalanceValidationResult {
345 pub validated: bool,
347 pub is_balanced: bool,
349 pub entries_processed: u64,
351 pub total_debits: rust_decimal::Decimal,
353 pub total_credits: rust_decimal::Decimal,
355 pub accounts_tracked: usize,
357 pub companies_tracked: usize,
359 pub validation_errors: Vec<ValidationError>,
361 pub has_unbalanced_entries: bool,
363}
364
365#[derive(Debug)]
367pub struct EnhancedGenerationResult {
368 pub chart_of_accounts: ChartOfAccounts,
370 pub master_data: MasterDataSnapshot,
372 pub document_flows: DocumentFlowSnapshot,
374 pub subledger: SubledgerSnapshot,
376 pub ocpm: OcpmSnapshot,
378 pub audit: AuditSnapshot,
380 pub banking: BankingSnapshot,
382 pub journal_entries: Vec<JournalEntry>,
384 pub anomaly_labels: AnomalyLabels,
386 pub balance_validation: BalanceValidationResult,
388 pub data_quality_stats: DataQualityStats,
390 pub statistics: EnhancedGenerationStatistics,
392}
393
/// Summary counters describing what a generation run produced.
#[derive(Clone, Debug, Default)]
pub struct EnhancedGenerationStatistics {
    // --- Journal entries ---
    /// Total number of journal entries.
    pub total_entries: u64,
    /// Total number of journal entry line items.
    pub total_line_items: u64,

    // --- Run dimensions ---
    /// Number of accounts in the chart of accounts.
    pub accounts_count: usize,
    /// Number of configured companies.
    pub companies_count: usize,
    /// Length of the generation period in months.
    pub period_months: u32,

    // --- Master data ---
    /// Number of vendors.
    pub vendor_count: usize,
    /// Number of customers.
    pub customer_count: usize,
    /// Number of materials.
    pub material_count: usize,
    /// Number of fixed assets.
    pub asset_count: usize,
    /// Number of employees.
    pub employee_count: usize,

    // --- Document flows and subledgers ---
    /// Number of P2P chains.
    pub p2p_chain_count: usize,
    /// Number of O2C chains.
    pub o2c_chain_count: usize,
    /// Number of AP invoices.
    pub ap_invoice_count: usize,
    /// Number of AR invoices.
    pub ar_invoice_count: usize,

    // --- OCPM ---
    /// Number of OCPM events.
    pub ocpm_event_count: usize,
    /// Number of OCPM objects.
    pub ocpm_object_count: usize,
    /// Number of OCPM cases.
    pub ocpm_case_count: usize,

    // --- Audit ---
    /// Number of audit engagements.
    pub audit_engagement_count: usize,
    /// Number of audit workpapers.
    pub audit_workpaper_count: usize,
    /// Number of audit evidence items.
    pub audit_evidence_count: usize,
    /// Number of audit risk assessments.
    pub audit_risk_count: usize,
    /// Number of audit findings.
    pub audit_finding_count: usize,
    /// Number of audit judgments.
    pub audit_judgment_count: usize,

    // --- Injection ---
    /// Number of anomalies injected.
    pub anomalies_injected: usize,
    /// Number of records with injected data quality issues.
    pub data_quality_issues: usize,

    // --- Banking ---
    /// Number of banking customers.
    pub banking_customer_count: usize,
    /// Number of bank accounts.
    pub banking_account_count: usize,
    /// Number of bank transactions.
    pub banking_transaction_count: usize,
    /// Number of suspicious bank transactions.
    pub banking_suspicious_count: usize,
}
440
441pub struct EnhancedOrchestrator {
443 config: GeneratorConfig,
444 phase_config: PhaseConfig,
445 coa: Option<Arc<ChartOfAccounts>>,
446 master_data: MasterDataSnapshot,
447 seed: u64,
448 multi_progress: Option<MultiProgress>,
449 resource_guard: ResourceGuard,
451 output_path: Option<PathBuf>,
453}
454
455impl EnhancedOrchestrator {
456 pub fn new(config: GeneratorConfig, phase_config: PhaseConfig) -> SynthResult<Self> {
458 datasynth_config::validate_config(&config)?;
459
460 let seed = config.global.seed.unwrap_or_else(rand::random);
461
462 let resource_guard = Self::build_resource_guard(&config, None);
464
465 Ok(Self {
466 config,
467 phase_config,
468 coa: None,
469 master_data: MasterDataSnapshot::default(),
470 seed,
471 multi_progress: None,
472 resource_guard,
473 output_path: None,
474 })
475 }
476
477 pub fn with_defaults(config: GeneratorConfig) -> SynthResult<Self> {
479 Self::new(config, PhaseConfig::default())
480 }
481
482 pub fn with_progress(mut self, show: bool) -> Self {
484 self.phase_config.show_progress = show;
485 if show {
486 self.multi_progress = Some(MultiProgress::new());
487 }
488 self
489 }
490
491 pub fn with_output_path<P: Into<PathBuf>>(mut self, path: P) -> Self {
493 let path = path.into();
494 self.output_path = Some(path.clone());
495 self.resource_guard = Self::build_resource_guard(&self.config, Some(path));
497 self
498 }
499
500 fn build_resource_guard(
502 config: &GeneratorConfig,
503 output_path: Option<PathBuf>,
504 ) -> ResourceGuard {
505 let mut builder = ResourceGuardBuilder::new();
506
507 if config.global.memory_limit_mb > 0 {
509 builder = builder.memory_limit(config.global.memory_limit_mb);
510 }
511
512 if let Some(path) = output_path {
514 builder = builder.output_path(path).min_free_disk(100); }
516
517 builder = builder.conservative();
519
520 builder.build()
521 }
522
523 fn check_resources(&self) -> SynthResult<DegradationLevel> {
528 self.resource_guard.check()
529 }
530
531 fn check_resources_with_log(&self, phase: &str) -> SynthResult<DegradationLevel> {
533 let level = self.resource_guard.check()?;
534
535 if level != DegradationLevel::Normal {
536 warn!(
537 "Resource degradation at {}: level={}, memory={}MB, disk={}MB",
538 phase,
539 level,
540 self.resource_guard.current_memory_mb(),
541 self.resource_guard.available_disk_mb()
542 );
543 }
544
545 Ok(level)
546 }
547
548 fn get_degradation_actions(&self) -> DegradationActions {
550 self.resource_guard.get_actions()
551 }
552
553 fn check_memory_limit(&self) -> SynthResult<()> {
555 self.check_resources()?;
556 Ok(())
557 }
558
559 #[allow(clippy::field_reassign_with_default)]
561 pub fn generate(&mut self) -> SynthResult<EnhancedGenerationResult> {
562 info!("Starting enhanced generation workflow");
563 info!(
564 "Config: industry={:?}, period_months={}, companies={}",
565 self.config.global.industry,
566 self.config.global.period_months,
567 self.config.companies.len()
568 );
569
570 let initial_level = self.check_resources_with_log("initial")?;
572 if initial_level == DegradationLevel::Emergency {
573 return Err(SynthError::resource(
574 "Insufficient resources to start generation",
575 ));
576 }
577
578 let mut stats = EnhancedGenerationStatistics::default();
579 stats.companies_count = self.config.companies.len();
580 stats.period_months = self.config.global.period_months;
581
582 info!("Phase 1: Generating Chart of Accounts");
584 let coa = self.generate_coa()?;
585 stats.accounts_count = coa.account_count();
586 info!(
587 "Chart of Accounts generated: {} accounts",
588 stats.accounts_count
589 );
590
591 self.check_resources_with_log("post-coa")?;
593
594 if self.phase_config.generate_master_data {
596 info!("Phase 2: Generating Master Data");
597 self.generate_master_data()?;
598 stats.vendor_count = self.master_data.vendors.len();
599 stats.customer_count = self.master_data.customers.len();
600 stats.material_count = self.master_data.materials.len();
601 stats.asset_count = self.master_data.assets.len();
602 stats.employee_count = self.master_data.employees.len();
603 info!(
604 "Master data generated: {} vendors, {} customers, {} materials, {} assets, {} employees",
605 stats.vendor_count, stats.customer_count, stats.material_count,
606 stats.asset_count, stats.employee_count
607 );
608
609 self.check_resources_with_log("post-master-data")?;
611 } else {
612 debug!("Phase 2: Skipped (master data generation disabled)");
613 }
614
615 let mut document_flows = DocumentFlowSnapshot::default();
617 let mut subledger = SubledgerSnapshot::default();
618 if self.phase_config.generate_document_flows && !self.master_data.vendors.is_empty() {
619 info!("Phase 3: Generating Document Flows");
620 self.generate_document_flows(&mut document_flows)?;
621 stats.p2p_chain_count = document_flows.p2p_chains.len();
622 stats.o2c_chain_count = document_flows.o2c_chains.len();
623 info!(
624 "Document flows generated: {} P2P chains, {} O2C chains",
625 stats.p2p_chain_count, stats.o2c_chain_count
626 );
627
628 debug!("Phase 3b: Linking document flows to subledgers");
630 subledger = self.link_document_flows_to_subledgers(&document_flows)?;
631 stats.ap_invoice_count = subledger.ap_invoices.len();
632 stats.ar_invoice_count = subledger.ar_invoices.len();
633 debug!(
634 "Subledgers linked: {} AP invoices, {} AR invoices",
635 stats.ap_invoice_count, stats.ar_invoice_count
636 );
637
638 self.check_resources_with_log("post-document-flows")?;
640 } else {
641 debug!("Phase 3: Skipped (document flow generation disabled or no master data)");
642 }
643
644 let mut ocpm_snapshot = OcpmSnapshot::default();
646 if self.phase_config.generate_ocpm_events && !document_flows.p2p_chains.is_empty() {
647 info!("Phase 3c: Generating OCPM Events");
648 ocpm_snapshot = self.generate_ocpm_events(&document_flows)?;
649 stats.ocpm_event_count = ocpm_snapshot.event_count;
650 stats.ocpm_object_count = ocpm_snapshot.object_count;
651 stats.ocpm_case_count = ocpm_snapshot.case_count;
652 info!(
653 "OCPM events generated: {} events, {} objects, {} cases",
654 stats.ocpm_event_count, stats.ocpm_object_count, stats.ocpm_case_count
655 );
656
657 self.check_resources_with_log("post-ocpm")?;
659 } else {
660 debug!("Phase 3c: Skipped (OCPM generation disabled or no document flows)");
661 }
662
663 let mut entries = Vec::new();
665
666 if self.phase_config.generate_document_flows && !document_flows.p2p_chains.is_empty() {
668 debug!("Phase 4a: Generating JEs from document flows");
669 let flow_entries = self.generate_jes_from_document_flows(&document_flows)?;
670 debug!("Generated {} JEs from document flows", flow_entries.len());
671 entries.extend(flow_entries);
672 }
673
674 if self.phase_config.generate_journal_entries {
676 info!("Phase 4: Generating Journal Entries");
677 let je_entries = self.generate_journal_entries(&coa)?;
678 info!("Generated {} standalone journal entries", je_entries.len());
679 entries.extend(je_entries);
680 } else {
681 debug!("Phase 4: Skipped (journal entry generation disabled)");
682 }
683
684 if !entries.is_empty() {
685 stats.total_entries = entries.len() as u64;
686 stats.total_line_items = entries.iter().map(|e| e.line_count() as u64).sum();
687 info!(
688 "Total entries: {}, total line items: {}",
689 stats.total_entries, stats.total_line_items
690 );
691
692 self.check_resources_with_log("post-journal-entries")?;
694 }
695
696 let actions = self.get_degradation_actions();
698
699 let mut anomaly_labels = AnomalyLabels::default();
701 if self.phase_config.inject_anomalies
702 && !entries.is_empty()
703 && !actions.skip_anomaly_injection
704 {
705 info!("Phase 5: Injecting Anomalies");
706 let result = self.inject_anomalies(&mut entries)?;
707 stats.anomalies_injected = result.labels.len();
708 anomaly_labels = result;
709 info!("Injected {} anomalies", stats.anomalies_injected);
710
711 self.check_resources_with_log("post-anomaly-injection")?;
713 } else if actions.skip_anomaly_injection {
714 warn!("Phase 5: Skipped due to resource degradation");
715 } else {
716 debug!("Phase 5: Skipped (anomaly injection disabled or no entries)");
717 }
718
719 let mut balance_validation = BalanceValidationResult::default();
721 if self.phase_config.validate_balances && !entries.is_empty() {
722 debug!("Phase 6: Validating Balances");
723 balance_validation = self.validate_journal_entries(&entries)?;
724 if balance_validation.is_balanced {
725 debug!("Balance validation passed");
726 } else {
727 warn!(
728 "Balance validation found {} errors",
729 balance_validation.validation_errors.len()
730 );
731 }
732 }
733
734 let mut data_quality_stats = DataQualityStats::default();
736 if self.phase_config.inject_data_quality
737 && !entries.is_empty()
738 && !actions.skip_data_quality
739 {
740 info!("Phase 7: Injecting Data Quality Variations");
741 data_quality_stats = self.inject_data_quality(&mut entries)?;
742 stats.data_quality_issues = data_quality_stats.records_with_issues;
743 info!("Injected {} data quality issues", stats.data_quality_issues);
744
745 self.check_resources_with_log("post-data-quality")?;
747 } else if actions.skip_data_quality {
748 warn!("Phase 7: Skipped due to resource degradation");
749 } else {
750 debug!("Phase 7: Skipped (data quality injection disabled or no entries)");
751 }
752
753 let mut audit_snapshot = AuditSnapshot::default();
755 if self.phase_config.generate_audit {
756 info!("Phase 8: Generating Audit Data");
757 audit_snapshot = self.generate_audit_data(&entries)?;
758 stats.audit_engagement_count = audit_snapshot.engagements.len();
759 stats.audit_workpaper_count = audit_snapshot.workpapers.len();
760 stats.audit_evidence_count = audit_snapshot.evidence.len();
761 stats.audit_risk_count = audit_snapshot.risk_assessments.len();
762 stats.audit_finding_count = audit_snapshot.findings.len();
763 stats.audit_judgment_count = audit_snapshot.judgments.len();
764 info!(
765 "Audit data generated: {} engagements, {} workpapers, {} evidence, {} risks, {} findings, {} judgments",
766 stats.audit_engagement_count, stats.audit_workpaper_count,
767 stats.audit_evidence_count, stats.audit_risk_count,
768 stats.audit_finding_count, stats.audit_judgment_count
769 );
770
771 self.check_resources_with_log("post-audit")?;
773 } else {
774 debug!("Phase 8: Skipped (audit generation disabled)");
775 }
776
777 let mut banking_snapshot = BankingSnapshot::default();
779 if self.phase_config.generate_banking && self.config.banking.enabled {
780 info!("Phase 9: Generating Banking KYC/AML Data");
781 banking_snapshot = self.generate_banking_data()?;
782 stats.banking_customer_count = banking_snapshot.customers.len();
783 stats.banking_account_count = banking_snapshot.accounts.len();
784 stats.banking_transaction_count = banking_snapshot.transactions.len();
785 stats.banking_suspicious_count = banking_snapshot.suspicious_count;
786 info!(
787 "Banking data generated: {} customers, {} accounts, {} transactions ({} suspicious)",
788 stats.banking_customer_count, stats.banking_account_count,
789 stats.banking_transaction_count, stats.banking_suspicious_count
790 );
791
792 self.check_resources_with_log("post-banking")?;
794 } else {
795 debug!("Phase 9: Skipped (banking generation disabled)");
796 }
797
798 let resource_stats = self.resource_guard.stats();
800 info!(
801 "Generation workflow complete. Resource stats: memory_peak={}MB, disk_written={}bytes, degradation_level={}",
802 resource_stats.memory.peak_resident_bytes / (1024 * 1024),
803 resource_stats.disk.estimated_bytes_written,
804 resource_stats.degradation_level
805 );
806
807 Ok(EnhancedGenerationResult {
808 chart_of_accounts: (*coa).clone(),
809 master_data: self.master_data.clone(),
810 document_flows,
811 subledger,
812 ocpm: ocpm_snapshot,
813 audit: audit_snapshot,
814 banking: banking_snapshot,
815 journal_entries: entries,
816 anomaly_labels,
817 balance_validation,
818 data_quality_stats,
819 statistics: stats,
820 })
821 }
822
823 fn generate_coa(&mut self) -> SynthResult<Arc<ChartOfAccounts>> {
825 let pb = self.create_progress_bar(1, "Generating Chart of Accounts");
826
827 let mut gen = ChartOfAccountsGenerator::new(
828 self.config.chart_of_accounts.complexity,
829 self.config.global.industry,
830 self.seed,
831 );
832
833 let coa = Arc::new(gen.generate());
834 self.coa = Some(Arc::clone(&coa));
835
836 if let Some(pb) = pb {
837 pb.finish_with_message("Chart of Accounts complete");
838 }
839
840 Ok(coa)
841 }
842
843 fn generate_master_data(&mut self) -> SynthResult<()> {
845 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
846 .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
847 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
848
849 let total = self.config.companies.len() as u64 * 5; let pb = self.create_progress_bar(total, "Generating Master Data");
851
852 for (i, company) in self.config.companies.iter().enumerate() {
853 let company_seed = self.seed.wrapping_add(i as u64 * 1000);
854
855 let mut vendor_gen = VendorGenerator::new(company_seed);
857 let vendor_pool = vendor_gen.generate_vendor_pool(
858 self.phase_config.vendors_per_company,
859 &company.code,
860 start_date,
861 );
862 self.master_data.vendors.extend(vendor_pool.vendors);
863 if let Some(pb) = &pb {
864 pb.inc(1);
865 }
866
867 let mut customer_gen = CustomerGenerator::new(company_seed + 100);
869 let customer_pool = customer_gen.generate_customer_pool(
870 self.phase_config.customers_per_company,
871 &company.code,
872 start_date,
873 );
874 self.master_data.customers.extend(customer_pool.customers);
875 if let Some(pb) = &pb {
876 pb.inc(1);
877 }
878
879 let mut material_gen = MaterialGenerator::new(company_seed + 200);
881 let material_pool = material_gen.generate_material_pool(
882 self.phase_config.materials_per_company,
883 &company.code,
884 start_date,
885 );
886 self.master_data.materials.extend(material_pool.materials);
887 if let Some(pb) = &pb {
888 pb.inc(1);
889 }
890
891 let mut asset_gen = AssetGenerator::new(company_seed + 300);
893 let asset_pool = asset_gen.generate_asset_pool(
894 self.phase_config.assets_per_company,
895 &company.code,
896 (start_date, end_date),
897 );
898 self.master_data.assets.extend(asset_pool.assets);
899 if let Some(pb) = &pb {
900 pb.inc(1);
901 }
902
903 let mut employee_gen = EmployeeGenerator::new(company_seed + 400);
905 let employee_pool =
906 employee_gen.generate_company_pool(&company.code, (start_date, end_date));
907 self.master_data.employees.extend(employee_pool.employees);
908 if let Some(pb) = &pb {
909 pb.inc(1);
910 }
911 }
912
913 if let Some(pb) = pb {
914 pb.finish_with_message("Master data generation complete");
915 }
916
917 Ok(())
918 }
919
920 fn generate_document_flows(&mut self, flows: &mut DocumentFlowSnapshot) -> SynthResult<()> {
922 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
923 .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
924
925 let p2p_count = self
927 .phase_config
928 .p2p_chains
929 .min(self.master_data.vendors.len() * 2);
930 let pb = self.create_progress_bar(p2p_count as u64, "Generating P2P Document Flows");
931
932 let p2p_config = convert_p2p_config(&self.config.document_flows.p2p);
934 let mut p2p_gen = P2PGenerator::with_config(self.seed + 1000, p2p_config);
935
936 for i in 0..p2p_count {
937 let vendor = &self.master_data.vendors[i % self.master_data.vendors.len()];
938 let materials: Vec<&Material> = self
939 .master_data
940 .materials
941 .iter()
942 .skip(i % self.master_data.materials.len().max(1))
943 .take(2.min(self.master_data.materials.len()))
944 .collect();
945
946 if materials.is_empty() {
947 continue;
948 }
949
950 let company = &self.config.companies[i % self.config.companies.len()];
951 let po_date = start_date + chrono::Duration::days((i * 3) as i64 % 365);
952 let fiscal_period = po_date.month() as u8;
953 let created_by = self
954 .master_data
955 .employees
956 .first()
957 .map(|e| e.user_id.as_str())
958 .unwrap_or("SYSTEM");
959
960 let chain = p2p_gen.generate_chain(
961 &company.code,
962 vendor,
963 &materials,
964 po_date,
965 start_date.year() as u16,
966 fiscal_period,
967 created_by,
968 );
969
970 flows.purchase_orders.push(chain.purchase_order.clone());
972 flows.goods_receipts.extend(chain.goods_receipts.clone());
973 if let Some(vi) = &chain.vendor_invoice {
974 flows.vendor_invoices.push(vi.clone());
975 }
976 if let Some(payment) = &chain.payment {
977 flows.payments.push(payment.clone());
978 }
979 flows.p2p_chains.push(chain);
980
981 if let Some(pb) = &pb {
982 pb.inc(1);
983 }
984 }
985
986 if let Some(pb) = pb {
987 pb.finish_with_message("P2P document flows complete");
988 }
989
990 let o2c_count = self
992 .phase_config
993 .o2c_chains
994 .min(self.master_data.customers.len() * 2);
995 let pb = self.create_progress_bar(o2c_count as u64, "Generating O2C Document Flows");
996
997 let o2c_config = convert_o2c_config(&self.config.document_flows.o2c);
999 let mut o2c_gen = O2CGenerator::with_config(self.seed + 2000, o2c_config);
1000
1001 for i in 0..o2c_count {
1002 let customer = &self.master_data.customers[i % self.master_data.customers.len()];
1003 let materials: Vec<&Material> = self
1004 .master_data
1005 .materials
1006 .iter()
1007 .skip(i % self.master_data.materials.len().max(1))
1008 .take(2.min(self.master_data.materials.len()))
1009 .collect();
1010
1011 if materials.is_empty() {
1012 continue;
1013 }
1014
1015 let company = &self.config.companies[i % self.config.companies.len()];
1016 let so_date = start_date + chrono::Duration::days((i * 2) as i64 % 365);
1017 let fiscal_period = so_date.month() as u8;
1018 let created_by = self
1019 .master_data
1020 .employees
1021 .first()
1022 .map(|e| e.user_id.as_str())
1023 .unwrap_or("SYSTEM");
1024
1025 let chain = o2c_gen.generate_chain(
1026 &company.code,
1027 customer,
1028 &materials,
1029 so_date,
1030 start_date.year() as u16,
1031 fiscal_period,
1032 created_by,
1033 );
1034
1035 flows.sales_orders.push(chain.sales_order.clone());
1037 flows.deliveries.extend(chain.deliveries.clone());
1038 if let Some(ci) = &chain.customer_invoice {
1039 flows.customer_invoices.push(ci.clone());
1040 }
1041 if let Some(receipt) = &chain.customer_receipt {
1042 flows.payments.push(receipt.clone());
1043 }
1044 flows.o2c_chains.push(chain);
1045
1046 if let Some(pb) = &pb {
1047 pb.inc(1);
1048 }
1049 }
1050
1051 if let Some(pb) = pb {
1052 pb.finish_with_message("O2C document flows complete");
1053 }
1054
1055 Ok(())
1056 }
1057
1058 fn generate_journal_entries(
1060 &mut self,
1061 coa: &Arc<ChartOfAccounts>,
1062 ) -> SynthResult<Vec<JournalEntry>> {
1063 let total = self.calculate_total_transactions();
1064 let pb = self.create_progress_bar(total, "Generating Journal Entries");
1065
1066 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
1067 .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
1068 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
1069
1070 let company_codes: Vec<String> = self
1071 .config
1072 .companies
1073 .iter()
1074 .map(|c| c.code.clone())
1075 .collect();
1076
1077 let generator = JournalEntryGenerator::new_with_params(
1078 self.config.transactions.clone(),
1079 Arc::clone(coa),
1080 company_codes,
1081 start_date,
1082 end_date,
1083 self.seed,
1084 );
1085
1086 let mut generator = generator
1090 .with_master_data(
1091 &self.master_data.vendors,
1092 &self.master_data.customers,
1093 &self.master_data.materials,
1094 )
1095 .with_persona_errors(true)
1096 .with_fraud_config(self.config.fraud.clone());
1097
1098 let mut entries = Vec::with_capacity(total as usize);
1099
1100 self.check_memory_limit()?;
1102
1103 const MEMORY_CHECK_INTERVAL: u64 = 1000;
1105
1106 for i in 0..total {
1107 let entry = generator.generate();
1108 entries.push(entry);
1109 if let Some(pb) = &pb {
1110 pb.inc(1);
1111 }
1112
1113 if (i + 1) % MEMORY_CHECK_INTERVAL == 0 {
1115 self.check_memory_limit()?;
1116 }
1117 }
1118
1119 if let Some(pb) = pb {
1120 pb.finish_with_message("Journal entries complete");
1121 }
1122
1123 Ok(entries)
1124 }
1125
1126 fn generate_jes_from_document_flows(
1131 &mut self,
1132 flows: &DocumentFlowSnapshot,
1133 ) -> SynthResult<Vec<JournalEntry>> {
1134 let total_chains = flows.p2p_chains.len() + flows.o2c_chains.len();
1135 let pb = self.create_progress_bar(total_chains as u64, "Generating Document Flow JEs");
1136
1137 let mut generator = DocumentFlowJeGenerator::with_config_and_seed(
1138 DocumentFlowJeConfig::default(),
1139 self.seed,
1140 );
1141 let mut entries = Vec::new();
1142
1143 for chain in &flows.p2p_chains {
1145 let chain_entries = generator.generate_from_p2p_chain(chain);
1146 entries.extend(chain_entries);
1147 if let Some(pb) = &pb {
1148 pb.inc(1);
1149 }
1150 }
1151
1152 for chain in &flows.o2c_chains {
1154 let chain_entries = generator.generate_from_o2c_chain(chain);
1155 entries.extend(chain_entries);
1156 if let Some(pb) = &pb {
1157 pb.inc(1);
1158 }
1159 }
1160
1161 if let Some(pb) = pb {
1162 pb.finish_with_message(format!(
1163 "Generated {} JEs from document flows",
1164 entries.len()
1165 ));
1166 }
1167
1168 Ok(entries)
1169 }
1170
1171 fn link_document_flows_to_subledgers(
1176 &mut self,
1177 flows: &DocumentFlowSnapshot,
1178 ) -> SynthResult<SubledgerSnapshot> {
1179 let total = flows.vendor_invoices.len() + flows.customer_invoices.len();
1180 let pb = self.create_progress_bar(total as u64, "Linking Subledgers");
1181
1182 let mut linker = DocumentFlowLinker::new();
1183
1184 let ap_invoices = linker.batch_create_ap_invoices(&flows.vendor_invoices);
1186 if let Some(pb) = &pb {
1187 pb.inc(flows.vendor_invoices.len() as u64);
1188 }
1189
1190 let ar_invoices = linker.batch_create_ar_invoices(&flows.customer_invoices);
1192 if let Some(pb) = &pb {
1193 pb.inc(flows.customer_invoices.len() as u64);
1194 }
1195
1196 if let Some(pb) = pb {
1197 pb.finish_with_message(format!(
1198 "Linked {} AP and {} AR invoices",
1199 ap_invoices.len(),
1200 ar_invoices.len()
1201 ));
1202 }
1203
1204 Ok(SubledgerSnapshot {
1205 ap_invoices,
1206 ar_invoices,
1207 })
1208 }
1209
1210 fn generate_ocpm_events(&mut self, flows: &DocumentFlowSnapshot) -> SynthResult<OcpmSnapshot> {
1215 let total_chains = flows.p2p_chains.len() + flows.o2c_chains.len();
1216 let pb = self.create_progress_bar(total_chains as u64, "Generating OCPM Events");
1217
1218 let metadata = EventLogMetadata::new("SyntheticData OCPM Log");
1220 let mut event_log = OcpmEventLog::with_metadata(metadata).with_standard_types();
1221
1222 let ocpm_config = OcpmGeneratorConfig {
1224 generate_p2p: true,
1225 generate_o2c: true,
1226 happy_path_rate: 0.75,
1227 exception_path_rate: 0.20,
1228 error_path_rate: 0.05,
1229 add_duration_variability: true,
1230 duration_std_dev_factor: 0.3,
1231 };
1232 let mut ocpm_gen = OcpmEventGenerator::with_config(self.seed + 3000, ocpm_config);
1233
1234 let available_users: Vec<String> = self
1236 .master_data
1237 .employees
1238 .iter()
1239 .take(20)
1240 .map(|e| e.user_id.clone())
1241 .collect();
1242
1243 for chain in &flows.p2p_chains {
1245 let po = &chain.purchase_order;
1246 let documents = P2pDocuments::new(
1247 &po.header.document_id,
1248 &po.vendor_id,
1249 &po.header.company_code,
1250 po.total_net_amount,
1251 &po.header.currency,
1252 )
1253 .with_goods_receipt(
1254 chain
1255 .goods_receipts
1256 .first()
1257 .map(|gr| gr.header.document_id.as_str())
1258 .unwrap_or(""),
1259 )
1260 .with_invoice(
1261 chain
1262 .vendor_invoice
1263 .as_ref()
1264 .map(|vi| vi.header.document_id.as_str())
1265 .unwrap_or(""),
1266 )
1267 .with_payment(
1268 chain
1269 .payment
1270 .as_ref()
1271 .map(|p| p.header.document_id.as_str())
1272 .unwrap_or(""),
1273 );
1274
1275 let start_time =
1276 chrono::DateTime::from_naive_utc_and_offset(po.header.entry_timestamp, chrono::Utc);
1277 let result = ocpm_gen.generate_p2p_case(&documents, start_time, &available_users);
1278
1279 for event in result.events {
1281 event_log.add_event(event);
1282 }
1283 for object in result.objects {
1284 event_log.add_object(object);
1285 }
1286 for relationship in result.relationships {
1287 event_log.add_relationship(relationship);
1288 }
1289 event_log.add_case(result.case_trace);
1290
1291 if let Some(pb) = &pb {
1292 pb.inc(1);
1293 }
1294 }
1295
1296 for chain in &flows.o2c_chains {
1298 let so = &chain.sales_order;
1299 let documents = O2cDocuments::new(
1300 &so.header.document_id,
1301 &so.customer_id,
1302 &so.header.company_code,
1303 so.total_net_amount,
1304 &so.header.currency,
1305 )
1306 .with_delivery(
1307 chain
1308 .deliveries
1309 .first()
1310 .map(|d| d.header.document_id.as_str())
1311 .unwrap_or(""),
1312 )
1313 .with_invoice(
1314 chain
1315 .customer_invoice
1316 .as_ref()
1317 .map(|ci| ci.header.document_id.as_str())
1318 .unwrap_or(""),
1319 )
1320 .with_receipt(
1321 chain
1322 .customer_receipt
1323 .as_ref()
1324 .map(|r| r.header.document_id.as_str())
1325 .unwrap_or(""),
1326 );
1327
1328 let start_time =
1329 chrono::DateTime::from_naive_utc_and_offset(so.header.entry_timestamp, chrono::Utc);
1330 let result = ocpm_gen.generate_o2c_case(&documents, start_time, &available_users);
1331
1332 for event in result.events {
1334 event_log.add_event(event);
1335 }
1336 for object in result.objects {
1337 event_log.add_object(object);
1338 }
1339 for relationship in result.relationships {
1340 event_log.add_relationship(relationship);
1341 }
1342 event_log.add_case(result.case_trace);
1343
1344 if let Some(pb) = &pb {
1345 pb.inc(1);
1346 }
1347 }
1348
1349 event_log.compute_variants();
1351
1352 let summary = event_log.summary();
1353
1354 if let Some(pb) = pb {
1355 pb.finish_with_message(format!(
1356 "Generated {} OCPM events, {} objects",
1357 summary.event_count, summary.object_count
1358 ));
1359 }
1360
1361 Ok(OcpmSnapshot {
1362 event_count: summary.event_count,
1363 object_count: summary.object_count,
1364 case_count: summary.case_count,
1365 event_log: Some(event_log),
1366 })
1367 }
1368
1369 fn inject_anomalies(&mut self, entries: &mut [JournalEntry]) -> SynthResult<AnomalyLabels> {
1371 let pb = self.create_progress_bar(entries.len() as u64, "Injecting Anomalies");
1372
1373 let anomaly_config = AnomalyInjectorConfig {
1374 rates: AnomalyRateConfig {
1375 total_rate: 0.02,
1376 ..Default::default()
1377 },
1378 seed: self.seed + 5000,
1379 ..Default::default()
1380 };
1381
1382 let mut injector = AnomalyInjector::new(anomaly_config);
1383 let result = injector.process_entries(entries);
1384
1385 if let Some(pb) = &pb {
1386 pb.inc(entries.len() as u64);
1387 pb.finish_with_message("Anomaly injection complete");
1388 }
1389
1390 let mut by_type = HashMap::new();
1391 for label in &result.labels {
1392 *by_type
1393 .entry(format!("{:?}", label.anomaly_type))
1394 .or_insert(0) += 1;
1395 }
1396
1397 Ok(AnomalyLabels {
1398 labels: result.labels,
1399 summary: Some(result.summary),
1400 by_type,
1401 })
1402 }
1403
1404 fn validate_journal_entries(
1413 &mut self,
1414 entries: &[JournalEntry],
1415 ) -> SynthResult<BalanceValidationResult> {
1416 let clean_entries: Vec<&JournalEntry> = entries
1418 .iter()
1419 .filter(|e| {
1420 e.header
1421 .header_text
1422 .as_ref()
1423 .map(|t| !t.contains("[HUMAN_ERROR:"))
1424 .unwrap_or(true)
1425 })
1426 .collect();
1427
1428 let pb = self.create_progress_bar(clean_entries.len() as u64, "Validating Balances");
1429
1430 let config = BalanceTrackerConfig {
1432 validate_on_each_entry: false, track_history: false, fail_on_validation_error: false, ..Default::default()
1436 };
1437
1438 let mut tracker = RunningBalanceTracker::new(config);
1439
1440 let clean_refs: Vec<JournalEntry> = clean_entries.into_iter().cloned().collect();
1442 let errors = tracker.apply_entries(&clean_refs);
1443
1444 if let Some(pb) = &pb {
1445 pb.inc(entries.len() as u64);
1446 }
1447
1448 let has_unbalanced = tracker
1451 .get_validation_errors()
1452 .iter()
1453 .any(|e| e.error_type == datasynth_generators::ValidationErrorType::UnbalancedEntry);
1454
1455 let mut all_errors = errors;
1458 all_errors.extend(tracker.get_validation_errors().iter().cloned());
1459 let company_codes: Vec<String> = self
1460 .config
1461 .companies
1462 .iter()
1463 .map(|c| c.code.clone())
1464 .collect();
1465
1466 let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
1467 .map(|d| d + chrono::Months::new(self.config.global.period_months))
1468 .unwrap_or_else(|_| chrono::Local::now().date_naive());
1469
1470 for company_code in &company_codes {
1471 if let Err(e) = tracker.validate_balance_sheet(company_code, end_date, None) {
1472 all_errors.push(e);
1473 }
1474 }
1475
1476 let stats = tracker.get_statistics();
1478
1479 let is_balanced = all_errors.is_empty();
1481
1482 if let Some(pb) = pb {
1483 let msg = if is_balanced {
1484 "Balance validation passed"
1485 } else {
1486 "Balance validation completed with errors"
1487 };
1488 pb.finish_with_message(msg);
1489 }
1490
1491 Ok(BalanceValidationResult {
1492 validated: true,
1493 is_balanced,
1494 entries_processed: stats.entries_processed,
1495 total_debits: stats.total_debits,
1496 total_credits: stats.total_credits,
1497 accounts_tracked: stats.accounts_tracked,
1498 companies_tracked: stats.companies_tracked,
1499 validation_errors: all_errors,
1500 has_unbalanced_entries: has_unbalanced,
1501 })
1502 }
1503
1504 fn inject_data_quality(
1509 &mut self,
1510 entries: &mut [JournalEntry],
1511 ) -> SynthResult<DataQualityStats> {
1512 let pb = self.create_progress_bar(entries.len() as u64, "Injecting Data Quality Issues");
1513
1514 let config = DataQualityConfig::minimal();
1516 let mut injector = DataQualityInjector::new(config);
1517
1518 let context = HashMap::new();
1520
1521 for entry in entries.iter_mut() {
1522 if let Some(text) = &entry.header.header_text {
1524 let processed = injector.process_text_field(
1525 "header_text",
1526 text,
1527 &entry.header.document_id.to_string(),
1528 &context,
1529 );
1530 match processed {
1531 Some(new_text) if new_text != *text => {
1532 entry.header.header_text = Some(new_text);
1533 }
1534 None => {
1535 entry.header.header_text = None; }
1537 _ => {}
1538 }
1539 }
1540
1541 if let Some(ref_text) = &entry.header.reference {
1543 let processed = injector.process_text_field(
1544 "reference",
1545 ref_text,
1546 &entry.header.document_id.to_string(),
1547 &context,
1548 );
1549 match processed {
1550 Some(new_text) if new_text != *ref_text => {
1551 entry.header.reference = Some(new_text);
1552 }
1553 None => {
1554 entry.header.reference = None;
1555 }
1556 _ => {}
1557 }
1558 }
1559
1560 let user_persona = entry.header.user_persona.clone();
1562 if let Some(processed) = injector.process_text_field(
1563 "user_persona",
1564 &user_persona,
1565 &entry.header.document_id.to_string(),
1566 &context,
1567 ) {
1568 if processed != user_persona {
1569 entry.header.user_persona = processed;
1570 }
1571 }
1572
1573 for line in &mut entry.lines {
1575 if let Some(ref text) = line.line_text {
1577 let processed = injector.process_text_field(
1578 "line_text",
1579 text,
1580 &entry.header.document_id.to_string(),
1581 &context,
1582 );
1583 match processed {
1584 Some(new_text) if new_text != *text => {
1585 line.line_text = Some(new_text);
1586 }
1587 None => {
1588 line.line_text = None;
1589 }
1590 _ => {}
1591 }
1592 }
1593
1594 if let Some(cc) = &line.cost_center {
1596 let processed = injector.process_text_field(
1597 "cost_center",
1598 cc,
1599 &entry.header.document_id.to_string(),
1600 &context,
1601 );
1602 match processed {
1603 Some(new_cc) if new_cc != *cc => {
1604 line.cost_center = Some(new_cc);
1605 }
1606 None => {
1607 line.cost_center = None;
1608 }
1609 _ => {}
1610 }
1611 }
1612 }
1613
1614 if let Some(pb) = &pb {
1615 pb.inc(1);
1616 }
1617 }
1618
1619 if let Some(pb) = pb {
1620 pb.finish_with_message("Data quality injection complete");
1621 }
1622
1623 Ok(injector.stats().clone())
1624 }
1625
1626 fn generate_audit_data(&mut self, entries: &[JournalEntry]) -> SynthResult<AuditSnapshot> {
1637 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
1638 .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
1639 let fiscal_year = start_date.year() as u16;
1640 let period_end = start_date + chrono::Months::new(self.config.global.period_months);
1641
1642 let total_revenue: rust_decimal::Decimal = entries
1644 .iter()
1645 .flat_map(|e| e.lines.iter())
1646 .filter(|l| l.credit_amount > rust_decimal::Decimal::ZERO)
1647 .map(|l| l.credit_amount)
1648 .sum();
1649
1650 let total_items = (self.phase_config.audit_engagements * 50) as u64; let pb = self.create_progress_bar(total_items, "Generating Audit Data");
1652
1653 let mut snapshot = AuditSnapshot::default();
1654
1655 let mut engagement_gen = AuditEngagementGenerator::new(self.seed + 7000);
1657 let mut workpaper_gen = WorkpaperGenerator::new(self.seed + 7100);
1658 let mut evidence_gen = EvidenceGenerator::new(self.seed + 7200);
1659 let mut risk_gen = RiskAssessmentGenerator::new(self.seed + 7300);
1660 let mut finding_gen = FindingGenerator::new(self.seed + 7400);
1661 let mut judgment_gen = JudgmentGenerator::new(self.seed + 7500);
1662
1663 let accounts: Vec<String> = self
1665 .coa
1666 .as_ref()
1667 .map(|coa| {
1668 coa.get_postable_accounts()
1669 .iter()
1670 .map(|acc| acc.account_code().to_string())
1671 .collect()
1672 })
1673 .unwrap_or_default();
1674
1675 for (i, company) in self.config.companies.iter().enumerate() {
1677 let company_revenue = total_revenue
1679 * rust_decimal::Decimal::try_from(company.volume_weight).unwrap_or_default();
1680
1681 let engagements_for_company =
1683 self.phase_config.audit_engagements / self.config.companies.len().max(1);
1684 let extra = if i < self.phase_config.audit_engagements % self.config.companies.len() {
1685 1
1686 } else {
1687 0
1688 };
1689
1690 for _eng_idx in 0..(engagements_for_company + extra) {
1691 let engagement = engagement_gen.generate_engagement(
1693 &company.code,
1694 &company.name,
1695 fiscal_year,
1696 period_end,
1697 company_revenue,
1698 None, );
1700
1701 if let Some(pb) = &pb {
1702 pb.inc(1);
1703 }
1704
1705 let team_members: Vec<String> = engagement.team_member_ids.clone();
1707
1708 let workpapers =
1710 workpaper_gen.generate_complete_workpaper_set(&engagement, &team_members);
1711
1712 for wp in &workpapers {
1713 if let Some(pb) = &pb {
1714 pb.inc(1);
1715 }
1716
1717 let evidence = evidence_gen.generate_evidence_for_workpaper(
1719 wp,
1720 &team_members,
1721 wp.preparer_date,
1722 );
1723
1724 for _ in &evidence {
1725 if let Some(pb) = &pb {
1726 pb.inc(1);
1727 }
1728 }
1729
1730 snapshot.evidence.extend(evidence);
1731 }
1732
1733 let risks =
1735 risk_gen.generate_risks_for_engagement(&engagement, &team_members, &accounts);
1736
1737 for _ in &risks {
1738 if let Some(pb) = &pb {
1739 pb.inc(1);
1740 }
1741 }
1742 snapshot.risk_assessments.extend(risks);
1743
1744 let findings = finding_gen.generate_findings_for_engagement(
1746 &engagement,
1747 &workpapers,
1748 &team_members,
1749 );
1750
1751 for _ in &findings {
1752 if let Some(pb) = &pb {
1753 pb.inc(1);
1754 }
1755 }
1756 snapshot.findings.extend(findings);
1757
1758 let judgments =
1760 judgment_gen.generate_judgments_for_engagement(&engagement, &team_members);
1761
1762 for _ in &judgments {
1763 if let Some(pb) = &pb {
1764 pb.inc(1);
1765 }
1766 }
1767 snapshot.judgments.extend(judgments);
1768
1769 snapshot.workpapers.extend(workpapers);
1771 snapshot.engagements.push(engagement);
1772 }
1773 }
1774
1775 if let Some(pb) = pb {
1776 pb.finish_with_message(format!(
1777 "Audit data: {} engagements, {} workpapers, {} evidence",
1778 snapshot.engagements.len(),
1779 snapshot.workpapers.len(),
1780 snapshot.evidence.len()
1781 ));
1782 }
1783
1784 Ok(snapshot)
1785 }
1786
1787 fn generate_banking_data(&mut self) -> SynthResult<BankingSnapshot> {
1792 let pb = self.create_progress_bar(100, "Generating Banking Data");
1793
1794 let orchestrator = BankingOrchestratorBuilder::new()
1796 .config(self.config.banking.clone())
1797 .seed(self.seed + 9000)
1798 .build();
1799
1800 if let Some(pb) = &pb {
1801 pb.inc(10);
1802 }
1803
1804 let result = orchestrator.generate();
1806
1807 if let Some(pb) = &pb {
1808 pb.inc(90);
1809 pb.finish_with_message(format!(
1810 "Banking: {} customers, {} transactions",
1811 result.customers.len(),
1812 result.transactions.len()
1813 ));
1814 }
1815
1816 Ok(BankingSnapshot {
1817 customers: result.customers,
1818 accounts: result.accounts,
1819 transactions: result.transactions,
1820 suspicious_count: result.stats.suspicious_count,
1821 scenario_count: result.scenarios.len(),
1822 })
1823 }
1824
1825 fn calculate_total_transactions(&self) -> u64 {
1827 let months = self.config.global.period_months as f64;
1828 self.config
1829 .companies
1830 .iter()
1831 .map(|c| {
1832 let annual = c.annual_transaction_volume.count() as f64;
1833 let weighted = annual * c.volume_weight;
1834 (weighted * months / 12.0) as u64
1835 })
1836 .sum()
1837 }
1838
1839 fn create_progress_bar(&self, total: u64, message: &str) -> Option<ProgressBar> {
1841 if !self.phase_config.show_progress {
1842 return None;
1843 }
1844
1845 let pb = if let Some(mp) = &self.multi_progress {
1846 mp.add(ProgressBar::new(total))
1847 } else {
1848 ProgressBar::new(total)
1849 };
1850
1851 pb.set_style(
1852 ProgressStyle::default_bar()
1853 .template(&format!(
1854 "{{spinner:.green}} {} [{{elapsed_precise}}] [{{bar:40.cyan/blue}}] {{pos}}/{{len}} ({{per_sec}})",
1855 message
1856 ))
1857 .expect("Progress bar template should be valid - uses only standard indicatif placeholders")
1858 .progress_chars("#>-"),
1859 );
1860
1861 Some(pb)
1862 }
1863
1864 pub fn get_coa(&self) -> Option<Arc<ChartOfAccounts>> {
1866 self.coa.clone()
1867 }
1868
1869 pub fn get_master_data(&self) -> &MasterDataSnapshot {
1871 &self.master_data
1872 }
1873}
1874
#[cfg(test)]
mod tests {
    use super::*;
    use datasynth_config::schema::*;

    /// Single-company, one-month manufacturing configuration with a fixed seed.
    fn create_test_config() -> GeneratorConfig {
        GeneratorConfig {
            global: GlobalConfig {
                industry: IndustrySector::Manufacturing,
                start_date: "2024-01-01".to_string(),
                period_months: 1,
                seed: Some(42),
                parallel: false,
                group_currency: "USD".to_string(),
                worker_threads: 0,
                memory_limit_mb: 0,
            },
            companies: vec![CompanyConfig {
                code: "1000".to_string(),
                name: "Test Company".to_string(),
                currency: "USD".to_string(),
                country: "US".to_string(),
                annual_transaction_volume: TransactionVolume::TenK,
                volume_weight: 1.0,
                fiscal_year_variant: "K4".to_string(),
            }],
            chart_of_accounts: ChartOfAccountsConfig {
                complexity: CoAComplexity::Small,
                industry_specific: true,
                custom_accounts: None,
                min_hierarchy_depth: 2,
                max_hierarchy_depth: 4,
            },
            transactions: TransactionConfig::default(),
            output: OutputConfig::default(),
            fraud: FraudConfig::default(),
            internal_controls: InternalControlsConfig::default(),
            business_processes: BusinessProcessConfig::default(),
            user_personas: UserPersonaConfig::default(),
            templates: TemplateConfig::default(),
            approval: ApprovalConfig::default(),
            departments: DepartmentConfig::default(),
            master_data: MasterDataConfig::default(),
            document_flows: DocumentFlowConfig::default(),
            intercompany: IntercompanyConfig::default(),
            balance: BalanceConfig::default(),
            ocpm: OcpmConfig::default(),
            audit: AuditGenerationConfig::default(),
            banking: datasynth_banking::BankingConfig::default(),
            data_quality: DataQualitySchemaConfig::default(),
        }
    }

    /// Builds an orchestrator, failing the test when construction is rejected.
    fn build_orchestrator(
        config: GeneratorConfig,
        phase_config: PhaseConfig,
    ) -> EnhancedOrchestrator {
        EnhancedOrchestrator::new(config, phase_config)
            .expect("orchestrator should build from a valid test configuration")
    }

    /// Phase selection shared by the tests that only need journal entries:
    /// no master data, no document flows, no anomalies, no progress output.
    fn journal_entries_only() -> PhaseConfig {
        PhaseConfig {
            generate_master_data: false,
            generate_document_flows: false,
            generate_journal_entries: true,
            inject_anomalies: false,
            show_progress: false,
            ..Default::default()
        }
    }

    #[test]
    fn test_enhanced_orchestrator_creation() {
        assert!(EnhancedOrchestrator::with_defaults(create_test_config()).is_ok());
    }

    #[test]
    fn test_minimal_generation() {
        let mut orchestrator = build_orchestrator(create_test_config(), journal_entries_only());
        let result = orchestrator
            .generate()
            .expect("minimal generation should succeed");

        assert!(!result.journal_entries.is_empty());
    }

    #[test]
    fn test_master_data_generation() {
        let phase_config = PhaseConfig {
            generate_master_data: true,
            generate_document_flows: false,
            generate_journal_entries: false,
            inject_anomalies: false,
            show_progress: false,
            vendors_per_company: 5,
            customers_per_company: 5,
            materials_per_company: 10,
            assets_per_company: 5,
            employees_per_company: 10,
            ..Default::default()
        };

        let mut orchestrator = build_orchestrator(create_test_config(), phase_config);
        let result = orchestrator.generate().unwrap();

        assert!(!result.master_data.vendors.is_empty());
        assert!(!result.master_data.customers.is_empty());
        assert!(!result.master_data.materials.is_empty());
    }

    #[test]
    fn test_document_flow_generation() {
        let phase_config = PhaseConfig {
            generate_master_data: true,
            generate_document_flows: true,
            generate_journal_entries: false,
            inject_anomalies: false,
            inject_data_quality: false,
            validate_balances: false,
            generate_ocpm_events: false,
            show_progress: false,
            vendors_per_company: 5,
            customers_per_company: 5,
            materials_per_company: 10,
            assets_per_company: 5,
            employees_per_company: 10,
            p2p_chains: 5,
            o2c_chains: 5,
            ..Default::default()
        };

        let mut orchestrator = build_orchestrator(create_test_config(), phase_config);
        let result = orchestrator.generate().unwrap();

        // Both process chains were produced...
        assert!(!result.document_flows.p2p_chains.is_empty());
        assert!(!result.document_flows.o2c_chains.is_empty());

        // ...and their leading documents were flattened into the snapshot.
        assert!(!result.document_flows.purchase_orders.is_empty());
        assert!(!result.document_flows.sales_orders.is_empty());
    }

    #[test]
    fn test_anomaly_injection() {
        let phase_config = PhaseConfig {
            inject_anomalies: true,
            ..journal_entries_only()
        };

        let mut orchestrator = build_orchestrator(create_test_config(), phase_config);
        let result = orchestrator.generate().unwrap();

        assert!(!result.journal_entries.is_empty());

        // Only the presence of a summary is checked; the number of injected
        // anomalies is not asserted.
        assert!(result.anomaly_labels.summary.is_some());
    }

    #[test]
    fn test_full_generation_pipeline() {
        let phase_config = PhaseConfig {
            generate_master_data: true,
            generate_document_flows: true,
            generate_journal_entries: true,
            inject_anomalies: false,
            inject_data_quality: false,
            validate_balances: true,
            generate_ocpm_events: false,
            show_progress: false,
            vendors_per_company: 3,
            customers_per_company: 3,
            materials_per_company: 5,
            assets_per_company: 3,
            employees_per_company: 5,
            p2p_chains: 3,
            o2c_chains: 3,
            ..Default::default()
        };

        let mut orchestrator = build_orchestrator(create_test_config(), phase_config);
        let result = orchestrator.generate().unwrap();

        // Every enabled phase produced output.
        assert!(!result.master_data.vendors.is_empty());
        assert!(!result.master_data.customers.is_empty());
        assert!(!result.document_flows.p2p_chains.is_empty());
        assert!(!result.document_flows.o2c_chains.is_empty());
        assert!(!result.journal_entries.is_empty());
        assert!(result.statistics.accounts_count > 0);

        // Subledgers were linked from the document flows.
        assert!(!result.subledger.ap_invoices.is_empty());
        assert!(!result.subledger.ar_invoices.is_empty());

        // Balance validation ran over the generated entries.
        assert!(result.balance_validation.validated);
        assert!(result.balance_validation.entries_processed > 0);
    }

    #[test]
    fn test_subledger_linking() {
        let phase_config = PhaseConfig {
            generate_master_data: true,
            generate_document_flows: true,
            generate_journal_entries: false,
            inject_anomalies: false,
            inject_data_quality: false,
            validate_balances: false,
            generate_ocpm_events: false,
            show_progress: false,
            vendors_per_company: 5,
            customers_per_company: 5,
            materials_per_company: 10,
            assets_per_company: 3,
            employees_per_company: 5,
            p2p_chains: 5,
            o2c_chains: 5,
            ..Default::default()
        };

        let mut orchestrator = build_orchestrator(create_test_config(), phase_config);
        let result = orchestrator.generate().unwrap();

        assert!(!result.document_flows.vendor_invoices.is_empty());
        assert!(!result.document_flows.customer_invoices.is_empty());

        assert!(!result.subledger.ap_invoices.is_empty());
        assert!(!result.subledger.ar_invoices.is_empty());

        // One AP invoice per vendor invoice.
        assert_eq!(
            result.subledger.ap_invoices.len(),
            result.document_flows.vendor_invoices.len()
        );

        // One AR invoice per customer invoice.
        assert_eq!(
            result.subledger.ar_invoices.len(),
            result.document_flows.customer_invoices.len()
        );

        // Statistics mirror the subledger contents.
        assert_eq!(
            result.statistics.ap_invoice_count,
            result.subledger.ap_invoices.len()
        );
        assert_eq!(
            result.statistics.ar_invoice_count,
            result.subledger.ar_invoices.len()
        );
    }

    #[test]
    fn test_balance_validation() {
        let phase_config = PhaseConfig {
            validate_balances: true,
            ..journal_entries_only()
        };

        let mut orchestrator = build_orchestrator(create_test_config(), phase_config);
        let result = orchestrator.generate().unwrap();

        assert!(result.balance_validation.validated);
        assert!(result.balance_validation.entries_processed > 0);

        assert!(!result.balance_validation.has_unbalanced_entries);

        assert_eq!(
            result.balance_validation.total_debits,
            result.balance_validation.total_credits
        );
    }

    #[test]
    fn test_statistics_accuracy() {
        let phase_config = PhaseConfig {
            generate_master_data: true,
            generate_document_flows: false,
            generate_journal_entries: true,
            inject_anomalies: false,
            show_progress: false,
            vendors_per_company: 10,
            customers_per_company: 20,
            materials_per_company: 15,
            assets_per_company: 5,
            employees_per_company: 8,
            ..Default::default()
        };

        let mut orchestrator = build_orchestrator(create_test_config(), phase_config);
        let result = orchestrator.generate().unwrap();

        assert_eq!(
            result.statistics.vendor_count,
            result.master_data.vendors.len()
        );
        assert_eq!(
            result.statistics.customer_count,
            result.master_data.customers.len()
        );
        assert_eq!(
            result.statistics.material_count,
            result.master_data.materials.len()
        );
        assert_eq!(
            result.statistics.total_entries as usize,
            result.journal_entries.len()
        );
    }

    #[test]
    fn test_phase_config_defaults() {
        let defaults = PhaseConfig::default();
        assert!(defaults.generate_master_data);
        assert!(defaults.generate_document_flows);
        assert!(defaults.generate_journal_entries);
        assert!(!defaults.inject_anomalies);
        assert!(defaults.validate_balances);
        assert!(defaults.show_progress);
        assert!(defaults.vendors_per_company > 0);
        assert!(defaults.customers_per_company > 0);
    }

    #[test]
    fn test_get_coa_before_generation() {
        let orchestrator = EnhancedOrchestrator::with_defaults(create_test_config()).unwrap();

        assert!(orchestrator.get_coa().is_none());
    }

    #[test]
    fn test_get_coa_after_generation() {
        let mut orchestrator = build_orchestrator(create_test_config(), journal_entries_only());
        let _ = orchestrator.generate().unwrap();

        assert!(orchestrator.get_coa().is_some());
    }

    #[test]
    fn test_get_master_data() {
        let phase_config = PhaseConfig {
            generate_master_data: true,
            generate_document_flows: false,
            generate_journal_entries: false,
            inject_anomalies: false,
            show_progress: false,
            vendors_per_company: 5,
            customers_per_company: 5,
            materials_per_company: 5,
            assets_per_company: 5,
            employees_per_company: 5,
            ..Default::default()
        };

        let mut orchestrator = build_orchestrator(create_test_config(), phase_config);
        let _ = orchestrator.generate().unwrap();

        assert!(!orchestrator.get_master_data().vendors.is_empty());
    }

    #[test]
    fn test_with_progress_builder() {
        let orchestrator = EnhancedOrchestrator::with_defaults(create_test_config())
            .unwrap()
            .with_progress(false);

        assert!(!orchestrator.phase_config.show_progress);
    }

    #[test]
    fn test_multi_company_generation() {
        let mut config = create_test_config();
        config.companies.push(CompanyConfig {
            code: "2000".to_string(),
            name: "Subsidiary".to_string(),
            currency: "EUR".to_string(),
            country: "DE".to_string(),
            annual_transaction_volume: TransactionVolume::TenK,
            volume_weight: 0.5,
            fiscal_year_variant: "K4".to_string(),
        });

        let phase_config = PhaseConfig {
            generate_master_data: true,
            generate_document_flows: false,
            generate_journal_entries: true,
            inject_anomalies: false,
            show_progress: false,
            vendors_per_company: 5,
            customers_per_company: 5,
            materials_per_company: 5,
            assets_per_company: 5,
            employees_per_company: 5,
            ..Default::default()
        };

        let mut orchestrator = build_orchestrator(config, phase_config);
        let result = orchestrator.generate().unwrap();

        // Two companies at five vendors/customers each.
        assert!(result.statistics.vendor_count >= 10);
        assert!(result.statistics.customer_count >= 10);
        assert_eq!(result.statistics.companies_count, 2);
    }

    #[test]
    fn test_empty_master_data_skips_document_flows() {
        // Document flows are requested without any master data to build them from.
        let phase_config = PhaseConfig {
            generate_master_data: false,
            generate_document_flows: true,
            generate_journal_entries: false,
            inject_anomalies: false,
            show_progress: false,
            ..Default::default()
        };

        let mut orchestrator = build_orchestrator(create_test_config(), phase_config);
        let result = orchestrator.generate().unwrap();

        assert!(result.document_flows.p2p_chains.is_empty());
        assert!(result.document_flows.o2c_chains.is_empty());
    }

    #[test]
    fn test_journal_entry_line_item_count() {
        let mut orchestrator = build_orchestrator(create_test_config(), journal_entries_only());
        let result = orchestrator.generate().unwrap();

        let calculated_line_items: u64 = result
            .journal_entries
            .iter()
            .map(|e| e.line_count() as u64)
            .sum();
        assert_eq!(result.statistics.total_line_items, calculated_line_items);
    }

    #[test]
    fn test_audit_generation() {
        let phase_config = PhaseConfig {
            generate_audit: true,
            audit_engagements: 2,
            workpapers_per_engagement: 5,
            evidence_per_workpaper: 2,
            risks_per_engagement: 3,
            findings_per_engagement: 2,
            judgments_per_engagement: 2,
            ..journal_entries_only()
        };

        let mut orchestrator = build_orchestrator(create_test_config(), phase_config);
        let result = orchestrator.generate().unwrap();

        assert_eq!(result.audit.engagements.len(), 2);
        assert!(!result.audit.workpapers.is_empty());
        assert!(!result.audit.evidence.is_empty());
        assert!(!result.audit.risk_assessments.is_empty());
        assert!(!result.audit.findings.is_empty());
        assert!(!result.audit.judgments.is_empty());

        // Statistics mirror the audit snapshot contents.
        assert_eq!(
            result.statistics.audit_engagement_count,
            result.audit.engagements.len()
        );
        assert_eq!(
            result.statistics.audit_workpaper_count,
            result.audit.workpapers.len()
        );
        assert_eq!(
            result.statistics.audit_evidence_count,
            result.audit.evidence.len()
        );
        assert_eq!(
            result.statistics.audit_risk_count,
            result.audit.risk_assessments.len()
        );
        assert_eq!(
            result.statistics.audit_finding_count,
            result.audit.findings.len()
        );
        assert_eq!(
            result.statistics.audit_judgment_count,
            result.audit.judgments.len()
        );
    }
}