1use std::collections::HashMap;
16use std::path::PathBuf;
17use std::sync::Arc;
18
19use chrono::{Datelike, NaiveDate};
20use indicatif::{MultiProgress, ProgressBar, ProgressStyle};
21use serde::{Deserialize, Serialize};
22use tracing::{debug, info, warn};
23
24use datasynth_banking::{
25 models::{BankAccount, BankTransaction, BankingCustomer},
26 BankingOrchestratorBuilder,
27};
28use datasynth_config::schema::GeneratorConfig;
29use datasynth_core::error::{SynthError, SynthResult};
30use datasynth_core::models::audit::{
31 AuditEngagement, AuditEvidence, AuditFinding, ProfessionalJudgment, RiskAssessment, Workpaper,
32};
33use datasynth_core::models::subledger::ap::APInvoice;
34use datasynth_core::models::subledger::ar::ARInvoice;
35use datasynth_core::models::*;
36use datasynth_core::{DegradationActions, DegradationLevel, ResourceGuard, ResourceGuardBuilder};
37use datasynth_fingerprint::{
38 io::FingerprintReader,
39 models::Fingerprint,
40 synthesis::{ConfigSynthesizer, CopulaGeneratorSpec, SynthesisOptions},
41};
42use datasynth_generators::{
43 AnomalyInjector,
45 AnomalyInjectorConfig,
46 AssetGenerator,
47 AuditEngagementGenerator,
49 BalanceTrackerConfig,
50 ChartOfAccountsGenerator,
52 CustomerGenerator,
53 DataQualityConfig,
54 DataQualityInjector,
56 DataQualityStats,
57 DocumentFlowJeConfig,
59 DocumentFlowJeGenerator,
60 DocumentFlowLinker,
62 EmployeeGenerator,
63 EvidenceGenerator,
64 FindingGenerator,
65 JournalEntryGenerator,
66 JudgmentGenerator,
67 LatePaymentDistribution,
68 MaterialGenerator,
69 O2CDocumentChain,
70 O2CGenerator,
71 O2CGeneratorConfig,
72 O2CPaymentBehavior,
73 P2PDocumentChain,
74 P2PGenerator,
76 P2PGeneratorConfig,
77 P2PPaymentBehavior,
78 RiskAssessmentGenerator,
79 RunningBalanceTracker,
81 ValidationError,
82 VendorGenerator,
84 WorkpaperGenerator,
85};
86use datasynth_graph::{
87 PyGExportConfig, PyGExporter, TransactionGraphBuilder, TransactionGraphConfig,
88};
89use datasynth_ocpm::{
90 EventLogMetadata, O2cDocuments, OcpmEventGenerator, OcpmEventLog, OcpmGeneratorConfig,
91 P2pDocuments,
92};
93
94use datasynth_config::schema::{O2CFlowConfig, P2PFlowConfig};
95use datasynth_core::models::documents::PaymentMethod;
96
97fn convert_p2p_config(schema_config: &P2PFlowConfig) -> P2PGeneratorConfig {
103 let payment_behavior = &schema_config.payment_behavior;
104 let late_dist = &payment_behavior.late_payment_days_distribution;
105
106 P2PGeneratorConfig {
107 three_way_match_rate: schema_config.three_way_match_rate,
108 partial_delivery_rate: schema_config.partial_delivery_rate,
109 over_delivery_rate: 0.02, price_variance_rate: schema_config.price_variance_rate,
111 max_price_variance_percent: schema_config.max_price_variance_percent,
112 avg_days_po_to_gr: schema_config.average_po_to_gr_days,
113 avg_days_gr_to_invoice: schema_config.average_gr_to_invoice_days,
114 avg_days_invoice_to_payment: schema_config.average_invoice_to_payment_days,
115 payment_method_distribution: vec![
116 (PaymentMethod::BankTransfer, 0.60),
117 (PaymentMethod::Check, 0.25),
118 (PaymentMethod::Wire, 0.10),
119 (PaymentMethod::CreditCard, 0.05),
120 ],
121 early_payment_discount_rate: 0.30, payment_behavior: P2PPaymentBehavior {
123 late_payment_rate: payment_behavior.late_payment_rate,
124 late_payment_distribution: LatePaymentDistribution {
125 slightly_late_1_to_7: late_dist.slightly_late_1_to_7,
126 late_8_to_14: late_dist.late_8_to_14,
127 very_late_15_to_30: late_dist.very_late_15_to_30,
128 severely_late_31_to_60: late_dist.severely_late_31_to_60,
129 extremely_late_over_60: late_dist.extremely_late_over_60,
130 },
131 partial_payment_rate: payment_behavior.partial_payment_rate,
132 payment_correction_rate: payment_behavior.payment_correction_rate,
133 },
134 }
135}
136
137fn convert_o2c_config(schema_config: &O2CFlowConfig) -> O2CGeneratorConfig {
139 let payment_behavior = &schema_config.payment_behavior;
140
141 O2CGeneratorConfig {
142 credit_check_failure_rate: schema_config.credit_check_failure_rate,
143 partial_shipment_rate: schema_config.partial_shipment_rate,
144 avg_days_so_to_delivery: schema_config.average_so_to_delivery_days,
145 avg_days_delivery_to_invoice: schema_config.average_delivery_to_invoice_days,
146 avg_days_invoice_to_payment: schema_config.average_invoice_to_receipt_days,
147 late_payment_rate: 0.15, bad_debt_rate: schema_config.bad_debt_rate,
149 returns_rate: schema_config.return_rate,
150 cash_discount_take_rate: schema_config.cash_discount.taken_rate,
151 payment_method_distribution: vec![
152 (PaymentMethod::BankTransfer, 0.50),
153 (PaymentMethod::Check, 0.30),
154 (PaymentMethod::Wire, 0.15),
155 (PaymentMethod::CreditCard, 0.05),
156 ],
157 payment_behavior: O2CPaymentBehavior {
158 partial_payment_rate: payment_behavior.partial_payments.rate,
159 short_payment_rate: payment_behavior.short_payments.rate,
160 max_short_percent: payment_behavior.short_payments.max_short_percent,
161 on_account_rate: payment_behavior.on_account_payments.rate,
162 payment_correction_rate: payment_behavior.payment_corrections.rate,
163 avg_days_until_remainder: payment_behavior.partial_payments.avg_days_until_remainder,
164 },
165 }
166}
167
/// Switches and sizing knobs that decide which phases of
/// `EnhancedOrchestrator::generate` run and how much data they are asked for.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PhaseConfig {
    /// Run phase 2: vendors, customers, materials, assets and employees.
    pub generate_master_data: bool,
    /// Run phase 3: P2P/O2C document flows (also needs vendor master data).
    pub generate_document_flows: bool,
    /// Run phase 3c: OCPM events derived from the document flows.
    pub generate_ocpm_events: bool,
    /// Run phase 4: standalone journal entries.
    pub generate_journal_entries: bool,
    /// Run phase 5: anomaly injection into the journal entries.
    pub inject_anomalies: bool,
    /// Run phase 7: data quality variations on the journal entries.
    pub inject_data_quality: bool,
    /// Run phase 6: balance validation of the journal entries.
    pub validate_balances: bool,
    /// Whether progress reporting is enabled.
    pub show_progress: bool,
    /// Vendors requested per company.
    pub vendors_per_company: usize,
    /// Customers requested per company.
    pub customers_per_company: usize,
    /// Materials requested per company.
    pub materials_per_company: usize,
    /// Fixed assets requested per company.
    pub assets_per_company: usize,
    /// Employees requested per company.
    pub employees_per_company: usize,
    /// Upper bound on generated P2P chains; the document-flow phase additionally
    /// caps this at twice the number of vendors.
    pub p2p_chains: usize,
    /// Requested number of O2C chains.
    pub o2c_chains: usize,
    /// Run phase 8: audit data.
    pub generate_audit: bool,
    /// Requested number of audit engagements.
    pub audit_engagements: usize,
    /// Requested workpapers per engagement.
    pub workpapers_per_engagement: usize,
    /// Requested evidence items per workpaper.
    pub evidence_per_workpaper: usize,
    /// Requested risk assessments per engagement.
    pub risks_per_engagement: usize,
    /// Requested findings per engagement.
    pub findings_per_engagement: usize,
    /// Requested professional judgments per engagement.
    pub judgments_per_engagement: usize,
    /// Run phase 9: banking data (the generator config must enable banking too).
    pub generate_banking: bool,
    /// Run phase 10: graph export (the generator config can also enable it).
    pub generate_graph_export: bool,
}
220
221impl Default for PhaseConfig {
222 fn default() -> Self {
223 Self {
224 generate_master_data: true,
225 generate_document_flows: true,
226 generate_ocpm_events: false, generate_journal_entries: true,
228 inject_anomalies: false,
229 inject_data_quality: false, validate_balances: true,
231 show_progress: true,
232 vendors_per_company: 50,
233 customers_per_company: 100,
234 materials_per_company: 200,
235 assets_per_company: 50,
236 employees_per_company: 100,
237 p2p_chains: 100,
238 o2c_chains: 100,
239 generate_audit: false, audit_engagements: 5,
241 workpapers_per_engagement: 20,
242 evidence_per_workpaper: 5,
243 risks_per_engagement: 15,
244 findings_per_engagement: 8,
245 judgments_per_engagement: 10,
246 generate_banking: false, generate_graph_export: false, }
249 }
250}
251
252#[derive(Debug, Clone, Default)]
254pub struct MasterDataSnapshot {
255 pub vendors: Vec<Vendor>,
257 pub customers: Vec<Customer>,
259 pub materials: Vec<Material>,
261 pub assets: Vec<FixedAsset>,
263 pub employees: Vec<Employee>,
265}
266
267#[derive(Debug, Clone, Default)]
269pub struct DocumentFlowSnapshot {
270 pub p2p_chains: Vec<P2PDocumentChain>,
272 pub o2c_chains: Vec<O2CDocumentChain>,
274 pub purchase_orders: Vec<documents::PurchaseOrder>,
276 pub goods_receipts: Vec<documents::GoodsReceipt>,
278 pub vendor_invoices: Vec<documents::VendorInvoice>,
280 pub sales_orders: Vec<documents::SalesOrder>,
282 pub deliveries: Vec<documents::Delivery>,
284 pub customer_invoices: Vec<documents::CustomerInvoice>,
286 pub payments: Vec<documents::Payment>,
288}
289
290#[derive(Debug, Clone, Default)]
292pub struct SubledgerSnapshot {
293 pub ap_invoices: Vec<APInvoice>,
295 pub ar_invoices: Vec<ARInvoice>,
297}
298
299#[derive(Debug, Clone, Default)]
301pub struct OcpmSnapshot {
302 pub event_log: Option<OcpmEventLog>,
304 pub event_count: usize,
306 pub object_count: usize,
308 pub case_count: usize,
310}
311
312#[derive(Debug, Clone, Default)]
314pub struct AuditSnapshot {
315 pub engagements: Vec<AuditEngagement>,
317 pub workpapers: Vec<Workpaper>,
319 pub evidence: Vec<AuditEvidence>,
321 pub risk_assessments: Vec<RiskAssessment>,
323 pub findings: Vec<AuditFinding>,
325 pub judgments: Vec<ProfessionalJudgment>,
327}
328
329#[derive(Debug, Clone, Default)]
331pub struct BankingSnapshot {
332 pub customers: Vec<BankingCustomer>,
334 pub accounts: Vec<BankAccount>,
336 pub transactions: Vec<BankTransaction>,
338 pub suspicious_count: usize,
340 pub scenario_count: usize,
342}
343
344#[derive(Debug, Clone, Default)]
346pub struct GraphExportSnapshot {
347 pub exported: bool,
349 pub graph_count: usize,
351 pub exports: HashMap<String, GraphExportInfo>,
353}
354
/// Details of a single exported graph.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct GraphExportInfo {
    /// Name of the export.
    pub name: String,
    /// Export format identifier.
    pub format: String,
    /// Location the export was written to.
    pub output_path: PathBuf,
    /// Number of nodes in the exported graph.
    pub node_count: usize,
    /// Number of edges in the exported graph.
    pub edge_count: usize,
}
369
370#[derive(Debug, Clone, Default)]
372pub struct AnomalyLabels {
373 pub labels: Vec<LabeledAnomaly>,
375 pub summary: Option<AnomalySummary>,
377 pub by_type: HashMap<String, usize>,
379}
380
381#[derive(Debug, Clone, Default)]
383pub struct BalanceValidationResult {
384 pub validated: bool,
386 pub is_balanced: bool,
388 pub entries_processed: u64,
390 pub total_debits: rust_decimal::Decimal,
392 pub total_credits: rust_decimal::Decimal,
394 pub accounts_tracked: usize,
396 pub companies_tracked: usize,
398 pub validation_errors: Vec<ValidationError>,
400 pub has_unbalanced_entries: bool,
402}
403
404#[derive(Debug)]
406pub struct EnhancedGenerationResult {
407 pub chart_of_accounts: ChartOfAccounts,
409 pub master_data: MasterDataSnapshot,
411 pub document_flows: DocumentFlowSnapshot,
413 pub subledger: SubledgerSnapshot,
415 pub ocpm: OcpmSnapshot,
417 pub audit: AuditSnapshot,
419 pub banking: BankingSnapshot,
421 pub graph_export: GraphExportSnapshot,
423 pub journal_entries: Vec<JournalEntry>,
425 pub anomaly_labels: AnomalyLabels,
427 pub balance_validation: BalanceValidationResult,
429 pub data_quality_stats: DataQualityStats,
431 pub statistics: EnhancedGenerationStatistics,
433}
434
435#[derive(Debug, Clone, Default, Serialize, Deserialize)]
437pub struct EnhancedGenerationStatistics {
438 pub total_entries: u64,
440 pub total_line_items: u64,
442 pub accounts_count: usize,
444 pub companies_count: usize,
446 pub period_months: u32,
448 pub vendor_count: usize,
450 pub customer_count: usize,
451 pub material_count: usize,
452 pub asset_count: usize,
453 pub employee_count: usize,
454 pub p2p_chain_count: usize,
456 pub o2c_chain_count: usize,
457 pub ap_invoice_count: usize,
459 pub ar_invoice_count: usize,
460 pub ocpm_event_count: usize,
462 pub ocpm_object_count: usize,
463 pub ocpm_case_count: usize,
464 pub audit_engagement_count: usize,
466 pub audit_workpaper_count: usize,
467 pub audit_evidence_count: usize,
468 pub audit_risk_count: usize,
469 pub audit_finding_count: usize,
470 pub audit_judgment_count: usize,
471 pub anomalies_injected: usize,
473 pub data_quality_issues: usize,
475 pub banking_customer_count: usize,
477 pub banking_account_count: usize,
478 pub banking_transaction_count: usize,
479 pub banking_suspicious_count: usize,
480 pub graph_export_count: usize,
482 pub graph_node_count: usize,
483 pub graph_edge_count: usize,
484}
485
486pub struct EnhancedOrchestrator {
488 config: GeneratorConfig,
489 phase_config: PhaseConfig,
490 coa: Option<Arc<ChartOfAccounts>>,
491 master_data: MasterDataSnapshot,
492 seed: u64,
493 multi_progress: Option<MultiProgress>,
494 resource_guard: ResourceGuard,
496 output_path: Option<PathBuf>,
498 copula_generators: Vec<CopulaGeneratorSpec>,
500}
501
502impl EnhancedOrchestrator {
503 pub fn new(config: GeneratorConfig, phase_config: PhaseConfig) -> SynthResult<Self> {
505 datasynth_config::validate_config(&config)?;
506
507 let seed = config.global.seed.unwrap_or_else(rand::random);
508
509 let resource_guard = Self::build_resource_guard(&config, None);
511
512 Ok(Self {
513 config,
514 phase_config,
515 coa: None,
516 master_data: MasterDataSnapshot::default(),
517 seed,
518 multi_progress: None,
519 resource_guard,
520 output_path: None,
521 copula_generators: Vec::new(),
522 })
523 }
524
525 pub fn with_defaults(config: GeneratorConfig) -> SynthResult<Self> {
527 Self::new(config, PhaseConfig::default())
528 }
529
530 pub fn with_progress(mut self, show: bool) -> Self {
532 self.phase_config.show_progress = show;
533 if show {
534 self.multi_progress = Some(MultiProgress::new());
535 }
536 self
537 }
538
539 pub fn with_output_path<P: Into<PathBuf>>(mut self, path: P) -> Self {
541 let path = path.into();
542 self.output_path = Some(path.clone());
543 self.resource_guard = Self::build_resource_guard(&self.config, Some(path));
545 self
546 }
547
548 pub fn has_copulas(&self) -> bool {
553 !self.copula_generators.is_empty()
554 }
555
556 pub fn copulas(&self) -> &[CopulaGeneratorSpec] {
562 &self.copula_generators
563 }
564
565 pub fn copulas_mut(&mut self) -> &mut [CopulaGeneratorSpec] {
569 &mut self.copula_generators
570 }
571
572 pub fn sample_from_copula(&mut self, copula_name: &str) -> Option<Vec<f64>> {
576 self.copula_generators
577 .iter_mut()
578 .find(|c| c.name == copula_name)
579 .map(|c| c.generator.sample())
580 }
581
582 pub fn from_fingerprint(
605 fingerprint_path: &std::path::Path,
606 phase_config: PhaseConfig,
607 scale: f64,
608 ) -> SynthResult<Self> {
609 info!("Loading fingerprint from: {}", fingerprint_path.display());
610
611 let reader = FingerprintReader::new();
613 let fingerprint = reader
614 .read_from_file(fingerprint_path)
615 .map_err(|e| SynthError::config(format!("Failed to read fingerprint: {}", e)))?;
616
617 Self::from_fingerprint_data(fingerprint, phase_config, scale)
618 }
619
620 pub fn from_fingerprint_data(
627 fingerprint: Fingerprint,
628 phase_config: PhaseConfig,
629 scale: f64,
630 ) -> SynthResult<Self> {
631 info!(
632 "Synthesizing config from fingerprint (version: {}, tables: {})",
633 fingerprint.manifest.version,
634 fingerprint.schema.tables.len()
635 );
636
637 let seed: u64 = rand::random();
639
640 let options = SynthesisOptions {
642 scale,
643 seed: Some(seed),
644 preserve_correlations: true,
645 inject_anomalies: true,
646 };
647 let synthesizer = ConfigSynthesizer::with_options(options);
648
649 let synthesis_result = synthesizer
651 .synthesize_full(&fingerprint, seed)
652 .map_err(|e| {
653 SynthError::config(format!(
654 "Failed to synthesize config from fingerprint: {}",
655 e
656 ))
657 })?;
658
659 let mut config = if let Some(ref industry) = fingerprint.manifest.source.industry {
661 Self::base_config_for_industry(industry)
662 } else {
663 Self::base_config_for_industry("manufacturing")
664 };
665
666 config = Self::apply_config_patch(config, &synthesis_result.config_patch);
668
669 info!(
671 "Config synthesized: {} tables, scale={:.2}, copula generators: {}",
672 fingerprint.schema.tables.len(),
673 scale,
674 synthesis_result.copula_generators.len()
675 );
676
677 if !synthesis_result.copula_generators.is_empty() {
678 for spec in &synthesis_result.copula_generators {
679 info!(
680 " Copula '{}' for table '{}': {} columns",
681 spec.name,
682 spec.table,
683 spec.columns.len()
684 );
685 }
686 }
687
688 let mut orchestrator = Self::new(config, phase_config)?;
690
691 orchestrator.copula_generators = synthesis_result.copula_generators;
693
694 Ok(orchestrator)
695 }
696
697 fn base_config_for_industry(industry: &str) -> GeneratorConfig {
699 use datasynth_config::presets::create_preset;
700 use datasynth_config::TransactionVolume;
701 use datasynth_core::models::{CoAComplexity, IndustrySector};
702
703 let sector = match industry.to_lowercase().as_str() {
704 "manufacturing" => IndustrySector::Manufacturing,
705 "retail" => IndustrySector::Retail,
706 "financial" | "financial_services" => IndustrySector::FinancialServices,
707 "healthcare" => IndustrySector::Healthcare,
708 "technology" | "tech" => IndustrySector::Technology,
709 _ => IndustrySector::Manufacturing,
710 };
711
712 create_preset(
714 sector,
715 1, 12, CoAComplexity::Medium,
718 TransactionVolume::TenK,
719 )
720 }
721
722 fn apply_config_patch(
724 mut config: GeneratorConfig,
725 patch: &datasynth_fingerprint::synthesis::ConfigPatch,
726 ) -> GeneratorConfig {
727 use datasynth_fingerprint::synthesis::ConfigValue;
728
729 for (key, value) in patch.values() {
730 match (key.as_str(), value) {
731 ("transactions.count", ConfigValue::Integer(n)) => {
734 info!(
735 "Fingerprint suggests {} transactions (apply via company volumes)",
736 n
737 );
738 }
739 ("global.period_months", ConfigValue::Integer(n)) => {
740 config.global.period_months = *n as u32;
741 }
742 ("global.start_date", ConfigValue::String(s)) => {
743 config.global.start_date = s.clone();
744 }
745 ("global.seed", ConfigValue::Integer(n)) => {
746 config.global.seed = Some(*n as u64);
747 }
748 ("fraud.enabled", ConfigValue::Bool(b)) => {
749 config.fraud.enabled = *b;
750 }
751 ("fraud.fraud_rate", ConfigValue::Float(f)) => {
752 config.fraud.fraud_rate = *f;
753 }
754 ("data_quality.enabled", ConfigValue::Bool(b)) => {
755 config.data_quality.enabled = *b;
756 }
757 ("anomaly_injection.enabled", ConfigValue::Bool(b)) => {
759 config.fraud.enabled = *b;
760 }
761 ("anomaly_injection.overall_rate", ConfigValue::Float(f)) => {
762 config.fraud.fraud_rate = *f;
763 }
764 _ => {
765 debug!("Ignoring unknown config patch key: {}", key);
766 }
767 }
768 }
769
770 config
771 }
772
773 fn build_resource_guard(
775 config: &GeneratorConfig,
776 output_path: Option<PathBuf>,
777 ) -> ResourceGuard {
778 let mut builder = ResourceGuardBuilder::new();
779
780 if config.global.memory_limit_mb > 0 {
782 builder = builder.memory_limit(config.global.memory_limit_mb);
783 }
784
785 if let Some(path) = output_path {
787 builder = builder.output_path(path).min_free_disk(100); }
789
790 builder = builder.conservative();
792
793 builder.build()
794 }
795
796 fn check_resources(&self) -> SynthResult<DegradationLevel> {
801 self.resource_guard.check()
802 }
803
804 fn check_resources_with_log(&self, phase: &str) -> SynthResult<DegradationLevel> {
806 let level = self.resource_guard.check()?;
807
808 if level != DegradationLevel::Normal {
809 warn!(
810 "Resource degradation at {}: level={}, memory={}MB, disk={}MB",
811 phase,
812 level,
813 self.resource_guard.current_memory_mb(),
814 self.resource_guard.available_disk_mb()
815 );
816 }
817
818 Ok(level)
819 }
820
821 fn get_degradation_actions(&self) -> DegradationActions {
823 self.resource_guard.get_actions()
824 }
825
826 fn check_memory_limit(&self) -> SynthResult<()> {
828 self.check_resources()?;
829 Ok(())
830 }
831
832 #[allow(clippy::field_reassign_with_default)]
834 pub fn generate(&mut self) -> SynthResult<EnhancedGenerationResult> {
835 info!("Starting enhanced generation workflow");
836 info!(
837 "Config: industry={:?}, period_months={}, companies={}",
838 self.config.global.industry,
839 self.config.global.period_months,
840 self.config.companies.len()
841 );
842
843 let initial_level = self.check_resources_with_log("initial")?;
845 if initial_level == DegradationLevel::Emergency {
846 return Err(SynthError::resource(
847 "Insufficient resources to start generation",
848 ));
849 }
850
851 let mut stats = EnhancedGenerationStatistics::default();
852 stats.companies_count = self.config.companies.len();
853 stats.period_months = self.config.global.period_months;
854
855 info!("Phase 1: Generating Chart of Accounts");
857 let coa = self.generate_coa()?;
858 stats.accounts_count = coa.account_count();
859 info!(
860 "Chart of Accounts generated: {} accounts",
861 stats.accounts_count
862 );
863
864 self.check_resources_with_log("post-coa")?;
866
867 if self.phase_config.generate_master_data {
869 info!("Phase 2: Generating Master Data");
870 self.generate_master_data()?;
871 stats.vendor_count = self.master_data.vendors.len();
872 stats.customer_count = self.master_data.customers.len();
873 stats.material_count = self.master_data.materials.len();
874 stats.asset_count = self.master_data.assets.len();
875 stats.employee_count = self.master_data.employees.len();
876 info!(
877 "Master data generated: {} vendors, {} customers, {} materials, {} assets, {} employees",
878 stats.vendor_count, stats.customer_count, stats.material_count,
879 stats.asset_count, stats.employee_count
880 );
881
882 self.check_resources_with_log("post-master-data")?;
884 } else {
885 debug!("Phase 2: Skipped (master data generation disabled)");
886 }
887
888 let mut document_flows = DocumentFlowSnapshot::default();
890 let mut subledger = SubledgerSnapshot::default();
891 if self.phase_config.generate_document_flows && !self.master_data.vendors.is_empty() {
892 info!("Phase 3: Generating Document Flows");
893 self.generate_document_flows(&mut document_flows)?;
894 stats.p2p_chain_count = document_flows.p2p_chains.len();
895 stats.o2c_chain_count = document_flows.o2c_chains.len();
896 info!(
897 "Document flows generated: {} P2P chains, {} O2C chains",
898 stats.p2p_chain_count, stats.o2c_chain_count
899 );
900
901 debug!("Phase 3b: Linking document flows to subledgers");
903 subledger = self.link_document_flows_to_subledgers(&document_flows)?;
904 stats.ap_invoice_count = subledger.ap_invoices.len();
905 stats.ar_invoice_count = subledger.ar_invoices.len();
906 debug!(
907 "Subledgers linked: {} AP invoices, {} AR invoices",
908 stats.ap_invoice_count, stats.ar_invoice_count
909 );
910
911 self.check_resources_with_log("post-document-flows")?;
913 } else {
914 debug!("Phase 3: Skipped (document flow generation disabled or no master data)");
915 }
916
917 let mut ocpm_snapshot = OcpmSnapshot::default();
919 if self.phase_config.generate_ocpm_events && !document_flows.p2p_chains.is_empty() {
920 info!("Phase 3c: Generating OCPM Events");
921 ocpm_snapshot = self.generate_ocpm_events(&document_flows)?;
922 stats.ocpm_event_count = ocpm_snapshot.event_count;
923 stats.ocpm_object_count = ocpm_snapshot.object_count;
924 stats.ocpm_case_count = ocpm_snapshot.case_count;
925 info!(
926 "OCPM events generated: {} events, {} objects, {} cases",
927 stats.ocpm_event_count, stats.ocpm_object_count, stats.ocpm_case_count
928 );
929
930 self.check_resources_with_log("post-ocpm")?;
932 } else {
933 debug!("Phase 3c: Skipped (OCPM generation disabled or no document flows)");
934 }
935
936 let mut entries = Vec::new();
938
939 if self.phase_config.generate_document_flows && !document_flows.p2p_chains.is_empty() {
941 debug!("Phase 4a: Generating JEs from document flows");
942 let flow_entries = self.generate_jes_from_document_flows(&document_flows)?;
943 debug!("Generated {} JEs from document flows", flow_entries.len());
944 entries.extend(flow_entries);
945 }
946
947 if self.phase_config.generate_journal_entries {
949 info!("Phase 4: Generating Journal Entries");
950 let je_entries = self.generate_journal_entries(&coa)?;
951 info!("Generated {} standalone journal entries", je_entries.len());
952 entries.extend(je_entries);
953 } else {
954 debug!("Phase 4: Skipped (journal entry generation disabled)");
955 }
956
957 if !entries.is_empty() {
958 stats.total_entries = entries.len() as u64;
959 stats.total_line_items = entries.iter().map(|e| e.line_count() as u64).sum();
960 info!(
961 "Total entries: {}, total line items: {}",
962 stats.total_entries, stats.total_line_items
963 );
964
965 self.check_resources_with_log("post-journal-entries")?;
967 }
968
969 let actions = self.get_degradation_actions();
971
972 let mut anomaly_labels = AnomalyLabels::default();
974 if self.phase_config.inject_anomalies
975 && !entries.is_empty()
976 && !actions.skip_anomaly_injection
977 {
978 info!("Phase 5: Injecting Anomalies");
979 let result = self.inject_anomalies(&mut entries)?;
980 stats.anomalies_injected = result.labels.len();
981 anomaly_labels = result;
982 info!("Injected {} anomalies", stats.anomalies_injected);
983
984 self.check_resources_with_log("post-anomaly-injection")?;
986 } else if actions.skip_anomaly_injection {
987 warn!("Phase 5: Skipped due to resource degradation");
988 } else {
989 debug!("Phase 5: Skipped (anomaly injection disabled or no entries)");
990 }
991
992 let mut balance_validation = BalanceValidationResult::default();
994 if self.phase_config.validate_balances && !entries.is_empty() {
995 debug!("Phase 6: Validating Balances");
996 balance_validation = self.validate_journal_entries(&entries)?;
997 if balance_validation.is_balanced {
998 debug!("Balance validation passed");
999 } else {
1000 warn!(
1001 "Balance validation found {} errors",
1002 balance_validation.validation_errors.len()
1003 );
1004 }
1005 }
1006
1007 let mut data_quality_stats = DataQualityStats::default();
1009 if self.phase_config.inject_data_quality
1010 && !entries.is_empty()
1011 && !actions.skip_data_quality
1012 {
1013 info!("Phase 7: Injecting Data Quality Variations");
1014 data_quality_stats = self.inject_data_quality(&mut entries)?;
1015 stats.data_quality_issues = data_quality_stats.records_with_issues;
1016 info!("Injected {} data quality issues", stats.data_quality_issues);
1017
1018 self.check_resources_with_log("post-data-quality")?;
1020 } else if actions.skip_data_quality {
1021 warn!("Phase 7: Skipped due to resource degradation");
1022 } else {
1023 debug!("Phase 7: Skipped (data quality injection disabled or no entries)");
1024 }
1025
1026 let mut audit_snapshot = AuditSnapshot::default();
1028 if self.phase_config.generate_audit {
1029 info!("Phase 8: Generating Audit Data");
1030 audit_snapshot = self.generate_audit_data(&entries)?;
1031 stats.audit_engagement_count = audit_snapshot.engagements.len();
1032 stats.audit_workpaper_count = audit_snapshot.workpapers.len();
1033 stats.audit_evidence_count = audit_snapshot.evidence.len();
1034 stats.audit_risk_count = audit_snapshot.risk_assessments.len();
1035 stats.audit_finding_count = audit_snapshot.findings.len();
1036 stats.audit_judgment_count = audit_snapshot.judgments.len();
1037 info!(
1038 "Audit data generated: {} engagements, {} workpapers, {} evidence, {} risks, {} findings, {} judgments",
1039 stats.audit_engagement_count, stats.audit_workpaper_count,
1040 stats.audit_evidence_count, stats.audit_risk_count,
1041 stats.audit_finding_count, stats.audit_judgment_count
1042 );
1043
1044 self.check_resources_with_log("post-audit")?;
1046 } else {
1047 debug!("Phase 8: Skipped (audit generation disabled)");
1048 }
1049
1050 let mut banking_snapshot = BankingSnapshot::default();
1052 if self.phase_config.generate_banking && self.config.banking.enabled {
1053 info!("Phase 9: Generating Banking KYC/AML Data");
1054 banking_snapshot = self.generate_banking_data()?;
1055 stats.banking_customer_count = banking_snapshot.customers.len();
1056 stats.banking_account_count = banking_snapshot.accounts.len();
1057 stats.banking_transaction_count = banking_snapshot.transactions.len();
1058 stats.banking_suspicious_count = banking_snapshot.suspicious_count;
1059 info!(
1060 "Banking data generated: {} customers, {} accounts, {} transactions ({} suspicious)",
1061 stats.banking_customer_count, stats.banking_account_count,
1062 stats.banking_transaction_count, stats.banking_suspicious_count
1063 );
1064
1065 self.check_resources_with_log("post-banking")?;
1067 } else {
1068 debug!("Phase 9: Skipped (banking generation disabled)");
1069 }
1070
1071 let graph_export_snapshot = if (self.phase_config.generate_graph_export
1073 || self.config.graph_export.enabled)
1074 && !entries.is_empty()
1075 {
1076 info!("Phase 10: Exporting Accounting Network Graphs");
1077 match self.export_graphs(&entries, &coa, &mut stats) {
1078 Ok(snapshot) => {
1079 info!(
1080 "Graph export complete: {} graphs ({} nodes, {} edges)",
1081 snapshot.graph_count, stats.graph_node_count, stats.graph_edge_count
1082 );
1083 snapshot
1084 }
1085 Err(e) => {
1086 warn!("Phase 10: Graph export failed: {}", e);
1087 GraphExportSnapshot::default()
1088 }
1089 }
1090 } else {
1091 debug!("Phase 10: Skipped (graph export disabled or no entries)");
1092 GraphExportSnapshot::default()
1093 };
1094
1095 let resource_stats = self.resource_guard.stats();
1097 info!(
1098 "Generation workflow complete. Resource stats: memory_peak={}MB, disk_written={}bytes, degradation_level={}",
1099 resource_stats.memory.peak_resident_bytes / (1024 * 1024),
1100 resource_stats.disk.estimated_bytes_written,
1101 resource_stats.degradation_level
1102 );
1103
1104 Ok(EnhancedGenerationResult {
1105 chart_of_accounts: (*coa).clone(),
1106 master_data: self.master_data.clone(),
1107 document_flows,
1108 subledger,
1109 ocpm: ocpm_snapshot,
1110 audit: audit_snapshot,
1111 banking: banking_snapshot,
1112 graph_export: graph_export_snapshot,
1113 journal_entries: entries,
1114 anomaly_labels,
1115 balance_validation,
1116 data_quality_stats,
1117 statistics: stats,
1118 })
1119 }
1120
1121 fn generate_coa(&mut self) -> SynthResult<Arc<ChartOfAccounts>> {
1123 let pb = self.create_progress_bar(1, "Generating Chart of Accounts");
1124
1125 let mut gen = ChartOfAccountsGenerator::new(
1126 self.config.chart_of_accounts.complexity,
1127 self.config.global.industry,
1128 self.seed,
1129 );
1130
1131 let coa = Arc::new(gen.generate());
1132 self.coa = Some(Arc::clone(&coa));
1133
1134 if let Some(pb) = pb {
1135 pb.finish_with_message("Chart of Accounts complete");
1136 }
1137
1138 Ok(coa)
1139 }
1140
1141 fn generate_master_data(&mut self) -> SynthResult<()> {
1143 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
1144 .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
1145 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
1146
1147 let total = self.config.companies.len() as u64 * 5; let pb = self.create_progress_bar(total, "Generating Master Data");
1149
1150 for (i, company) in self.config.companies.iter().enumerate() {
1151 let company_seed = self.seed.wrapping_add(i as u64 * 1000);
1152
1153 let mut vendor_gen = VendorGenerator::new(company_seed);
1155 let vendor_pool = vendor_gen.generate_vendor_pool(
1156 self.phase_config.vendors_per_company,
1157 &company.code,
1158 start_date,
1159 );
1160 self.master_data.vendors.extend(vendor_pool.vendors);
1161 if let Some(pb) = &pb {
1162 pb.inc(1);
1163 }
1164
1165 let mut customer_gen = CustomerGenerator::new(company_seed + 100);
1167 let customer_pool = customer_gen.generate_customer_pool(
1168 self.phase_config.customers_per_company,
1169 &company.code,
1170 start_date,
1171 );
1172 self.master_data.customers.extend(customer_pool.customers);
1173 if let Some(pb) = &pb {
1174 pb.inc(1);
1175 }
1176
1177 let mut material_gen = MaterialGenerator::new(company_seed + 200);
1179 let material_pool = material_gen.generate_material_pool(
1180 self.phase_config.materials_per_company,
1181 &company.code,
1182 start_date,
1183 );
1184 self.master_data.materials.extend(material_pool.materials);
1185 if let Some(pb) = &pb {
1186 pb.inc(1);
1187 }
1188
1189 let mut asset_gen = AssetGenerator::new(company_seed + 300);
1191 let asset_pool = asset_gen.generate_asset_pool(
1192 self.phase_config.assets_per_company,
1193 &company.code,
1194 (start_date, end_date),
1195 );
1196 self.master_data.assets.extend(asset_pool.assets);
1197 if let Some(pb) = &pb {
1198 pb.inc(1);
1199 }
1200
1201 let mut employee_gen = EmployeeGenerator::new(company_seed + 400);
1203 let employee_pool =
1204 employee_gen.generate_company_pool(&company.code, (start_date, end_date));
1205 self.master_data.employees.extend(employee_pool.employees);
1206 if let Some(pb) = &pb {
1207 pb.inc(1);
1208 }
1209 }
1210
1211 if let Some(pb) = pb {
1212 pb.finish_with_message("Master data generation complete");
1213 }
1214
1215 Ok(())
1216 }
1217
1218 fn generate_document_flows(&mut self, flows: &mut DocumentFlowSnapshot) -> SynthResult<()> {
1220 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
1221 .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
1222
1223 let p2p_count = self
1225 .phase_config
1226 .p2p_chains
1227 .min(self.master_data.vendors.len() * 2);
1228 let pb = self.create_progress_bar(p2p_count as u64, "Generating P2P Document Flows");
1229
1230 let p2p_config = convert_p2p_config(&self.config.document_flows.p2p);
1232 let mut p2p_gen = P2PGenerator::with_config(self.seed + 1000, p2p_config);
1233
1234 for i in 0..p2p_count {
1235 let vendor = &self.master_data.vendors[i % self.master_data.vendors.len()];
1236 let materials: Vec<&Material> = self
1237 .master_data
1238 .materials
1239 .iter()
1240 .skip(i % self.master_data.materials.len().max(1))
1241 .take(2.min(self.master_data.materials.len()))
1242 .collect();
1243
1244 if materials.is_empty() {
1245 continue;
1246 }
1247
1248 let company = &self.config.companies[i % self.config.companies.len()];
1249 let po_date = start_date + chrono::Duration::days((i * 3) as i64 % 365);
1250 let fiscal_period = po_date.month() as u8;
1251 let created_by = self
1252 .master_data
1253 .employees
1254 .first()
1255 .map(|e| e.user_id.as_str())
1256 .unwrap_or("SYSTEM");
1257
1258 let chain = p2p_gen.generate_chain(
1259 &company.code,
1260 vendor,
1261 &materials,
1262 po_date,
1263 start_date.year() as u16,
1264 fiscal_period,
1265 created_by,
1266 );
1267
1268 flows.purchase_orders.push(chain.purchase_order.clone());
1270 flows.goods_receipts.extend(chain.goods_receipts.clone());
1271 if let Some(vi) = &chain.vendor_invoice {
1272 flows.vendor_invoices.push(vi.clone());
1273 }
1274 if let Some(payment) = &chain.payment {
1275 flows.payments.push(payment.clone());
1276 }
1277 flows.p2p_chains.push(chain);
1278
1279 if let Some(pb) = &pb {
1280 pb.inc(1);
1281 }
1282 }
1283
1284 if let Some(pb) = pb {
1285 pb.finish_with_message("P2P document flows complete");
1286 }
1287
1288 let o2c_count = self
1290 .phase_config
1291 .o2c_chains
1292 .min(self.master_data.customers.len() * 2);
1293 let pb = self.create_progress_bar(o2c_count as u64, "Generating O2C Document Flows");
1294
1295 let o2c_config = convert_o2c_config(&self.config.document_flows.o2c);
1297 let mut o2c_gen = O2CGenerator::with_config(self.seed + 2000, o2c_config);
1298
1299 for i in 0..o2c_count {
1300 let customer = &self.master_data.customers[i % self.master_data.customers.len()];
1301 let materials: Vec<&Material> = self
1302 .master_data
1303 .materials
1304 .iter()
1305 .skip(i % self.master_data.materials.len().max(1))
1306 .take(2.min(self.master_data.materials.len()))
1307 .collect();
1308
1309 if materials.is_empty() {
1310 continue;
1311 }
1312
1313 let company = &self.config.companies[i % self.config.companies.len()];
1314 let so_date = start_date + chrono::Duration::days((i * 2) as i64 % 365);
1315 let fiscal_period = so_date.month() as u8;
1316 let created_by = self
1317 .master_data
1318 .employees
1319 .first()
1320 .map(|e| e.user_id.as_str())
1321 .unwrap_or("SYSTEM");
1322
1323 let chain = o2c_gen.generate_chain(
1324 &company.code,
1325 customer,
1326 &materials,
1327 so_date,
1328 start_date.year() as u16,
1329 fiscal_period,
1330 created_by,
1331 );
1332
1333 flows.sales_orders.push(chain.sales_order.clone());
1335 flows.deliveries.extend(chain.deliveries.clone());
1336 if let Some(ci) = &chain.customer_invoice {
1337 flows.customer_invoices.push(ci.clone());
1338 }
1339 if let Some(receipt) = &chain.customer_receipt {
1340 flows.payments.push(receipt.clone());
1341 }
1342 flows.o2c_chains.push(chain);
1343
1344 if let Some(pb) = &pb {
1345 pb.inc(1);
1346 }
1347 }
1348
1349 if let Some(pb) = pb {
1350 pb.finish_with_message("O2C document flows complete");
1351 }
1352
1353 Ok(())
1354 }
1355
1356 fn generate_journal_entries(
1358 &mut self,
1359 coa: &Arc<ChartOfAccounts>,
1360 ) -> SynthResult<Vec<JournalEntry>> {
1361 let total = self.calculate_total_transactions();
1362 let pb = self.create_progress_bar(total, "Generating Journal Entries");
1363
1364 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
1365 .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
1366 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
1367
1368 let company_codes: Vec<String> = self
1369 .config
1370 .companies
1371 .iter()
1372 .map(|c| c.code.clone())
1373 .collect();
1374
1375 let generator = JournalEntryGenerator::new_with_params(
1376 self.config.transactions.clone(),
1377 Arc::clone(coa),
1378 company_codes,
1379 start_date,
1380 end_date,
1381 self.seed,
1382 );
1383
1384 let mut generator = generator
1388 .with_master_data(
1389 &self.master_data.vendors,
1390 &self.master_data.customers,
1391 &self.master_data.materials,
1392 )
1393 .with_persona_errors(true)
1394 .with_fraud_config(self.config.fraud.clone());
1395
1396 if self.config.temporal.enabled {
1398 let drift_config = self.config.temporal.to_core_config();
1399 generator = generator.with_drift_config(drift_config, self.seed + 100);
1400 }
1401
1402 let mut entries = Vec::with_capacity(total as usize);
1403
1404 self.check_memory_limit()?;
1406
1407 const MEMORY_CHECK_INTERVAL: u64 = 1000;
1409
1410 for i in 0..total {
1411 let entry = generator.generate();
1412 entries.push(entry);
1413 if let Some(pb) = &pb {
1414 pb.inc(1);
1415 }
1416
1417 if (i + 1) % MEMORY_CHECK_INTERVAL == 0 {
1419 self.check_memory_limit()?;
1420 }
1421 }
1422
1423 if let Some(pb) = pb {
1424 pb.finish_with_message("Journal entries complete");
1425 }
1426
1427 Ok(entries)
1428 }
1429
1430 fn generate_jes_from_document_flows(
1435 &mut self,
1436 flows: &DocumentFlowSnapshot,
1437 ) -> SynthResult<Vec<JournalEntry>> {
1438 let total_chains = flows.p2p_chains.len() + flows.o2c_chains.len();
1439 let pb = self.create_progress_bar(total_chains as u64, "Generating Document Flow JEs");
1440
1441 let mut generator = DocumentFlowJeGenerator::with_config_and_seed(
1442 DocumentFlowJeConfig::default(),
1443 self.seed,
1444 );
1445 let mut entries = Vec::new();
1446
1447 for chain in &flows.p2p_chains {
1449 let chain_entries = generator.generate_from_p2p_chain(chain);
1450 entries.extend(chain_entries);
1451 if let Some(pb) = &pb {
1452 pb.inc(1);
1453 }
1454 }
1455
1456 for chain in &flows.o2c_chains {
1458 let chain_entries = generator.generate_from_o2c_chain(chain);
1459 entries.extend(chain_entries);
1460 if let Some(pb) = &pb {
1461 pb.inc(1);
1462 }
1463 }
1464
1465 if let Some(pb) = pb {
1466 pb.finish_with_message(format!(
1467 "Generated {} JEs from document flows",
1468 entries.len()
1469 ));
1470 }
1471
1472 Ok(entries)
1473 }
1474
1475 fn link_document_flows_to_subledgers(
1480 &mut self,
1481 flows: &DocumentFlowSnapshot,
1482 ) -> SynthResult<SubledgerSnapshot> {
1483 let total = flows.vendor_invoices.len() + flows.customer_invoices.len();
1484 let pb = self.create_progress_bar(total as u64, "Linking Subledgers");
1485
1486 let mut linker = DocumentFlowLinker::new();
1487
1488 let ap_invoices = linker.batch_create_ap_invoices(&flows.vendor_invoices);
1490 if let Some(pb) = &pb {
1491 pb.inc(flows.vendor_invoices.len() as u64);
1492 }
1493
1494 let ar_invoices = linker.batch_create_ar_invoices(&flows.customer_invoices);
1496 if let Some(pb) = &pb {
1497 pb.inc(flows.customer_invoices.len() as u64);
1498 }
1499
1500 if let Some(pb) = pb {
1501 pb.finish_with_message(format!(
1502 "Linked {} AP and {} AR invoices",
1503 ap_invoices.len(),
1504 ar_invoices.len()
1505 ));
1506 }
1507
1508 Ok(SubledgerSnapshot {
1509 ap_invoices,
1510 ar_invoices,
1511 })
1512 }
1513
1514 fn generate_ocpm_events(&mut self, flows: &DocumentFlowSnapshot) -> SynthResult<OcpmSnapshot> {
1519 let total_chains = flows.p2p_chains.len() + flows.o2c_chains.len();
1520 let pb = self.create_progress_bar(total_chains as u64, "Generating OCPM Events");
1521
1522 let metadata = EventLogMetadata::new("SyntheticData OCPM Log");
1524 let mut event_log = OcpmEventLog::with_metadata(metadata).with_standard_types();
1525
1526 let ocpm_config = OcpmGeneratorConfig {
1528 generate_p2p: true,
1529 generate_o2c: true,
1530 happy_path_rate: 0.75,
1531 exception_path_rate: 0.20,
1532 error_path_rate: 0.05,
1533 add_duration_variability: true,
1534 duration_std_dev_factor: 0.3,
1535 };
1536 let mut ocpm_gen = OcpmEventGenerator::with_config(self.seed + 3000, ocpm_config);
1537
1538 let available_users: Vec<String> = self
1540 .master_data
1541 .employees
1542 .iter()
1543 .take(20)
1544 .map(|e| e.user_id.clone())
1545 .collect();
1546
1547 for chain in &flows.p2p_chains {
1549 let po = &chain.purchase_order;
1550 let documents = P2pDocuments::new(
1551 &po.header.document_id,
1552 &po.vendor_id,
1553 &po.header.company_code,
1554 po.total_net_amount,
1555 &po.header.currency,
1556 )
1557 .with_goods_receipt(
1558 chain
1559 .goods_receipts
1560 .first()
1561 .map(|gr| gr.header.document_id.as_str())
1562 .unwrap_or(""),
1563 )
1564 .with_invoice(
1565 chain
1566 .vendor_invoice
1567 .as_ref()
1568 .map(|vi| vi.header.document_id.as_str())
1569 .unwrap_or(""),
1570 )
1571 .with_payment(
1572 chain
1573 .payment
1574 .as_ref()
1575 .map(|p| p.header.document_id.as_str())
1576 .unwrap_or(""),
1577 );
1578
1579 let start_time =
1580 chrono::DateTime::from_naive_utc_and_offset(po.header.entry_timestamp, chrono::Utc);
1581 let result = ocpm_gen.generate_p2p_case(&documents, start_time, &available_users);
1582
1583 for event in result.events {
1585 event_log.add_event(event);
1586 }
1587 for object in result.objects {
1588 event_log.add_object(object);
1589 }
1590 for relationship in result.relationships {
1591 event_log.add_relationship(relationship);
1592 }
1593 event_log.add_case(result.case_trace);
1594
1595 if let Some(pb) = &pb {
1596 pb.inc(1);
1597 }
1598 }
1599
1600 for chain in &flows.o2c_chains {
1602 let so = &chain.sales_order;
1603 let documents = O2cDocuments::new(
1604 &so.header.document_id,
1605 &so.customer_id,
1606 &so.header.company_code,
1607 so.total_net_amount,
1608 &so.header.currency,
1609 )
1610 .with_delivery(
1611 chain
1612 .deliveries
1613 .first()
1614 .map(|d| d.header.document_id.as_str())
1615 .unwrap_or(""),
1616 )
1617 .with_invoice(
1618 chain
1619 .customer_invoice
1620 .as_ref()
1621 .map(|ci| ci.header.document_id.as_str())
1622 .unwrap_or(""),
1623 )
1624 .with_receipt(
1625 chain
1626 .customer_receipt
1627 .as_ref()
1628 .map(|r| r.header.document_id.as_str())
1629 .unwrap_or(""),
1630 );
1631
1632 let start_time =
1633 chrono::DateTime::from_naive_utc_and_offset(so.header.entry_timestamp, chrono::Utc);
1634 let result = ocpm_gen.generate_o2c_case(&documents, start_time, &available_users);
1635
1636 for event in result.events {
1638 event_log.add_event(event);
1639 }
1640 for object in result.objects {
1641 event_log.add_object(object);
1642 }
1643 for relationship in result.relationships {
1644 event_log.add_relationship(relationship);
1645 }
1646 event_log.add_case(result.case_trace);
1647
1648 if let Some(pb) = &pb {
1649 pb.inc(1);
1650 }
1651 }
1652
1653 event_log.compute_variants();
1655
1656 let summary = event_log.summary();
1657
1658 if let Some(pb) = pb {
1659 pb.finish_with_message(format!(
1660 "Generated {} OCPM events, {} objects",
1661 summary.event_count, summary.object_count
1662 ));
1663 }
1664
1665 Ok(OcpmSnapshot {
1666 event_count: summary.event_count,
1667 object_count: summary.object_count,
1668 case_count: summary.case_count,
1669 event_log: Some(event_log),
1670 })
1671 }
1672
1673 fn inject_anomalies(&mut self, entries: &mut [JournalEntry]) -> SynthResult<AnomalyLabels> {
1675 let pb = self.create_progress_bar(entries.len() as u64, "Injecting Anomalies");
1676
1677 let anomaly_config = AnomalyInjectorConfig {
1678 rates: AnomalyRateConfig {
1679 total_rate: 0.02,
1680 ..Default::default()
1681 },
1682 seed: self.seed + 5000,
1683 ..Default::default()
1684 };
1685
1686 let mut injector = AnomalyInjector::new(anomaly_config);
1687 let result = injector.process_entries(entries);
1688
1689 if let Some(pb) = &pb {
1690 pb.inc(entries.len() as u64);
1691 pb.finish_with_message("Anomaly injection complete");
1692 }
1693
1694 let mut by_type = HashMap::new();
1695 for label in &result.labels {
1696 *by_type
1697 .entry(format!("{:?}", label.anomaly_type))
1698 .or_insert(0) += 1;
1699 }
1700
1701 Ok(AnomalyLabels {
1702 labels: result.labels,
1703 summary: Some(result.summary),
1704 by_type,
1705 })
1706 }
1707
1708 fn validate_journal_entries(
1717 &mut self,
1718 entries: &[JournalEntry],
1719 ) -> SynthResult<BalanceValidationResult> {
1720 let clean_entries: Vec<&JournalEntry> = entries
1722 .iter()
1723 .filter(|e| {
1724 e.header
1725 .header_text
1726 .as_ref()
1727 .map(|t| !t.contains("[HUMAN_ERROR:"))
1728 .unwrap_or(true)
1729 })
1730 .collect();
1731
1732 let pb = self.create_progress_bar(clean_entries.len() as u64, "Validating Balances");
1733
1734 let config = BalanceTrackerConfig {
1736 validate_on_each_entry: false, track_history: false, fail_on_validation_error: false, ..Default::default()
1740 };
1741
1742 let mut tracker = RunningBalanceTracker::new(config);
1743
1744 let clean_refs: Vec<JournalEntry> = clean_entries.into_iter().cloned().collect();
1746 let errors = tracker.apply_entries(&clean_refs);
1747
1748 if let Some(pb) = &pb {
1749 pb.inc(entries.len() as u64);
1750 }
1751
1752 let has_unbalanced = tracker
1755 .get_validation_errors()
1756 .iter()
1757 .any(|e| e.error_type == datasynth_generators::ValidationErrorType::UnbalancedEntry);
1758
1759 let mut all_errors = errors;
1762 all_errors.extend(tracker.get_validation_errors().iter().cloned());
1763 let company_codes: Vec<String> = self
1764 .config
1765 .companies
1766 .iter()
1767 .map(|c| c.code.clone())
1768 .collect();
1769
1770 let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
1771 .map(|d| d + chrono::Months::new(self.config.global.period_months))
1772 .unwrap_or_else(|_| chrono::Local::now().date_naive());
1773
1774 for company_code in &company_codes {
1775 if let Err(e) = tracker.validate_balance_sheet(company_code, end_date, None) {
1776 all_errors.push(e);
1777 }
1778 }
1779
1780 let stats = tracker.get_statistics();
1782
1783 let is_balanced = all_errors.is_empty();
1785
1786 if let Some(pb) = pb {
1787 let msg = if is_balanced {
1788 "Balance validation passed"
1789 } else {
1790 "Balance validation completed with errors"
1791 };
1792 pb.finish_with_message(msg);
1793 }
1794
1795 Ok(BalanceValidationResult {
1796 validated: true,
1797 is_balanced,
1798 entries_processed: stats.entries_processed,
1799 total_debits: stats.total_debits,
1800 total_credits: stats.total_credits,
1801 accounts_tracked: stats.accounts_tracked,
1802 companies_tracked: stats.companies_tracked,
1803 validation_errors: all_errors,
1804 has_unbalanced_entries: has_unbalanced,
1805 })
1806 }
1807
1808 fn inject_data_quality(
1813 &mut self,
1814 entries: &mut [JournalEntry],
1815 ) -> SynthResult<DataQualityStats> {
1816 let pb = self.create_progress_bar(entries.len() as u64, "Injecting Data Quality Issues");
1817
1818 let config = DataQualityConfig::minimal();
1820 let mut injector = DataQualityInjector::new(config);
1821
1822 let context = HashMap::new();
1824
1825 for entry in entries.iter_mut() {
1826 if let Some(text) = &entry.header.header_text {
1828 let processed = injector.process_text_field(
1829 "header_text",
1830 text,
1831 &entry.header.document_id.to_string(),
1832 &context,
1833 );
1834 match processed {
1835 Some(new_text) if new_text != *text => {
1836 entry.header.header_text = Some(new_text);
1837 }
1838 None => {
1839 entry.header.header_text = None; }
1841 _ => {}
1842 }
1843 }
1844
1845 if let Some(ref_text) = &entry.header.reference {
1847 let processed = injector.process_text_field(
1848 "reference",
1849 ref_text,
1850 &entry.header.document_id.to_string(),
1851 &context,
1852 );
1853 match processed {
1854 Some(new_text) if new_text != *ref_text => {
1855 entry.header.reference = Some(new_text);
1856 }
1857 None => {
1858 entry.header.reference = None;
1859 }
1860 _ => {}
1861 }
1862 }
1863
1864 let user_persona = entry.header.user_persona.clone();
1866 if let Some(processed) = injector.process_text_field(
1867 "user_persona",
1868 &user_persona,
1869 &entry.header.document_id.to_string(),
1870 &context,
1871 ) {
1872 if processed != user_persona {
1873 entry.header.user_persona = processed;
1874 }
1875 }
1876
1877 for line in &mut entry.lines {
1879 if let Some(ref text) = line.line_text {
1881 let processed = injector.process_text_field(
1882 "line_text",
1883 text,
1884 &entry.header.document_id.to_string(),
1885 &context,
1886 );
1887 match processed {
1888 Some(new_text) if new_text != *text => {
1889 line.line_text = Some(new_text);
1890 }
1891 None => {
1892 line.line_text = None;
1893 }
1894 _ => {}
1895 }
1896 }
1897
1898 if let Some(cc) = &line.cost_center {
1900 let processed = injector.process_text_field(
1901 "cost_center",
1902 cc,
1903 &entry.header.document_id.to_string(),
1904 &context,
1905 );
1906 match processed {
1907 Some(new_cc) if new_cc != *cc => {
1908 line.cost_center = Some(new_cc);
1909 }
1910 None => {
1911 line.cost_center = None;
1912 }
1913 _ => {}
1914 }
1915 }
1916 }
1917
1918 if let Some(pb) = &pb {
1919 pb.inc(1);
1920 }
1921 }
1922
1923 if let Some(pb) = pb {
1924 pb.finish_with_message("Data quality injection complete");
1925 }
1926
1927 Ok(injector.stats().clone())
1928 }
1929
1930 fn generate_audit_data(&mut self, entries: &[JournalEntry]) -> SynthResult<AuditSnapshot> {
1941 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
1942 .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
1943 let fiscal_year = start_date.year() as u16;
1944 let period_end = start_date + chrono::Months::new(self.config.global.period_months);
1945
1946 let total_revenue: rust_decimal::Decimal = entries
1948 .iter()
1949 .flat_map(|e| e.lines.iter())
1950 .filter(|l| l.credit_amount > rust_decimal::Decimal::ZERO)
1951 .map(|l| l.credit_amount)
1952 .sum();
1953
1954 let total_items = (self.phase_config.audit_engagements * 50) as u64; let pb = self.create_progress_bar(total_items, "Generating Audit Data");
1956
1957 let mut snapshot = AuditSnapshot::default();
1958
1959 let mut engagement_gen = AuditEngagementGenerator::new(self.seed + 7000);
1961 let mut workpaper_gen = WorkpaperGenerator::new(self.seed + 7100);
1962 let mut evidence_gen = EvidenceGenerator::new(self.seed + 7200);
1963 let mut risk_gen = RiskAssessmentGenerator::new(self.seed + 7300);
1964 let mut finding_gen = FindingGenerator::new(self.seed + 7400);
1965 let mut judgment_gen = JudgmentGenerator::new(self.seed + 7500);
1966
1967 let accounts: Vec<String> = self
1969 .coa
1970 .as_ref()
1971 .map(|coa| {
1972 coa.get_postable_accounts()
1973 .iter()
1974 .map(|acc| acc.account_code().to_string())
1975 .collect()
1976 })
1977 .unwrap_or_default();
1978
1979 for (i, company) in self.config.companies.iter().enumerate() {
1981 let company_revenue = total_revenue
1983 * rust_decimal::Decimal::try_from(company.volume_weight).unwrap_or_default();
1984
1985 let engagements_for_company =
1987 self.phase_config.audit_engagements / self.config.companies.len().max(1);
1988 let extra = if i < self.phase_config.audit_engagements % self.config.companies.len() {
1989 1
1990 } else {
1991 0
1992 };
1993
1994 for _eng_idx in 0..(engagements_for_company + extra) {
1995 let engagement = engagement_gen.generate_engagement(
1997 &company.code,
1998 &company.name,
1999 fiscal_year,
2000 period_end,
2001 company_revenue,
2002 None, );
2004
2005 if let Some(pb) = &pb {
2006 pb.inc(1);
2007 }
2008
2009 let team_members: Vec<String> = engagement.team_member_ids.clone();
2011
2012 let workpapers =
2014 workpaper_gen.generate_complete_workpaper_set(&engagement, &team_members);
2015
2016 for wp in &workpapers {
2017 if let Some(pb) = &pb {
2018 pb.inc(1);
2019 }
2020
2021 let evidence = evidence_gen.generate_evidence_for_workpaper(
2023 wp,
2024 &team_members,
2025 wp.preparer_date,
2026 );
2027
2028 for _ in &evidence {
2029 if let Some(pb) = &pb {
2030 pb.inc(1);
2031 }
2032 }
2033
2034 snapshot.evidence.extend(evidence);
2035 }
2036
2037 let risks =
2039 risk_gen.generate_risks_for_engagement(&engagement, &team_members, &accounts);
2040
2041 for _ in &risks {
2042 if let Some(pb) = &pb {
2043 pb.inc(1);
2044 }
2045 }
2046 snapshot.risk_assessments.extend(risks);
2047
2048 let findings = finding_gen.generate_findings_for_engagement(
2050 &engagement,
2051 &workpapers,
2052 &team_members,
2053 );
2054
2055 for _ in &findings {
2056 if let Some(pb) = &pb {
2057 pb.inc(1);
2058 }
2059 }
2060 snapshot.findings.extend(findings);
2061
2062 let judgments =
2064 judgment_gen.generate_judgments_for_engagement(&engagement, &team_members);
2065
2066 for _ in &judgments {
2067 if let Some(pb) = &pb {
2068 pb.inc(1);
2069 }
2070 }
2071 snapshot.judgments.extend(judgments);
2072
2073 snapshot.workpapers.extend(workpapers);
2075 snapshot.engagements.push(engagement);
2076 }
2077 }
2078
2079 if let Some(pb) = pb {
2080 pb.finish_with_message(format!(
2081 "Audit data: {} engagements, {} workpapers, {} evidence",
2082 snapshot.engagements.len(),
2083 snapshot.workpapers.len(),
2084 snapshot.evidence.len()
2085 ));
2086 }
2087
2088 Ok(snapshot)
2089 }
2090
2091 fn export_graphs(
2098 &mut self,
2099 entries: &[JournalEntry],
2100 _coa: &Arc<ChartOfAccounts>,
2101 stats: &mut EnhancedGenerationStatistics,
2102 ) -> SynthResult<GraphExportSnapshot> {
2103 let pb = self.create_progress_bar(100, "Exporting Graphs");
2104
2105 let mut snapshot = GraphExportSnapshot::default();
2106
2107 let output_dir = self
2109 .output_path
2110 .clone()
2111 .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
2112 let graph_dir = output_dir.join(&self.config.graph_export.output_subdirectory);
2113
2114 for graph_type in &self.config.graph_export.graph_types {
2116 if let Some(pb) = &pb {
2117 pb.inc(10);
2118 }
2119
2120 let graph_config = TransactionGraphConfig {
2122 include_vendors: false,
2123 include_customers: false,
2124 create_debit_credit_edges: true,
2125 include_document_nodes: graph_type.include_document_nodes,
2126 min_edge_weight: graph_type.min_edge_weight,
2127 aggregate_parallel_edges: graph_type.aggregate_edges,
2128 };
2129
2130 let mut builder = TransactionGraphBuilder::new(graph_config);
2131 builder.add_journal_entries(entries);
2132 let graph = builder.build();
2133
2134 stats.graph_node_count += graph.node_count();
2136 stats.graph_edge_count += graph.edge_count();
2137
2138 if let Some(pb) = &pb {
2139 pb.inc(40);
2140 }
2141
2142 for format in &self.config.graph_export.formats {
2144 let format_dir = graph_dir.join(&graph_type.name).join(format_name(*format));
2145
2146 if let Err(e) = std::fs::create_dir_all(&format_dir) {
2148 warn!("Failed to create graph output directory: {}", e);
2149 continue;
2150 }
2151
2152 match format {
2153 datasynth_config::schema::GraphExportFormat::PytorchGeometric => {
2154 let pyg_config = PyGExportConfig {
2155 export_node_features: true,
2156 export_edge_features: true,
2157 export_node_labels: true,
2158 export_edge_labels: true,
2159 one_hot_categoricals: false,
2160 export_masks: true,
2161 train_ratio: self.config.graph_export.train_ratio,
2162 val_ratio: self.config.graph_export.validation_ratio,
2163 seed: self.config.graph_export.split_seed.unwrap_or(self.seed),
2164 };
2165
2166 let exporter = PyGExporter::new(pyg_config);
2167 match exporter.export(&graph, &format_dir) {
2168 Ok(metadata) => {
2169 snapshot.exports.insert(
2170 format!("{}_{}", graph_type.name, "pytorch_geometric"),
2171 GraphExportInfo {
2172 name: graph_type.name.clone(),
2173 format: "pytorch_geometric".to_string(),
2174 output_path: format_dir.clone(),
2175 node_count: metadata.num_nodes,
2176 edge_count: metadata.num_edges,
2177 },
2178 );
2179 snapshot.graph_count += 1;
2180 }
2181 Err(e) => {
2182 warn!("Failed to export PyTorch Geometric graph: {}", e);
2183 }
2184 }
2185 }
2186 datasynth_config::schema::GraphExportFormat::Neo4j => {
2187 debug!("Neo4j export not yet implemented for accounting networks");
2189 }
2190 datasynth_config::schema::GraphExportFormat::Dgl => {
2191 debug!("DGL export not yet implemented for accounting networks");
2193 }
2194 datasynth_config::schema::GraphExportFormat::RustGraph => {
2195 use datasynth_graph::{
2196 RustGraphExportConfig, RustGraphExporter, RustGraphOutputFormat,
2197 };
2198
2199 let rustgraph_config = RustGraphExportConfig {
2200 include_features: true,
2201 include_temporal: true,
2202 include_labels: true,
2203 source_name: "datasynth".to_string(),
2204 batch_id: None,
2205 output_format: RustGraphOutputFormat::JsonLines,
2206 export_node_properties: true,
2207 export_edge_properties: true,
2208 pretty_print: false,
2209 };
2210
2211 let exporter = RustGraphExporter::new(rustgraph_config);
2212 match exporter.export(&graph, &format_dir) {
2213 Ok(metadata) => {
2214 snapshot.exports.insert(
2215 format!("{}_{}", graph_type.name, "rustgraph"),
2216 GraphExportInfo {
2217 name: graph_type.name.clone(),
2218 format: "rustgraph".to_string(),
2219 output_path: format_dir.clone(),
2220 node_count: metadata.num_nodes,
2221 edge_count: metadata.num_edges,
2222 },
2223 );
2224 snapshot.graph_count += 1;
2225 }
2226 Err(e) => {
2227 warn!("Failed to export RustGraph: {}", e);
2228 }
2229 }
2230 }
2231 }
2232 }
2233
2234 if let Some(pb) = &pb {
2235 pb.inc(40);
2236 }
2237 }
2238
2239 stats.graph_export_count = snapshot.graph_count;
2240 snapshot.exported = snapshot.graph_count > 0;
2241
2242 if let Some(pb) = pb {
2243 pb.finish_with_message(format!(
2244 "Graphs exported: {} graphs ({} nodes, {} edges)",
2245 snapshot.graph_count, stats.graph_node_count, stats.graph_edge_count
2246 ));
2247 }
2248
2249 Ok(snapshot)
2250 }
2251
2252 fn generate_banking_data(&mut self) -> SynthResult<BankingSnapshot> {
2257 let pb = self.create_progress_bar(100, "Generating Banking Data");
2258
2259 let orchestrator = BankingOrchestratorBuilder::new()
2261 .config(self.config.banking.clone())
2262 .seed(self.seed + 9000)
2263 .build();
2264
2265 if let Some(pb) = &pb {
2266 pb.inc(10);
2267 }
2268
2269 let result = orchestrator.generate();
2271
2272 if let Some(pb) = &pb {
2273 pb.inc(90);
2274 pb.finish_with_message(format!(
2275 "Banking: {} customers, {} transactions",
2276 result.customers.len(),
2277 result.transactions.len()
2278 ));
2279 }
2280
2281 Ok(BankingSnapshot {
2282 customers: result.customers,
2283 accounts: result.accounts,
2284 transactions: result.transactions,
2285 suspicious_count: result.stats.suspicious_count,
2286 scenario_count: result.scenarios.len(),
2287 })
2288 }
2289
2290 fn calculate_total_transactions(&self) -> u64 {
2292 let months = self.config.global.period_months as f64;
2293 self.config
2294 .companies
2295 .iter()
2296 .map(|c| {
2297 let annual = c.annual_transaction_volume.count() as f64;
2298 let weighted = annual * c.volume_weight;
2299 (weighted * months / 12.0) as u64
2300 })
2301 .sum()
2302 }
2303
2304 fn create_progress_bar(&self, total: u64, message: &str) -> Option<ProgressBar> {
2306 if !self.phase_config.show_progress {
2307 return None;
2308 }
2309
2310 let pb = if let Some(mp) = &self.multi_progress {
2311 mp.add(ProgressBar::new(total))
2312 } else {
2313 ProgressBar::new(total)
2314 };
2315
2316 pb.set_style(
2317 ProgressStyle::default_bar()
2318 .template(&format!(
2319 "{{spinner:.green}} {} [{{elapsed_precise}}] [{{bar:40.cyan/blue}}] {{pos}}/{{len}} ({{per_sec}})",
2320 message
2321 ))
2322 .expect("Progress bar template should be valid - uses only standard indicatif placeholders")
2323 .progress_chars("#>-"),
2324 );
2325
2326 Some(pb)
2327 }
2328
2329 pub fn get_coa(&self) -> Option<Arc<ChartOfAccounts>> {
2331 self.coa.clone()
2332 }
2333
2334 pub fn get_master_data(&self) -> &MasterDataSnapshot {
2336 &self.master_data
2337 }
2338}
2339
2340fn format_name(format: datasynth_config::schema::GraphExportFormat) -> &'static str {
2342 match format {
2343 datasynth_config::schema::GraphExportFormat::PytorchGeometric => "pytorch_geometric",
2344 datasynth_config::schema::GraphExportFormat::Neo4j => "neo4j",
2345 datasynth_config::schema::GraphExportFormat::Dgl => "dgl",
2346 datasynth_config::schema::GraphExportFormat::RustGraph => "rustgraph",
2347 }
2348}
2349
2350#[cfg(test)]
2351mod tests {
2352 use super::*;
2353 use datasynth_config::schema::*;
2354
2355 fn create_test_config() -> GeneratorConfig {
2356 GeneratorConfig {
2357 global: GlobalConfig {
2358 industry: IndustrySector::Manufacturing,
2359 start_date: "2024-01-01".to_string(),
2360 period_months: 1,
2361 seed: Some(42),
2362 parallel: false,
2363 group_currency: "USD".to_string(),
2364 worker_threads: 0,
2365 memory_limit_mb: 0,
2366 },
2367 companies: vec![CompanyConfig {
2368 code: "1000".to_string(),
2369 name: "Test Company".to_string(),
2370 currency: "USD".to_string(),
2371 country: "US".to_string(),
2372 annual_transaction_volume: TransactionVolume::TenK,
2373 volume_weight: 1.0,
2374 fiscal_year_variant: "K4".to_string(),
2375 }],
2376 chart_of_accounts: ChartOfAccountsConfig {
2377 complexity: CoAComplexity::Small,
2378 industry_specific: true,
2379 custom_accounts: None,
2380 min_hierarchy_depth: 2,
2381 max_hierarchy_depth: 4,
2382 },
2383 transactions: TransactionConfig::default(),
2384 output: OutputConfig::default(),
2385 fraud: FraudConfig::default(),
2386 internal_controls: InternalControlsConfig::default(),
2387 business_processes: BusinessProcessConfig::default(),
2388 user_personas: UserPersonaConfig::default(),
2389 templates: TemplateConfig::default(),
2390 approval: ApprovalConfig::default(),
2391 departments: DepartmentConfig::default(),
2392 master_data: MasterDataConfig::default(),
2393 document_flows: DocumentFlowConfig::default(),
2394 intercompany: IntercompanyConfig::default(),
2395 balance: BalanceConfig::default(),
2396 ocpm: OcpmConfig::default(),
2397 audit: AuditGenerationConfig::default(),
2398 banking: datasynth_banking::BankingConfig::default(),
2399 data_quality: DataQualitySchemaConfig::default(),
2400 scenario: ScenarioConfig::default(),
2401 temporal: TemporalDriftConfig::default(),
2402 graph_export: GraphExportConfig::default(),
2403 streaming: StreamingSchemaConfig::default(),
2404 rate_limit: RateLimitSchemaConfig::default(),
2405 temporal_attributes: TemporalAttributeSchemaConfig::default(),
2406 relationships: RelationshipSchemaConfig::default(),
2407 accounting_standards: AccountingStandardsConfig::default(),
2408 audit_standards: AuditStandardsConfig::default(),
2409 }
2410 }
2411
/// An orchestrator can be constructed from the test configuration with
/// default phase settings.
#[test]
fn test_enhanced_orchestrator_creation() {
    let built = EnhancedOrchestrator::with_defaults(create_test_config());
    assert!(built.is_ok());
}
2418
/// With only journal entry generation enabled, generation succeeds and
/// produces at least one journal entry.
#[test]
fn test_minimal_generation() {
    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut orchestrator = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let outcome = orchestrator.generate();

    assert!(outcome.is_ok());
    let generated = outcome.unwrap();
    assert!(!generated.journal_entries.is_empty());
}
2438
/// With only master data generation enabled, vendors, customers and
/// materials are all populated.
#[test]
fn test_master_data_generation() {
    let phases = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: false,
        inject_anomalies: false,
        show_progress: false,
        vendors_per_company: 5,
        customers_per_company: 5,
        materials_per_company: 10,
        assets_per_company: 5,
        employees_per_company: 10,
        ..Default::default()
    };

    let mut orchestrator = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let generated = orchestrator.generate().unwrap();

    let master_data = &generated.master_data;
    assert!(!master_data.vendors.is_empty());
    assert!(!master_data.customers.is_empty());
    assert!(!master_data.materials.is_empty());
}
2463
/// With master data and document flows enabled, both the P2P and O2C chains
/// and their purchase/sales orders must be present in the result.
#[test]
fn test_document_flow_generation() {
    let phases = PhaseConfig {
        // Phase toggles: master data + document flows, everything else off.
        generate_master_data: true,
        generate_document_flows: true,
        generate_journal_entries: false,
        inject_anomalies: false,
        inject_data_quality: false,
        validate_balances: false,
        generate_ocpm_events: false,
        show_progress: false,
        // Per-company entity counts.
        vendors_per_company: 5,
        customers_per_company: 5,
        materials_per_company: 10,
        assets_per_company: 5,
        employees_per_company: 10,
        // Number of document chains requested.
        p2p_chains: 5,
        o2c_chains: 5,
        ..Default::default()
    };
    let mut engine = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();

    let generated = engine.generate().unwrap();
    let flows = &generated.document_flows;

    // Chains for both processes.
    assert!(!flows.p2p_chains.is_empty());
    assert!(!flows.o2c_chains.is_empty());

    // Flattened order documents.
    assert!(!flows.purchase_orders.is_empty());
    assert!(!flows.sales_orders.is_empty());
}
2497
/// With anomaly injection switched on, journal entries must exist and the
/// anomaly label summary must be set.
#[test]
fn test_anomaly_injection() {
    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: true,
        show_progress: false,
        ..Default::default()
    };
    let mut engine = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();

    let generated = engine.generate().unwrap();

    assert!(!generated.journal_entries.is_empty());

    // The summary is an `Option`; it must be populated after injection ran.
    assert!(generated.anomaly_labels.summary.is_some());
}
2520
/// Runs master data, document flows, journal entries and balance validation
/// together and checks that each stage left output in the result.
#[test]
fn test_full_generation_pipeline() {
    let phases = PhaseConfig {
        // Phase toggles.
        generate_master_data: true,
        generate_document_flows: true,
        generate_journal_entries: true,
        inject_anomalies: false,
        inject_data_quality: false,
        validate_balances: true,
        generate_ocpm_events: false,
        show_progress: false,
        // Small volumes keep the test fast.
        vendors_per_company: 3,
        customers_per_company: 3,
        materials_per_company: 5,
        assets_per_company: 3,
        employees_per_company: 5,
        p2p_chains: 3,
        o2c_chains: 3,
        ..Default::default()
    };
    let mut engine = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();

    let generated = engine.generate().unwrap();

    // Master data, document flows, journal entries and chart of accounts.
    assert!(!generated.master_data.vendors.is_empty());
    assert!(!generated.master_data.customers.is_empty());
    assert!(!generated.document_flows.p2p_chains.is_empty());
    assert!(!generated.document_flows.o2c_chains.is_empty());
    assert!(!generated.journal_entries.is_empty());
    assert!(generated.statistics.accounts_count > 0);

    // Subledger records must exist in this configuration.
    assert!(!generated.subledger.ap_invoices.is_empty());
    assert!(!generated.subledger.ar_invoices.is_empty());

    // Balance validation must have run over at least one entry.
    let validation = &generated.balance_validation;
    assert!(validation.validated);
    assert!(validation.entries_processed > 0);
}
2562
/// Checks that subledger AP/AR invoice counts match the vendor/customer
/// invoice counts from the document flows, and that the statistics agree
/// with the subledger sizes.
#[test]
fn test_subledger_linking() {
    let phases = PhaseConfig {
        // Phase toggles: master data + document flows only.
        generate_master_data: true,
        generate_document_flows: true,
        generate_journal_entries: false,
        inject_anomalies: false,
        inject_data_quality: false,
        validate_balances: false,
        generate_ocpm_events: false,
        show_progress: false,
        // Volumes.
        vendors_per_company: 5,
        customers_per_company: 5,
        materials_per_company: 10,
        assets_per_company: 3,
        employees_per_company: 5,
        p2p_chains: 5,
        o2c_chains: 5,
        ..Default::default()
    };
    let mut engine = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();

    let generated = engine.generate().unwrap();
    let flows = &generated.document_flows;
    let subledger = &generated.subledger;
    let stats = &generated.statistics;

    // Source documents exist.
    assert!(!flows.vendor_invoices.is_empty());
    assert!(!flows.customer_invoices.is_empty());

    // Subledger records exist.
    assert!(!subledger.ap_invoices.is_empty());
    assert!(!subledger.ar_invoices.is_empty());

    // One AP invoice per vendor invoice (by count).
    assert_eq!(subledger.ap_invoices.len(), flows.vendor_invoices.len());

    // One AR invoice per customer invoice (by count).
    assert_eq!(subledger.ar_invoices.len(), flows.customer_invoices.len());

    // Statistics mirror the subledger sizes.
    assert_eq!(stats.ap_invoice_count, subledger.ap_invoices.len());
    assert_eq!(stats.ar_invoice_count, subledger.ar_invoices.len());
}
2618
/// With balance validation enabled on journal-entry-only generation, the
/// validation must run, find no unbalanced entries, and report equal total
/// debits and credits.
#[test]
fn test_balance_validation() {
    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        validate_balances: true,
        show_progress: false,
        ..Default::default()
    };
    let mut engine = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();

    let generated = engine.generate().unwrap();
    let validation = &generated.balance_validation;

    // Validation ran and saw entries.
    assert!(validation.validated);
    assert!(validation.entries_processed > 0);

    // No entry was flagged as unbalanced.
    assert!(!validation.has_unbalanced_entries);

    // Debits and credits net out overall.
    assert_eq!(validation.total_debits, validation.total_credits);
}
2648
/// The counters in `statistics` must equal the sizes of the collections they
/// describe (vendors, customers, materials, journal entries).
#[test]
fn test_statistics_accuracy() {
    let phases = PhaseConfig {
        // Phase toggles: master data + journal entries.
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        // Deliberately distinct volumes per entity type.
        vendors_per_company: 10,
        customers_per_company: 20,
        materials_per_company: 15,
        assets_per_company: 5,
        employees_per_company: 8,
        ..Default::default()
    };
    let mut engine = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();

    let generated = engine.generate().unwrap();
    let stats = &generated.statistics;
    let master = &generated.master_data;

    assert_eq!(stats.vendor_count, master.vendors.len());
    assert_eq!(stats.customer_count, master.customers.len());
    assert_eq!(stats.material_count, master.materials.len());
    // `total_entries` is cast to `usize` so it can be compared with `len()`.
    assert_eq!(stats.total_entries as usize, generated.journal_entries.len());
}
2687
/// Pins the default `PhaseConfig`: the core phases, balance validation and
/// progress display are on, anomaly injection is off, and the vendor and
/// customer counts are non-zero.
#[test]
fn test_phase_config_defaults() {
    let defaults = PhaseConfig::default();

    // Enabled by default.
    assert!(defaults.generate_master_data);
    assert!(defaults.generate_document_flows);
    assert!(defaults.generate_journal_entries);
    // Disabled by default.
    assert!(!defaults.inject_anomalies);
    // Enabled by default.
    assert!(defaults.validate_balances);
    assert!(defaults.show_progress);
    // Non-zero default volumes.
    assert!(defaults.vendors_per_company > 0);
    assert!(defaults.customers_per_company > 0);
}
2700
/// A freshly constructed orchestrator must not expose a chart of accounts
/// before `generate` has been called.
#[test]
fn test_get_coa_before_generation() {
    let engine = EnhancedOrchestrator::with_defaults(create_test_config()).unwrap();

    let coa = engine.get_coa();
    assert!(coa.is_none());
}
2709
/// After a journal-entry-only generation run, `get_coa` must return a value.
#[test]
fn test_get_coa_after_generation() {
    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };
    let mut engine = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();

    // The generation result itself is irrelevant here; only the orchestrator's
    // state afterwards is inspected.
    let _ = engine.generate().unwrap();

    let coa = engine.get_coa();
    assert!(coa.is_some());
}
2728
/// After a master-data-only run, `get_master_data` on the orchestrator must
/// expose a non-empty vendor list.
#[test]
fn test_get_master_data() {
    let phases = PhaseConfig {
        // Phase toggles: master data only.
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: false,
        inject_anomalies: false,
        show_progress: false,
        // Per-company entity counts.
        vendors_per_company: 5,
        customers_per_company: 5,
        materials_per_company: 5,
        assets_per_company: 5,
        employees_per_company: 5,
        ..Default::default()
    };
    let mut engine = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();

    // Only the orchestrator's retained state is checked, not the returned result.
    let _ = engine.generate().unwrap();

    assert!(!engine.get_master_data().vendors.is_empty());
}
2752
/// `with_progress(false)` must clear `show_progress` on the orchestrator's
/// phase configuration.
#[test]
fn test_with_progress_builder() {
    let base = EnhancedOrchestrator::with_defaults(create_test_config()).unwrap();
    let quiet = base.with_progress(false);

    assert!(!quiet.phase_config.show_progress);
}
2763
/// Adds a second company to the test config and checks that the statistics
/// report two companies and at least 10 vendors and 10 customers.
#[test]
fn test_multi_company_generation() {
    let mut config = create_test_config();
    config.companies.push(CompanyConfig {
        code: "2000".to_string(),
        name: "Subsidiary".to_string(),
        currency: "EUR".to_string(),
        country: "DE".to_string(),
        annual_transaction_volume: TransactionVolume::TenK,
        volume_weight: 0.5,
        fiscal_year_variant: "K4".to_string(),
    });

    let phase_config = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        vendors_per_company: 5,
        customers_per_company: 5,
        materials_per_company: 5,
        assets_per_company: 5,
        employees_per_company: 5,
        ..Default::default()
    };

    let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
    let result = orchestrator.generate().unwrap();

    // 5 per company across the two companies the test expects.
    assert!(result.statistics.vendor_count >= 10);
    assert!(result.statistics.customer_count >= 10);
    // `assert_eq!` prints the actual count on failure, unlike `assert!(a == b)`.
    assert_eq!(result.statistics.companies_count, 2);
}
2799
/// Requesting document flows while master data generation is disabled must
/// still succeed and must yield no P2P or O2C chains.
#[test]
fn test_empty_master_data_skips_document_flows() {
    let phases = PhaseConfig {
        // No master data is generated...
        generate_master_data: false,
        // ...yet document flows are requested.
        generate_document_flows: true,
        generate_journal_entries: false,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };
    let mut engine = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();

    let generated = engine.generate().unwrap();
    let flows = &generated.document_flows;

    assert!(flows.p2p_chains.is_empty());
    assert!(flows.o2c_chains.is_empty());
}
2819
/// `statistics.total_line_items` must equal the sum of `line_count()` over
/// all generated journal entries.
#[test]
fn test_journal_entry_line_item_count() {
    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };
    let mut engine = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();

    let generated = engine.generate().unwrap();

    // Recompute the line-item total independently of the statistics.
    let mut expected_line_items: u64 = 0;
    for entry in generated.journal_entries.iter() {
        expected_line_items += entry.line_count() as u64;
    }

    assert_eq!(generated.statistics.total_line_items, expected_line_items);
}
2843
/// With audit generation enabled for two engagements, every audit collection
/// must be populated and each audit statistic must equal the size of its
/// collection.
#[test]
fn test_audit_generation() {
    let phases = PhaseConfig {
        // Phase toggles: journal entries + audit.
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        generate_audit: true,
        // Audit volumes.
        audit_engagements: 2,
        workpapers_per_engagement: 5,
        evidence_per_workpaper: 2,
        risks_per_engagement: 3,
        findings_per_engagement: 2,
        judgments_per_engagement: 2,
        ..Default::default()
    };
    let mut engine = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();

    let generated = engine.generate().unwrap();
    let audit = &generated.audit;
    let stats = &generated.statistics;

    // Exactly the requested number of engagements; everything else non-empty.
    assert_eq!(audit.engagements.len(), 2);
    assert!(!audit.workpapers.is_empty());
    assert!(!audit.evidence.is_empty());
    assert!(!audit.risk_assessments.is_empty());
    assert!(!audit.findings.is_empty());
    assert!(!audit.judgments.is_empty());

    // Statistics mirror the collection sizes.
    assert_eq!(stats.audit_engagement_count, audit.engagements.len());
    assert_eq!(stats.audit_workpaper_count, audit.workpapers.len());
    assert_eq!(stats.audit_evidence_count, audit.evidence.len());
    assert_eq!(stats.audit_risk_count, audit.risk_assessments.len());
    assert_eq!(stats.audit_finding_count, audit.findings.len());
    assert_eq!(stats.audit_judgment_count, audit.judgments.len());
}
2900}