1use std::collections::HashMap;
19use std::path::PathBuf;
20use std::sync::Arc;
21
22use chrono::{Datelike, NaiveDate};
23use indicatif::{MultiProgress, ProgressBar, ProgressStyle};
24use serde::{Deserialize, Serialize};
25use tracing::{debug, info, warn};
26
27use datasynth_banking::{
28 models::{BankAccount, BankTransaction, BankingCustomer},
29 BankingOrchestratorBuilder,
30};
31use datasynth_config::schema::GeneratorConfig;
32use datasynth_core::error::{SynthError, SynthResult};
33use datasynth_core::models::audit::{
34 AuditEngagement, AuditEvidence, AuditFinding, ProfessionalJudgment, RiskAssessment, Workpaper,
35};
36use datasynth_core::models::subledger::ap::APInvoice;
37use datasynth_core::models::subledger::ar::ARInvoice;
38use datasynth_core::models::*;
39use datasynth_core::{DegradationActions, DegradationLevel, ResourceGuard, ResourceGuardBuilder};
40use datasynth_fingerprint::{
41 io::FingerprintReader,
42 models::Fingerprint,
43 synthesis::{ConfigSynthesizer, CopulaGeneratorSpec, SynthesisOptions},
44};
45use datasynth_generators::{
46 AnomalyInjector,
48 AnomalyInjectorConfig,
49 AssetGenerator,
50 AuditEngagementGenerator,
52 BalanceTrackerConfig,
53 ChartOfAccountsGenerator,
55 CustomerGenerator,
56 DataQualityConfig,
57 DataQualityInjector,
59 DataQualityStats,
60 DocumentFlowJeConfig,
62 DocumentFlowJeGenerator,
63 DocumentFlowLinker,
65 EmployeeGenerator,
66 EvidenceGenerator,
67 FindingGenerator,
68 JournalEntryGenerator,
69 JudgmentGenerator,
70 LatePaymentDistribution,
71 MaterialGenerator,
72 O2CDocumentChain,
73 O2CGenerator,
74 O2CGeneratorConfig,
75 O2CPaymentBehavior,
76 P2PDocumentChain,
77 P2PGenerator,
79 P2PGeneratorConfig,
80 P2PPaymentBehavior,
81 RiskAssessmentGenerator,
82 RunningBalanceTracker,
84 ValidationError,
85 VendorGenerator,
87 WorkpaperGenerator,
88};
89use datasynth_graph::{
90 PyGExportConfig, PyGExporter, TransactionGraphBuilder, TransactionGraphConfig,
91};
92use datasynth_ocpm::{
93 EventLogMetadata, O2cDocuments, OcpmEventGenerator, OcpmEventLog, OcpmGeneratorConfig,
94 P2pDocuments,
95};
96
97use datasynth_config::schema::{O2CFlowConfig, P2PFlowConfig};
98use datasynth_core::causal::{CausalGraph, CausalValidator, StructuralCausalModel};
99use datasynth_core::diffusion::{DiffusionBackend, DiffusionConfig, StatisticalDiffusionBackend};
100use datasynth_core::llm::MockLlmProvider;
101use datasynth_core::models::documents::PaymentMethod;
102use datasynth_generators::llm_enrichment::VendorLlmEnricher;
103
104fn convert_p2p_config(schema_config: &P2PFlowConfig) -> P2PGeneratorConfig {
110 let payment_behavior = &schema_config.payment_behavior;
111 let late_dist = &payment_behavior.late_payment_days_distribution;
112
113 P2PGeneratorConfig {
114 three_way_match_rate: schema_config.three_way_match_rate,
115 partial_delivery_rate: schema_config.partial_delivery_rate,
116 over_delivery_rate: 0.02, price_variance_rate: schema_config.price_variance_rate,
118 max_price_variance_percent: schema_config.max_price_variance_percent,
119 avg_days_po_to_gr: schema_config.average_po_to_gr_days,
120 avg_days_gr_to_invoice: schema_config.average_gr_to_invoice_days,
121 avg_days_invoice_to_payment: schema_config.average_invoice_to_payment_days,
122 payment_method_distribution: vec![
123 (PaymentMethod::BankTransfer, 0.60),
124 (PaymentMethod::Check, 0.25),
125 (PaymentMethod::Wire, 0.10),
126 (PaymentMethod::CreditCard, 0.05),
127 ],
128 early_payment_discount_rate: 0.30, payment_behavior: P2PPaymentBehavior {
130 late_payment_rate: payment_behavior.late_payment_rate,
131 late_payment_distribution: LatePaymentDistribution {
132 slightly_late_1_to_7: late_dist.slightly_late_1_to_7,
133 late_8_to_14: late_dist.late_8_to_14,
134 very_late_15_to_30: late_dist.very_late_15_to_30,
135 severely_late_31_to_60: late_dist.severely_late_31_to_60,
136 extremely_late_over_60: late_dist.extremely_late_over_60,
137 },
138 partial_payment_rate: payment_behavior.partial_payment_rate,
139 payment_correction_rate: payment_behavior.payment_correction_rate,
140 },
141 }
142}
143
144fn convert_o2c_config(schema_config: &O2CFlowConfig) -> O2CGeneratorConfig {
146 let payment_behavior = &schema_config.payment_behavior;
147
148 O2CGeneratorConfig {
149 credit_check_failure_rate: schema_config.credit_check_failure_rate,
150 partial_shipment_rate: schema_config.partial_shipment_rate,
151 avg_days_so_to_delivery: schema_config.average_so_to_delivery_days,
152 avg_days_delivery_to_invoice: schema_config.average_delivery_to_invoice_days,
153 avg_days_invoice_to_payment: schema_config.average_invoice_to_receipt_days,
154 late_payment_rate: 0.15, bad_debt_rate: schema_config.bad_debt_rate,
156 returns_rate: schema_config.return_rate,
157 cash_discount_take_rate: schema_config.cash_discount.taken_rate,
158 payment_method_distribution: vec![
159 (PaymentMethod::BankTransfer, 0.50),
160 (PaymentMethod::Check, 0.30),
161 (PaymentMethod::Wire, 0.15),
162 (PaymentMethod::CreditCard, 0.05),
163 ],
164 payment_behavior: O2CPaymentBehavior {
165 partial_payment_rate: payment_behavior.partial_payments.rate,
166 short_payment_rate: payment_behavior.short_payments.rate,
167 max_short_percent: payment_behavior.short_payments.max_short_percent,
168 on_account_rate: payment_behavior.on_account_payments.rate,
169 payment_correction_rate: payment_behavior.payment_corrections.rate,
170 avg_days_until_remainder: payment_behavior.partial_payments.avg_days_until_remainder,
171 },
172 }
173}
174
/// Switches and sizing knobs that control which generation phases run.
///
/// The boolean flags are checked by the orchestrator's `phase_*` methods; the
/// count fields are consumed by generator helpers that are not part of this
/// view, so their exact use should be confirmed there.
#[derive(Debug, Clone)]
pub struct PhaseConfig {
    /// Run the master-data phase (phase 2).
    pub generate_master_data: bool,
    /// Run the document-flow phase (phase 3); also gates JEs derived from flows.
    pub generate_document_flows: bool,
    /// Run OCPM event generation (phase 3c).
    pub generate_ocpm_events: bool,
    /// Generate standalone journal entries (phase 4).
    pub generate_journal_entries: bool,
    /// Inject anomalies into the journal entries (phase 5).
    pub inject_anomalies: bool,
    /// Inject data-quality variations into the journal entries (phase 7).
    pub inject_data_quality: bool,
    /// Validate journal-entry balances (phase 6).
    pub validate_balances: bool,
    /// Whether progress display is requested.
    pub show_progress: bool,
    /// Vendors per company.
    pub vendors_per_company: usize,
    /// Customers per company.
    pub customers_per_company: usize,
    /// Materials per company.
    pub materials_per_company: usize,
    /// Fixed assets per company.
    pub assets_per_company: usize,
    /// Employees per company.
    pub employees_per_company: usize,
    /// Number of P2P chains.
    pub p2p_chains: usize,
    /// Number of O2C chains.
    pub o2c_chains: usize,
    /// Run audit data generation (phase 8).
    pub generate_audit: bool,
    /// Number of audit engagements.
    pub audit_engagements: usize,
    /// Workpapers per engagement.
    pub workpapers_per_engagement: usize,
    /// Evidence items per workpaper.
    pub evidence_per_workpaper: usize,
    /// Risk assessments per engagement.
    pub risks_per_engagement: usize,
    /// Findings per engagement.
    pub findings_per_engagement: usize,
    /// Professional judgments per engagement.
    pub judgments_per_engagement: usize,
    /// Run banking data generation (phase 9); the generator config must enable it too.
    pub generate_banking: bool,
    /// Run graph export (phase 10); the generator config can also enable it.
    pub generate_graph_export: bool,
}
227
228impl Default for PhaseConfig {
229 fn default() -> Self {
230 Self {
231 generate_master_data: true,
232 generate_document_flows: true,
233 generate_ocpm_events: false, generate_journal_entries: true,
235 inject_anomalies: false,
236 inject_data_quality: false, validate_balances: true,
238 show_progress: true,
239 vendors_per_company: 50,
240 customers_per_company: 100,
241 materials_per_company: 200,
242 assets_per_company: 50,
243 employees_per_company: 100,
244 p2p_chains: 100,
245 o2c_chains: 100,
246 generate_audit: false, audit_engagements: 5,
248 workpapers_per_engagement: 20,
249 evidence_per_workpaper: 5,
250 risks_per_engagement: 15,
251 findings_per_engagement: 8,
252 judgments_per_engagement: 10,
253 generate_banking: false, generate_graph_export: false, }
256 }
257}
258
259#[derive(Debug, Clone, Default)]
261pub struct MasterDataSnapshot {
262 pub vendors: Vec<Vendor>,
264 pub customers: Vec<Customer>,
266 pub materials: Vec<Material>,
268 pub assets: Vec<FixedAsset>,
270 pub employees: Vec<Employee>,
272}
273
/// Summary of a hypergraph export; its counts are logged after the export phase.
#[derive(Debug, Clone)]
pub struct HypergraphExportInfo {
    /// Number of nodes reported by the export.
    pub node_count: usize,
    /// Number of edges reported by the export.
    pub edge_count: usize,
    /// Number of hyperedges reported by the export.
    pub hyperedge_count: usize,
    /// Path associated with the export output.
    pub output_path: PathBuf,
}
286
287#[derive(Debug, Clone, Default)]
289pub struct DocumentFlowSnapshot {
290 pub p2p_chains: Vec<P2PDocumentChain>,
292 pub o2c_chains: Vec<O2CDocumentChain>,
294 pub purchase_orders: Vec<documents::PurchaseOrder>,
296 pub goods_receipts: Vec<documents::GoodsReceipt>,
298 pub vendor_invoices: Vec<documents::VendorInvoice>,
300 pub sales_orders: Vec<documents::SalesOrder>,
302 pub deliveries: Vec<documents::Delivery>,
304 pub customer_invoices: Vec<documents::CustomerInvoice>,
306 pub payments: Vec<documents::Payment>,
308}
309
310#[derive(Debug, Clone, Default)]
312pub struct SubledgerSnapshot {
313 pub ap_invoices: Vec<APInvoice>,
315 pub ar_invoices: Vec<ARInvoice>,
317}
318
319#[derive(Debug, Clone, Default)]
321pub struct OcpmSnapshot {
322 pub event_log: Option<OcpmEventLog>,
324 pub event_count: usize,
326 pub object_count: usize,
328 pub case_count: usize,
330}
331
332#[derive(Debug, Clone, Default)]
334pub struct AuditSnapshot {
335 pub engagements: Vec<AuditEngagement>,
337 pub workpapers: Vec<Workpaper>,
339 pub evidence: Vec<AuditEvidence>,
341 pub risk_assessments: Vec<RiskAssessment>,
343 pub findings: Vec<AuditFinding>,
345 pub judgments: Vec<ProfessionalJudgment>,
347}
348
349#[derive(Debug, Clone, Default)]
351pub struct BankingSnapshot {
352 pub customers: Vec<BankingCustomer>,
354 pub accounts: Vec<BankAccount>,
356 pub transactions: Vec<BankTransaction>,
358 pub suspicious_count: usize,
360 pub scenario_count: usize,
362}
363
364#[derive(Debug, Clone, Default)]
366pub struct GraphExportSnapshot {
367 pub exported: bool,
369 pub graph_count: usize,
371 pub exports: HashMap<String, GraphExportInfo>,
373}
374
/// Details of a single graph export, stored in `GraphExportSnapshot::exports`.
#[derive(Debug, Clone)]
pub struct GraphExportInfo {
    /// Name of the exported graph.
    pub name: String,
    /// Export format label.
    pub format: String,
    /// Path associated with the export output.
    pub output_path: PathBuf,
    /// Number of nodes in the exported graph.
    pub node_count: usize,
    /// Number of edges in the exported graph.
    pub edge_count: usize,
}
389
390#[derive(Debug, Clone, Default)]
392pub struct AnomalyLabels {
393 pub labels: Vec<LabeledAnomaly>,
395 pub summary: Option<AnomalySummary>,
397 pub by_type: HashMap<String, usize>,
399}
400
401#[derive(Debug, Clone, Default)]
403pub struct BalanceValidationResult {
404 pub validated: bool,
406 pub is_balanced: bool,
408 pub entries_processed: u64,
410 pub total_debits: rust_decimal::Decimal,
412 pub total_credits: rust_decimal::Decimal,
414 pub accounts_tracked: usize,
416 pub companies_tracked: usize,
418 pub validation_errors: Vec<ValidationError>,
420 pub has_unbalanced_entries: bool,
422}
423
424#[derive(Debug)]
426pub struct EnhancedGenerationResult {
427 pub chart_of_accounts: ChartOfAccounts,
429 pub master_data: MasterDataSnapshot,
431 pub document_flows: DocumentFlowSnapshot,
433 pub subledger: SubledgerSnapshot,
435 pub ocpm: OcpmSnapshot,
437 pub audit: AuditSnapshot,
439 pub banking: BankingSnapshot,
441 pub graph_export: GraphExportSnapshot,
443 pub journal_entries: Vec<JournalEntry>,
445 pub anomaly_labels: AnomalyLabels,
447 pub balance_validation: BalanceValidationResult,
449 pub data_quality_stats: DataQualityStats,
451 pub statistics: EnhancedGenerationStatistics,
453 pub lineage: Option<super::lineage::LineageGraph>,
455 pub gate_result: Option<datasynth_eval::gates::GateResult>,
457}
458
459#[derive(Debug, Clone, Default, Serialize, Deserialize)]
461pub struct EnhancedGenerationStatistics {
462 pub total_entries: u64,
464 pub total_line_items: u64,
466 pub accounts_count: usize,
468 pub companies_count: usize,
470 pub period_months: u32,
472 pub vendor_count: usize,
474 pub customer_count: usize,
475 pub material_count: usize,
476 pub asset_count: usize,
477 pub employee_count: usize,
478 pub p2p_chain_count: usize,
480 pub o2c_chain_count: usize,
481 pub ap_invoice_count: usize,
483 pub ar_invoice_count: usize,
484 pub ocpm_event_count: usize,
486 pub ocpm_object_count: usize,
487 pub ocpm_case_count: usize,
488 pub audit_engagement_count: usize,
490 pub audit_workpaper_count: usize,
491 pub audit_evidence_count: usize,
492 pub audit_risk_count: usize,
493 pub audit_finding_count: usize,
494 pub audit_judgment_count: usize,
495 pub anomalies_injected: usize,
497 pub data_quality_issues: usize,
499 pub banking_customer_count: usize,
501 pub banking_account_count: usize,
502 pub banking_transaction_count: usize,
503 pub banking_suspicious_count: usize,
504 pub graph_export_count: usize,
506 pub graph_node_count: usize,
507 pub graph_edge_count: usize,
508 #[serde(default)]
510 pub llm_enrichment_ms: u64,
511 #[serde(default)]
513 pub llm_vendors_enriched: usize,
514 #[serde(default)]
516 pub diffusion_enhancement_ms: u64,
517 #[serde(default)]
519 pub diffusion_samples_generated: usize,
520 #[serde(default)]
522 pub causal_generation_ms: u64,
523 #[serde(default)]
525 pub causal_samples_generated: usize,
526 #[serde(default)]
528 pub causal_validation_passed: Option<bool>,
529}
530
531pub struct EnhancedOrchestrator {
533 config: GeneratorConfig,
534 phase_config: PhaseConfig,
535 coa: Option<Arc<ChartOfAccounts>>,
536 master_data: MasterDataSnapshot,
537 seed: u64,
538 multi_progress: Option<MultiProgress>,
539 resource_guard: ResourceGuard,
541 output_path: Option<PathBuf>,
543 copula_generators: Vec<CopulaGeneratorSpec>,
545}
546
547impl EnhancedOrchestrator {
548 pub fn new(config: GeneratorConfig, phase_config: PhaseConfig) -> SynthResult<Self> {
550 datasynth_config::validate_config(&config)?;
551
552 let seed = config.global.seed.unwrap_or_else(rand::random);
553
554 let resource_guard = Self::build_resource_guard(&config, None);
556
557 Ok(Self {
558 config,
559 phase_config,
560 coa: None,
561 master_data: MasterDataSnapshot::default(),
562 seed,
563 multi_progress: None,
564 resource_guard,
565 output_path: None,
566 copula_generators: Vec::new(),
567 })
568 }
569
570 pub fn with_defaults(config: GeneratorConfig) -> SynthResult<Self> {
572 Self::new(config, PhaseConfig::default())
573 }
574
575 pub fn with_progress(mut self, show: bool) -> Self {
577 self.phase_config.show_progress = show;
578 if show {
579 self.multi_progress = Some(MultiProgress::new());
580 }
581 self
582 }
583
584 pub fn with_output_path<P: Into<PathBuf>>(mut self, path: P) -> Self {
586 let path = path.into();
587 self.output_path = Some(path.clone());
588 self.resource_guard = Self::build_resource_guard(&self.config, Some(path));
590 self
591 }
592
593 pub fn has_copulas(&self) -> bool {
598 !self.copula_generators.is_empty()
599 }
600
601 pub fn copulas(&self) -> &[CopulaGeneratorSpec] {
607 &self.copula_generators
608 }
609
610 pub fn copulas_mut(&mut self) -> &mut [CopulaGeneratorSpec] {
614 &mut self.copula_generators
615 }
616
617 pub fn sample_from_copula(&mut self, copula_name: &str) -> Option<Vec<f64>> {
621 self.copula_generators
622 .iter_mut()
623 .find(|c| c.name == copula_name)
624 .map(|c| c.generator.sample())
625 }
626
627 pub fn from_fingerprint(
650 fingerprint_path: &std::path::Path,
651 phase_config: PhaseConfig,
652 scale: f64,
653 ) -> SynthResult<Self> {
654 info!("Loading fingerprint from: {}", fingerprint_path.display());
655
656 let reader = FingerprintReader::new();
658 let fingerprint = reader
659 .read_from_file(fingerprint_path)
660 .map_err(|e| SynthError::config(format!("Failed to read fingerprint: {}", e)))?;
661
662 Self::from_fingerprint_data(fingerprint, phase_config, scale)
663 }
664
665 pub fn from_fingerprint_data(
672 fingerprint: Fingerprint,
673 phase_config: PhaseConfig,
674 scale: f64,
675 ) -> SynthResult<Self> {
676 info!(
677 "Synthesizing config from fingerprint (version: {}, tables: {})",
678 fingerprint.manifest.version,
679 fingerprint.schema.tables.len()
680 );
681
682 let seed: u64 = rand::random();
684
685 let options = SynthesisOptions {
687 scale,
688 seed: Some(seed),
689 preserve_correlations: true,
690 inject_anomalies: true,
691 };
692 let synthesizer = ConfigSynthesizer::with_options(options);
693
694 let synthesis_result = synthesizer
696 .synthesize_full(&fingerprint, seed)
697 .map_err(|e| {
698 SynthError::config(format!(
699 "Failed to synthesize config from fingerprint: {}",
700 e
701 ))
702 })?;
703
704 let mut config = if let Some(ref industry) = fingerprint.manifest.source.industry {
706 Self::base_config_for_industry(industry)
707 } else {
708 Self::base_config_for_industry("manufacturing")
709 };
710
711 config = Self::apply_config_patch(config, &synthesis_result.config_patch);
713
714 info!(
716 "Config synthesized: {} tables, scale={:.2}, copula generators: {}",
717 fingerprint.schema.tables.len(),
718 scale,
719 synthesis_result.copula_generators.len()
720 );
721
722 if !synthesis_result.copula_generators.is_empty() {
723 for spec in &synthesis_result.copula_generators {
724 info!(
725 " Copula '{}' for table '{}': {} columns",
726 spec.name,
727 spec.table,
728 spec.columns.len()
729 );
730 }
731 }
732
733 let mut orchestrator = Self::new(config, phase_config)?;
735
736 orchestrator.copula_generators = synthesis_result.copula_generators;
738
739 Ok(orchestrator)
740 }
741
742 fn base_config_for_industry(industry: &str) -> GeneratorConfig {
744 use datasynth_config::presets::create_preset;
745 use datasynth_config::TransactionVolume;
746 use datasynth_core::models::{CoAComplexity, IndustrySector};
747
748 let sector = match industry.to_lowercase().as_str() {
749 "manufacturing" => IndustrySector::Manufacturing,
750 "retail" => IndustrySector::Retail,
751 "financial" | "financial_services" => IndustrySector::FinancialServices,
752 "healthcare" => IndustrySector::Healthcare,
753 "technology" | "tech" => IndustrySector::Technology,
754 _ => IndustrySector::Manufacturing,
755 };
756
757 create_preset(
759 sector,
760 1, 12, CoAComplexity::Medium,
763 TransactionVolume::TenK,
764 )
765 }
766
767 fn apply_config_patch(
769 mut config: GeneratorConfig,
770 patch: &datasynth_fingerprint::synthesis::ConfigPatch,
771 ) -> GeneratorConfig {
772 use datasynth_fingerprint::synthesis::ConfigValue;
773
774 for (key, value) in patch.values() {
775 match (key.as_str(), value) {
776 ("transactions.count", ConfigValue::Integer(n)) => {
779 info!(
780 "Fingerprint suggests {} transactions (apply via company volumes)",
781 n
782 );
783 }
784 ("global.period_months", ConfigValue::Integer(n)) => {
785 config.global.period_months = *n as u32;
786 }
787 ("global.start_date", ConfigValue::String(s)) => {
788 config.global.start_date = s.clone();
789 }
790 ("global.seed", ConfigValue::Integer(n)) => {
791 config.global.seed = Some(*n as u64);
792 }
793 ("fraud.enabled", ConfigValue::Bool(b)) => {
794 config.fraud.enabled = *b;
795 }
796 ("fraud.fraud_rate", ConfigValue::Float(f)) => {
797 config.fraud.fraud_rate = *f;
798 }
799 ("data_quality.enabled", ConfigValue::Bool(b)) => {
800 config.data_quality.enabled = *b;
801 }
802 ("anomaly_injection.enabled", ConfigValue::Bool(b)) => {
804 config.fraud.enabled = *b;
805 }
806 ("anomaly_injection.overall_rate", ConfigValue::Float(f)) => {
807 config.fraud.fraud_rate = *f;
808 }
809 _ => {
810 debug!("Ignoring unknown config patch key: {}", key);
811 }
812 }
813 }
814
815 config
816 }
817
818 fn build_resource_guard(
820 config: &GeneratorConfig,
821 output_path: Option<PathBuf>,
822 ) -> ResourceGuard {
823 let mut builder = ResourceGuardBuilder::new();
824
825 if config.global.memory_limit_mb > 0 {
827 builder = builder.memory_limit(config.global.memory_limit_mb);
828 }
829
830 if let Some(path) = output_path {
832 builder = builder.output_path(path).min_free_disk(100); }
834
835 builder = builder.conservative();
837
838 builder.build()
839 }
840
841 fn check_resources(&self) -> SynthResult<DegradationLevel> {
846 self.resource_guard.check()
847 }
848
849 fn check_resources_with_log(&self, phase: &str) -> SynthResult<DegradationLevel> {
851 let level = self.resource_guard.check()?;
852
853 if level != DegradationLevel::Normal {
854 warn!(
855 "Resource degradation at {}: level={}, memory={}MB, disk={}MB",
856 phase,
857 level,
858 self.resource_guard.current_memory_mb(),
859 self.resource_guard.available_disk_mb()
860 );
861 }
862
863 Ok(level)
864 }
865
866 fn get_degradation_actions(&self) -> DegradationActions {
868 self.resource_guard.get_actions()
869 }
870
871 fn check_memory_limit(&self) -> SynthResult<()> {
873 self.check_resources()?;
874 Ok(())
875 }
876
877 pub fn generate(&mut self) -> SynthResult<EnhancedGenerationResult> {
879 info!("Starting enhanced generation workflow");
880 info!(
881 "Config: industry={:?}, period_months={}, companies={}",
882 self.config.global.industry,
883 self.config.global.period_months,
884 self.config.companies.len()
885 );
886
887 let initial_level = self.check_resources_with_log("initial")?;
889 if initial_level == DegradationLevel::Emergency {
890 return Err(SynthError::resource(
891 "Insufficient resources to start generation",
892 ));
893 }
894
895 let mut stats = EnhancedGenerationStatistics {
896 companies_count: self.config.companies.len(),
897 period_months: self.config.global.period_months,
898 ..Default::default()
899 };
900
901 let coa = self.phase_chart_of_accounts(&mut stats)?;
903
904 self.phase_master_data(&mut stats)?;
906
907 let (document_flows, subledger) = self.phase_document_flows(&mut stats)?;
909
910 let ocpm = self.phase_ocpm_events(&document_flows, &mut stats)?;
912
913 let mut entries = self.phase_journal_entries(&coa, &document_flows, &mut stats)?;
915
916 let actions = self.get_degradation_actions();
918
919 let anomaly_labels = self.phase_anomaly_injection(&mut entries, &actions, &mut stats)?;
921
922 let balance_validation = self.phase_balance_validation(&entries)?;
924
925 let data_quality_stats =
927 self.phase_data_quality_injection(&mut entries, &actions, &mut stats)?;
928
929 let audit = self.phase_audit_data(&entries, &mut stats)?;
931
932 let banking = self.phase_banking_data(&mut stats)?;
934
935 let graph_export = self.phase_graph_export(&entries, &coa, &mut stats)?;
937
938 self.phase_hypergraph_export(&coa, &entries, &document_flows, &mut stats)?;
940
941 self.phase_llm_enrichment(&mut stats);
943
944 self.phase_diffusion_enhancement(&mut stats);
946
947 self.phase_causal_overlay(&mut stats);
949
950 let resource_stats = self.resource_guard.stats();
952 info!(
953 "Generation workflow complete. Resource stats: memory_peak={}MB, disk_written={}bytes, degradation_level={}",
954 resource_stats.memory.peak_resident_bytes / (1024 * 1024),
955 resource_stats.disk.estimated_bytes_written,
956 resource_stats.degradation_level
957 );
958
959 let lineage = self.build_lineage_graph();
961
962 Ok(EnhancedGenerationResult {
963 chart_of_accounts: (*coa).clone(),
964 master_data: self.master_data.clone(),
965 document_flows,
966 subledger,
967 ocpm,
968 audit,
969 banking,
970 graph_export,
971 journal_entries: entries,
972 anomaly_labels,
973 balance_validation,
974 data_quality_stats,
975 statistics: stats,
976 lineage: Some(lineage),
977 gate_result: None,
978 })
979 }
980
981 fn phase_chart_of_accounts(
987 &mut self,
988 stats: &mut EnhancedGenerationStatistics,
989 ) -> SynthResult<Arc<ChartOfAccounts>> {
990 info!("Phase 1: Generating Chart of Accounts");
991 let coa = self.generate_coa()?;
992 stats.accounts_count = coa.account_count();
993 info!(
994 "Chart of Accounts generated: {} accounts",
995 stats.accounts_count
996 );
997 self.check_resources_with_log("post-coa")?;
998 Ok(coa)
999 }
1000
1001 fn phase_master_data(&mut self, stats: &mut EnhancedGenerationStatistics) -> SynthResult<()> {
1003 if self.phase_config.generate_master_data {
1004 info!("Phase 2: Generating Master Data");
1005 self.generate_master_data()?;
1006 stats.vendor_count = self.master_data.vendors.len();
1007 stats.customer_count = self.master_data.customers.len();
1008 stats.material_count = self.master_data.materials.len();
1009 stats.asset_count = self.master_data.assets.len();
1010 stats.employee_count = self.master_data.employees.len();
1011 info!(
1012 "Master data generated: {} vendors, {} customers, {} materials, {} assets, {} employees",
1013 stats.vendor_count, stats.customer_count, stats.material_count,
1014 stats.asset_count, stats.employee_count
1015 );
1016 self.check_resources_with_log("post-master-data")?;
1017 } else {
1018 debug!("Phase 2: Skipped (master data generation disabled)");
1019 }
1020 Ok(())
1021 }
1022
1023 fn phase_document_flows(
1025 &mut self,
1026 stats: &mut EnhancedGenerationStatistics,
1027 ) -> SynthResult<(DocumentFlowSnapshot, SubledgerSnapshot)> {
1028 let mut document_flows = DocumentFlowSnapshot::default();
1029 let mut subledger = SubledgerSnapshot::default();
1030
1031 if self.phase_config.generate_document_flows && !self.master_data.vendors.is_empty() {
1032 info!("Phase 3: Generating Document Flows");
1033 self.generate_document_flows(&mut document_flows)?;
1034 stats.p2p_chain_count = document_flows.p2p_chains.len();
1035 stats.o2c_chain_count = document_flows.o2c_chains.len();
1036 info!(
1037 "Document flows generated: {} P2P chains, {} O2C chains",
1038 stats.p2p_chain_count, stats.o2c_chain_count
1039 );
1040
1041 debug!("Phase 3b: Linking document flows to subledgers");
1043 subledger = self.link_document_flows_to_subledgers(&document_flows)?;
1044 stats.ap_invoice_count = subledger.ap_invoices.len();
1045 stats.ar_invoice_count = subledger.ar_invoices.len();
1046 debug!(
1047 "Subledgers linked: {} AP invoices, {} AR invoices",
1048 stats.ap_invoice_count, stats.ar_invoice_count
1049 );
1050
1051 self.check_resources_with_log("post-document-flows")?;
1052 } else {
1053 debug!("Phase 3: Skipped (document flow generation disabled or no master data)");
1054 }
1055
1056 Ok((document_flows, subledger))
1057 }
1058
1059 fn phase_ocpm_events(
1061 &mut self,
1062 document_flows: &DocumentFlowSnapshot,
1063 stats: &mut EnhancedGenerationStatistics,
1064 ) -> SynthResult<OcpmSnapshot> {
1065 if self.phase_config.generate_ocpm_events && !document_flows.p2p_chains.is_empty() {
1066 info!("Phase 3c: Generating OCPM Events");
1067 let ocpm_snapshot = self.generate_ocpm_events(document_flows)?;
1068 stats.ocpm_event_count = ocpm_snapshot.event_count;
1069 stats.ocpm_object_count = ocpm_snapshot.object_count;
1070 stats.ocpm_case_count = ocpm_snapshot.case_count;
1071 info!(
1072 "OCPM events generated: {} events, {} objects, {} cases",
1073 stats.ocpm_event_count, stats.ocpm_object_count, stats.ocpm_case_count
1074 );
1075 self.check_resources_with_log("post-ocpm")?;
1076 Ok(ocpm_snapshot)
1077 } else {
1078 debug!("Phase 3c: Skipped (OCPM generation disabled or no document flows)");
1079 Ok(OcpmSnapshot::default())
1080 }
1081 }
1082
1083 fn phase_journal_entries(
1085 &mut self,
1086 coa: &Arc<ChartOfAccounts>,
1087 document_flows: &DocumentFlowSnapshot,
1088 stats: &mut EnhancedGenerationStatistics,
1089 ) -> SynthResult<Vec<JournalEntry>> {
1090 let mut entries = Vec::new();
1091
1092 if self.phase_config.generate_document_flows && !document_flows.p2p_chains.is_empty() {
1094 debug!("Phase 4a: Generating JEs from document flows");
1095 let flow_entries = self.generate_jes_from_document_flows(document_flows)?;
1096 debug!("Generated {} JEs from document flows", flow_entries.len());
1097 entries.extend(flow_entries);
1098 }
1099
1100 if self.phase_config.generate_journal_entries {
1102 info!("Phase 4: Generating Journal Entries");
1103 let je_entries = self.generate_journal_entries(coa)?;
1104 info!("Generated {} standalone journal entries", je_entries.len());
1105 entries.extend(je_entries);
1106 } else {
1107 debug!("Phase 4: Skipped (journal entry generation disabled)");
1108 }
1109
1110 if !entries.is_empty() {
1111 stats.total_entries = entries.len() as u64;
1112 stats.total_line_items = entries.iter().map(|e| e.line_count() as u64).sum();
1113 info!(
1114 "Total entries: {}, total line items: {}",
1115 stats.total_entries, stats.total_line_items
1116 );
1117 self.check_resources_with_log("post-journal-entries")?;
1118 }
1119
1120 Ok(entries)
1121 }
1122
1123 fn phase_anomaly_injection(
1125 &mut self,
1126 entries: &mut [JournalEntry],
1127 actions: &DegradationActions,
1128 stats: &mut EnhancedGenerationStatistics,
1129 ) -> SynthResult<AnomalyLabels> {
1130 if self.phase_config.inject_anomalies
1131 && !entries.is_empty()
1132 && !actions.skip_anomaly_injection
1133 {
1134 info!("Phase 5: Injecting Anomalies");
1135 let result = self.inject_anomalies(entries)?;
1136 stats.anomalies_injected = result.labels.len();
1137 info!("Injected {} anomalies", stats.anomalies_injected);
1138 self.check_resources_with_log("post-anomaly-injection")?;
1139 Ok(result)
1140 } else if actions.skip_anomaly_injection {
1141 warn!("Phase 5: Skipped due to resource degradation");
1142 Ok(AnomalyLabels::default())
1143 } else {
1144 debug!("Phase 5: Skipped (anomaly injection disabled or no entries)");
1145 Ok(AnomalyLabels::default())
1146 }
1147 }
1148
1149 fn phase_balance_validation(
1151 &mut self,
1152 entries: &[JournalEntry],
1153 ) -> SynthResult<BalanceValidationResult> {
1154 if self.phase_config.validate_balances && !entries.is_empty() {
1155 debug!("Phase 6: Validating Balances");
1156 let balance_validation = self.validate_journal_entries(entries)?;
1157 if balance_validation.is_balanced {
1158 debug!("Balance validation passed");
1159 } else {
1160 warn!(
1161 "Balance validation found {} errors",
1162 balance_validation.validation_errors.len()
1163 );
1164 }
1165 Ok(balance_validation)
1166 } else {
1167 Ok(BalanceValidationResult::default())
1168 }
1169 }
1170
1171 fn phase_data_quality_injection(
1173 &mut self,
1174 entries: &mut [JournalEntry],
1175 actions: &DegradationActions,
1176 stats: &mut EnhancedGenerationStatistics,
1177 ) -> SynthResult<DataQualityStats> {
1178 if self.phase_config.inject_data_quality
1179 && !entries.is_empty()
1180 && !actions.skip_data_quality
1181 {
1182 info!("Phase 7: Injecting Data Quality Variations");
1183 let dq_stats = self.inject_data_quality(entries)?;
1184 stats.data_quality_issues = dq_stats.records_with_issues;
1185 info!("Injected {} data quality issues", stats.data_quality_issues);
1186 self.check_resources_with_log("post-data-quality")?;
1187 Ok(dq_stats)
1188 } else if actions.skip_data_quality {
1189 warn!("Phase 7: Skipped due to resource degradation");
1190 Ok(DataQualityStats::default())
1191 } else {
1192 debug!("Phase 7: Skipped (data quality injection disabled or no entries)");
1193 Ok(DataQualityStats::default())
1194 }
1195 }
1196
1197 fn phase_audit_data(
1199 &mut self,
1200 entries: &[JournalEntry],
1201 stats: &mut EnhancedGenerationStatistics,
1202 ) -> SynthResult<AuditSnapshot> {
1203 if self.phase_config.generate_audit {
1204 info!("Phase 8: Generating Audit Data");
1205 let audit_snapshot = self.generate_audit_data(entries)?;
1206 stats.audit_engagement_count = audit_snapshot.engagements.len();
1207 stats.audit_workpaper_count = audit_snapshot.workpapers.len();
1208 stats.audit_evidence_count = audit_snapshot.evidence.len();
1209 stats.audit_risk_count = audit_snapshot.risk_assessments.len();
1210 stats.audit_finding_count = audit_snapshot.findings.len();
1211 stats.audit_judgment_count = audit_snapshot.judgments.len();
1212 info!(
1213 "Audit data generated: {} engagements, {} workpapers, {} evidence, {} risks, {} findings, {} judgments",
1214 stats.audit_engagement_count, stats.audit_workpaper_count,
1215 stats.audit_evidence_count, stats.audit_risk_count,
1216 stats.audit_finding_count, stats.audit_judgment_count
1217 );
1218 self.check_resources_with_log("post-audit")?;
1219 Ok(audit_snapshot)
1220 } else {
1221 debug!("Phase 8: Skipped (audit generation disabled)");
1222 Ok(AuditSnapshot::default())
1223 }
1224 }
1225
1226 fn phase_banking_data(
1228 &mut self,
1229 stats: &mut EnhancedGenerationStatistics,
1230 ) -> SynthResult<BankingSnapshot> {
1231 if self.phase_config.generate_banking && self.config.banking.enabled {
1232 info!("Phase 9: Generating Banking KYC/AML Data");
1233 let banking_snapshot = self.generate_banking_data()?;
1234 stats.banking_customer_count = banking_snapshot.customers.len();
1235 stats.banking_account_count = banking_snapshot.accounts.len();
1236 stats.banking_transaction_count = banking_snapshot.transactions.len();
1237 stats.banking_suspicious_count = banking_snapshot.suspicious_count;
1238 info!(
1239 "Banking data generated: {} customers, {} accounts, {} transactions ({} suspicious)",
1240 stats.banking_customer_count, stats.banking_account_count,
1241 stats.banking_transaction_count, stats.banking_suspicious_count
1242 );
1243 self.check_resources_with_log("post-banking")?;
1244 Ok(banking_snapshot)
1245 } else {
1246 debug!("Phase 9: Skipped (banking generation disabled)");
1247 Ok(BankingSnapshot::default())
1248 }
1249 }
1250
1251 fn phase_graph_export(
1253 &mut self,
1254 entries: &[JournalEntry],
1255 coa: &Arc<ChartOfAccounts>,
1256 stats: &mut EnhancedGenerationStatistics,
1257 ) -> SynthResult<GraphExportSnapshot> {
1258 if (self.phase_config.generate_graph_export || self.config.graph_export.enabled)
1259 && !entries.is_empty()
1260 {
1261 info!("Phase 10: Exporting Accounting Network Graphs");
1262 match self.export_graphs(entries, coa, stats) {
1263 Ok(snapshot) => {
1264 info!(
1265 "Graph export complete: {} graphs ({} nodes, {} edges)",
1266 snapshot.graph_count, stats.graph_node_count, stats.graph_edge_count
1267 );
1268 Ok(snapshot)
1269 }
1270 Err(e) => {
1271 warn!("Phase 10: Graph export failed: {}", e);
1272 Ok(GraphExportSnapshot::default())
1273 }
1274 }
1275 } else {
1276 debug!("Phase 10: Skipped (graph export disabled or no entries)");
1277 Ok(GraphExportSnapshot::default())
1278 }
1279 }
1280
1281 fn phase_hypergraph_export(
1283 &self,
1284 coa: &Arc<ChartOfAccounts>,
1285 entries: &[JournalEntry],
1286 document_flows: &DocumentFlowSnapshot,
1287 stats: &mut EnhancedGenerationStatistics,
1288 ) -> SynthResult<()> {
1289 if self.config.graph_export.hypergraph.enabled && !entries.is_empty() {
1290 info!("Phase 10b: Exporting Multi-Layer Hypergraph");
1291 match self.export_hypergraph(coa, entries, document_flows, stats) {
1292 Ok(info) => {
1293 info!(
1294 "Hypergraph export complete: {} nodes, {} edges, {} hyperedges",
1295 info.node_count, info.edge_count, info.hyperedge_count
1296 );
1297 }
1298 Err(e) => {
1299 warn!("Phase 10b: Hypergraph export failed: {}", e);
1300 }
1301 }
1302 } else {
1303 debug!("Phase 10b: Skipped (hypergraph export disabled or no entries)");
1304 }
1305 Ok(())
1306 }
1307
1308 fn phase_llm_enrichment(&mut self, stats: &mut EnhancedGenerationStatistics) {
1314 if !self.config.llm.enabled {
1315 debug!("Phase 11: Skipped (LLM enrichment disabled)");
1316 return;
1317 }
1318
1319 info!("Phase 11: Starting LLM Enrichment");
1320 let start = std::time::Instant::now();
1321
1322 let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
1323 let provider = Arc::new(MockLlmProvider::new(self.seed));
1324 let enricher = VendorLlmEnricher::new(provider);
1325
1326 let industry = format!("{:?}", self.config.global.industry);
1327 let max_enrichments = self
1328 .config
1329 .llm
1330 .max_vendor_enrichments
1331 .min(self.master_data.vendors.len());
1332
1333 let mut enriched_count = 0usize;
1334 for vendor in self.master_data.vendors.iter_mut().take(max_enrichments) {
1335 match enricher.enrich_vendor_name(&industry, "general", &vendor.country) {
1336 Ok(name) => {
1337 vendor.name = name;
1338 enriched_count += 1;
1339 }
1340 Err(e) => {
1341 warn!(
1342 "LLM vendor enrichment failed for {}: {}",
1343 vendor.vendor_id, e
1344 );
1345 }
1346 }
1347 }
1348
1349 enriched_count
1350 }));
1351
1352 match result {
1353 Ok(enriched_count) => {
1354 stats.llm_vendors_enriched = enriched_count;
1355 let elapsed = start.elapsed();
1356 stats.llm_enrichment_ms = elapsed.as_millis() as u64;
1357 info!(
1358 "Phase 11 complete: {} vendors enriched in {}ms",
1359 enriched_count, stats.llm_enrichment_ms
1360 );
1361 }
1362 Err(_) => {
1363 let elapsed = start.elapsed();
1364 stats.llm_enrichment_ms = elapsed.as_millis() as u64;
1365 warn!("Phase 11: LLM enrichment failed (panic caught), continuing");
1366 }
1367 }
1368 }
1369
1370 fn phase_diffusion_enhancement(&self, stats: &mut EnhancedGenerationStatistics) {
1376 if !self.config.diffusion.enabled {
1377 debug!("Phase 12: Skipped (diffusion enhancement disabled)");
1378 return;
1379 }
1380
1381 info!("Phase 12: Starting Diffusion Enhancement");
1382 let start = std::time::Instant::now();
1383
1384 let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
1385 let means = vec![5000.0, 3.0, 2.0]; let stds = vec![2000.0, 1.5, 1.0];
1388
1389 let diffusion_config = DiffusionConfig {
1390 n_steps: self.config.diffusion.n_steps,
1391 seed: self.seed,
1392 ..Default::default()
1393 };
1394
1395 let backend = StatisticalDiffusionBackend::new(means, stds, diffusion_config);
1396
1397 let n_samples = self.config.diffusion.sample_size;
1398 let n_features = 3; let samples = backend.generate(n_samples, n_features, self.seed);
1400
1401 samples.len()
1402 }));
1403
1404 match result {
1405 Ok(sample_count) => {
1406 stats.diffusion_samples_generated = sample_count;
1407 let elapsed = start.elapsed();
1408 stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
1409 info!(
1410 "Phase 12 complete: {} diffusion samples generated in {}ms",
1411 sample_count, stats.diffusion_enhancement_ms
1412 );
1413 }
1414 Err(_) => {
1415 let elapsed = start.elapsed();
1416 stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
1417 warn!("Phase 12: Diffusion enhancement failed (panic caught), continuing");
1418 }
1419 }
1420 }
1421
1422 fn phase_causal_overlay(&self, stats: &mut EnhancedGenerationStatistics) {
1429 if !self.config.causal.enabled {
1430 debug!("Phase 13: Skipped (causal generation disabled)");
1431 return;
1432 }
1433
1434 info!("Phase 13: Starting Causal Overlay");
1435 let start = std::time::Instant::now();
1436
1437 let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
1438 let graph = match self.config.causal.template.as_str() {
1440 "revenue_cycle" => CausalGraph::revenue_cycle_template(),
1441 _ => CausalGraph::fraud_detection_template(),
1442 };
1443
1444 let scm = StructuralCausalModel::new(graph.clone())
1445 .map_err(|e| SynthError::generation(format!("Failed to build SCM: {}", e)))?;
1446
1447 let n_samples = self.config.causal.sample_size;
1448 let samples = scm
1449 .generate(n_samples, self.seed)
1450 .map_err(|e| SynthError::generation(format!("SCM generation failed: {}", e)))?;
1451
1452 let validation_passed = if self.config.causal.validate {
1454 let report = CausalValidator::validate_causal_structure(&samples, &graph);
1455 if report.valid {
1456 info!(
1457 "Causal validation passed: all {} checks OK",
1458 report.checks.len()
1459 );
1460 } else {
1461 warn!(
1462 "Causal validation: {} violations detected: {:?}",
1463 report.violations.len(),
1464 report.violations
1465 );
1466 }
1467 Some(report.valid)
1468 } else {
1469 None
1470 };
1471
1472 Ok::<(usize, Option<bool>), SynthError>((samples.len(), validation_passed))
1473 }));
1474
1475 match result {
1476 Ok(Ok((sample_count, validation_passed))) => {
1477 stats.causal_samples_generated = sample_count;
1478 stats.causal_validation_passed = validation_passed;
1479 let elapsed = start.elapsed();
1480 stats.causal_generation_ms = elapsed.as_millis() as u64;
1481 info!(
1482 "Phase 13 complete: {} causal samples generated in {}ms (validation: {:?})",
1483 sample_count, stats.causal_generation_ms, validation_passed,
1484 );
1485 }
1486 Ok(Err(e)) => {
1487 let elapsed = start.elapsed();
1488 stats.causal_generation_ms = elapsed.as_millis() as u64;
1489 warn!("Phase 13: Causal generation failed: {}", e);
1490 }
1491 Err(_) => {
1492 let elapsed = start.elapsed();
1493 stats.causal_generation_ms = elapsed.as_millis() as u64;
1494 warn!("Phase 13: Causal generation failed (panic caught), continuing");
1495 }
1496 }
1497 }
1498
1499 fn generate_coa(&mut self) -> SynthResult<Arc<ChartOfAccounts>> {
1501 let pb = self.create_progress_bar(1, "Generating Chart of Accounts");
1502
1503 let mut gen = ChartOfAccountsGenerator::new(
1504 self.config.chart_of_accounts.complexity,
1505 self.config.global.industry,
1506 self.seed,
1507 );
1508
1509 let coa = Arc::new(gen.generate());
1510 self.coa = Some(Arc::clone(&coa));
1511
1512 if let Some(pb) = pb {
1513 pb.finish_with_message("Chart of Accounts complete");
1514 }
1515
1516 Ok(coa)
1517 }
1518
1519 fn generate_master_data(&mut self) -> SynthResult<()> {
1521 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
1522 .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
1523 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
1524
1525 let total = self.config.companies.len() as u64 * 5; let pb = self.create_progress_bar(total, "Generating Master Data");
1527
1528 for (i, company) in self.config.companies.iter().enumerate() {
1529 let company_seed = self.seed.wrapping_add(i as u64 * 1000);
1530
1531 let mut vendor_gen = VendorGenerator::new(company_seed);
1533 let vendor_pool = vendor_gen.generate_vendor_pool(
1534 self.phase_config.vendors_per_company,
1535 &company.code,
1536 start_date,
1537 );
1538 self.master_data.vendors.extend(vendor_pool.vendors);
1539 if let Some(pb) = &pb {
1540 pb.inc(1);
1541 }
1542
1543 let mut customer_gen = CustomerGenerator::new(company_seed + 100);
1545 let customer_pool = customer_gen.generate_customer_pool(
1546 self.phase_config.customers_per_company,
1547 &company.code,
1548 start_date,
1549 );
1550 self.master_data.customers.extend(customer_pool.customers);
1551 if let Some(pb) = &pb {
1552 pb.inc(1);
1553 }
1554
1555 let mut material_gen = MaterialGenerator::new(company_seed + 200);
1557 let material_pool = material_gen.generate_material_pool(
1558 self.phase_config.materials_per_company,
1559 &company.code,
1560 start_date,
1561 );
1562 self.master_data.materials.extend(material_pool.materials);
1563 if let Some(pb) = &pb {
1564 pb.inc(1);
1565 }
1566
1567 let mut asset_gen = AssetGenerator::new(company_seed + 300);
1569 let asset_pool = asset_gen.generate_asset_pool(
1570 self.phase_config.assets_per_company,
1571 &company.code,
1572 (start_date, end_date),
1573 );
1574 self.master_data.assets.extend(asset_pool.assets);
1575 if let Some(pb) = &pb {
1576 pb.inc(1);
1577 }
1578
1579 let mut employee_gen = EmployeeGenerator::new(company_seed + 400);
1581 let employee_pool =
1582 employee_gen.generate_company_pool(&company.code, (start_date, end_date));
1583 self.master_data.employees.extend(employee_pool.employees);
1584 if let Some(pb) = &pb {
1585 pb.inc(1);
1586 }
1587 }
1588
1589 if let Some(pb) = pb {
1590 pb.finish_with_message("Master data generation complete");
1591 }
1592
1593 Ok(())
1594 }
1595
1596 fn generate_document_flows(&mut self, flows: &mut DocumentFlowSnapshot) -> SynthResult<()> {
1598 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
1599 .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
1600
1601 let p2p_count = self
1603 .phase_config
1604 .p2p_chains
1605 .min(self.master_data.vendors.len() * 2);
1606 let pb = self.create_progress_bar(p2p_count as u64, "Generating P2P Document Flows");
1607
1608 let p2p_config = convert_p2p_config(&self.config.document_flows.p2p);
1610 let mut p2p_gen = P2PGenerator::with_config(self.seed + 1000, p2p_config);
1611
1612 for i in 0..p2p_count {
1613 let vendor = &self.master_data.vendors[i % self.master_data.vendors.len()];
1614 let materials: Vec<&Material> = self
1615 .master_data
1616 .materials
1617 .iter()
1618 .skip(i % self.master_data.materials.len().max(1))
1619 .take(2.min(self.master_data.materials.len()))
1620 .collect();
1621
1622 if materials.is_empty() {
1623 continue;
1624 }
1625
1626 let company = &self.config.companies[i % self.config.companies.len()];
1627 let po_date = start_date + chrono::Duration::days((i * 3) as i64 % 365);
1628 let fiscal_period = po_date.month() as u8;
1629 let created_by = self
1630 .master_data
1631 .employees
1632 .first()
1633 .map(|e| e.user_id.as_str())
1634 .unwrap_or("SYSTEM");
1635
1636 let chain = p2p_gen.generate_chain(
1637 &company.code,
1638 vendor,
1639 &materials,
1640 po_date,
1641 start_date.year() as u16,
1642 fiscal_period,
1643 created_by,
1644 );
1645
1646 flows.purchase_orders.push(chain.purchase_order.clone());
1648 flows.goods_receipts.extend(chain.goods_receipts.clone());
1649 if let Some(vi) = &chain.vendor_invoice {
1650 flows.vendor_invoices.push(vi.clone());
1651 }
1652 if let Some(payment) = &chain.payment {
1653 flows.payments.push(payment.clone());
1654 }
1655 flows.p2p_chains.push(chain);
1656
1657 if let Some(pb) = &pb {
1658 pb.inc(1);
1659 }
1660 }
1661
1662 if let Some(pb) = pb {
1663 pb.finish_with_message("P2P document flows complete");
1664 }
1665
1666 let o2c_count = self
1668 .phase_config
1669 .o2c_chains
1670 .min(self.master_data.customers.len() * 2);
1671 let pb = self.create_progress_bar(o2c_count as u64, "Generating O2C Document Flows");
1672
1673 let o2c_config = convert_o2c_config(&self.config.document_flows.o2c);
1675 let mut o2c_gen = O2CGenerator::with_config(self.seed + 2000, o2c_config);
1676
1677 for i in 0..o2c_count {
1678 let customer = &self.master_data.customers[i % self.master_data.customers.len()];
1679 let materials: Vec<&Material> = self
1680 .master_data
1681 .materials
1682 .iter()
1683 .skip(i % self.master_data.materials.len().max(1))
1684 .take(2.min(self.master_data.materials.len()))
1685 .collect();
1686
1687 if materials.is_empty() {
1688 continue;
1689 }
1690
1691 let company = &self.config.companies[i % self.config.companies.len()];
1692 let so_date = start_date + chrono::Duration::days((i * 2) as i64 % 365);
1693 let fiscal_period = so_date.month() as u8;
1694 let created_by = self
1695 .master_data
1696 .employees
1697 .first()
1698 .map(|e| e.user_id.as_str())
1699 .unwrap_or("SYSTEM");
1700
1701 let chain = o2c_gen.generate_chain(
1702 &company.code,
1703 customer,
1704 &materials,
1705 so_date,
1706 start_date.year() as u16,
1707 fiscal_period,
1708 created_by,
1709 );
1710
1711 flows.sales_orders.push(chain.sales_order.clone());
1713 flows.deliveries.extend(chain.deliveries.clone());
1714 if let Some(ci) = &chain.customer_invoice {
1715 flows.customer_invoices.push(ci.clone());
1716 }
1717 if let Some(receipt) = &chain.customer_receipt {
1718 flows.payments.push(receipt.clone());
1719 }
1720 flows.o2c_chains.push(chain);
1721
1722 if let Some(pb) = &pb {
1723 pb.inc(1);
1724 }
1725 }
1726
1727 if let Some(pb) = pb {
1728 pb.finish_with_message("O2C document flows complete");
1729 }
1730
1731 Ok(())
1732 }
1733
1734 fn generate_journal_entries(
1736 &mut self,
1737 coa: &Arc<ChartOfAccounts>,
1738 ) -> SynthResult<Vec<JournalEntry>> {
1739 let total = self.calculate_total_transactions();
1740 let pb = self.create_progress_bar(total, "Generating Journal Entries");
1741
1742 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
1743 .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
1744 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
1745
1746 let company_codes: Vec<String> = self
1747 .config
1748 .companies
1749 .iter()
1750 .map(|c| c.code.clone())
1751 .collect();
1752
1753 let generator = JournalEntryGenerator::new_with_params(
1754 self.config.transactions.clone(),
1755 Arc::clone(coa),
1756 company_codes,
1757 start_date,
1758 end_date,
1759 self.seed,
1760 );
1761
1762 let mut generator = generator
1766 .with_master_data(
1767 &self.master_data.vendors,
1768 &self.master_data.customers,
1769 &self.master_data.materials,
1770 )
1771 .with_persona_errors(true)
1772 .with_fraud_config(self.config.fraud.clone());
1773
1774 if self.config.temporal.enabled {
1776 let drift_config = self.config.temporal.to_core_config();
1777 generator = generator.with_drift_config(drift_config, self.seed + 100);
1778 }
1779
1780 let mut entries = Vec::with_capacity(total as usize);
1781
1782 self.check_memory_limit()?;
1784
1785 const MEMORY_CHECK_INTERVAL: u64 = 1000;
1787
1788 for i in 0..total {
1789 let entry = generator.generate();
1790 entries.push(entry);
1791 if let Some(pb) = &pb {
1792 pb.inc(1);
1793 }
1794
1795 if (i + 1) % MEMORY_CHECK_INTERVAL == 0 {
1797 self.check_memory_limit()?;
1798 }
1799 }
1800
1801 if let Some(pb) = pb {
1802 pb.finish_with_message("Journal entries complete");
1803 }
1804
1805 Ok(entries)
1806 }
1807
1808 fn generate_jes_from_document_flows(
1813 &mut self,
1814 flows: &DocumentFlowSnapshot,
1815 ) -> SynthResult<Vec<JournalEntry>> {
1816 let total_chains = flows.p2p_chains.len() + flows.o2c_chains.len();
1817 let pb = self.create_progress_bar(total_chains as u64, "Generating Document Flow JEs");
1818
1819 let mut generator = DocumentFlowJeGenerator::with_config_and_seed(
1820 DocumentFlowJeConfig::default(),
1821 self.seed,
1822 );
1823 let mut entries = Vec::new();
1824
1825 for chain in &flows.p2p_chains {
1827 let chain_entries = generator.generate_from_p2p_chain(chain);
1828 entries.extend(chain_entries);
1829 if let Some(pb) = &pb {
1830 pb.inc(1);
1831 }
1832 }
1833
1834 for chain in &flows.o2c_chains {
1836 let chain_entries = generator.generate_from_o2c_chain(chain);
1837 entries.extend(chain_entries);
1838 if let Some(pb) = &pb {
1839 pb.inc(1);
1840 }
1841 }
1842
1843 if let Some(pb) = pb {
1844 pb.finish_with_message(format!(
1845 "Generated {} JEs from document flows",
1846 entries.len()
1847 ));
1848 }
1849
1850 Ok(entries)
1851 }
1852
1853 fn link_document_flows_to_subledgers(
1858 &mut self,
1859 flows: &DocumentFlowSnapshot,
1860 ) -> SynthResult<SubledgerSnapshot> {
1861 let total = flows.vendor_invoices.len() + flows.customer_invoices.len();
1862 let pb = self.create_progress_bar(total as u64, "Linking Subledgers");
1863
1864 let mut linker = DocumentFlowLinker::new();
1865
1866 let ap_invoices = linker.batch_create_ap_invoices(&flows.vendor_invoices);
1868 if let Some(pb) = &pb {
1869 pb.inc(flows.vendor_invoices.len() as u64);
1870 }
1871
1872 let ar_invoices = linker.batch_create_ar_invoices(&flows.customer_invoices);
1874 if let Some(pb) = &pb {
1875 pb.inc(flows.customer_invoices.len() as u64);
1876 }
1877
1878 if let Some(pb) = pb {
1879 pb.finish_with_message(format!(
1880 "Linked {} AP and {} AR invoices",
1881 ap_invoices.len(),
1882 ar_invoices.len()
1883 ));
1884 }
1885
1886 Ok(SubledgerSnapshot {
1887 ap_invoices,
1888 ar_invoices,
1889 })
1890 }
1891
1892 fn generate_ocpm_events(&mut self, flows: &DocumentFlowSnapshot) -> SynthResult<OcpmSnapshot> {
1897 let total_chains = flows.p2p_chains.len() + flows.o2c_chains.len();
1898 let pb = self.create_progress_bar(total_chains as u64, "Generating OCPM Events");
1899
1900 let metadata = EventLogMetadata::new("SyntheticData OCPM Log");
1902 let mut event_log = OcpmEventLog::with_metadata(metadata).with_standard_types();
1903
1904 let ocpm_config = OcpmGeneratorConfig {
1906 generate_p2p: true,
1907 generate_o2c: true,
1908 happy_path_rate: 0.75,
1909 exception_path_rate: 0.20,
1910 error_path_rate: 0.05,
1911 add_duration_variability: true,
1912 duration_std_dev_factor: 0.3,
1913 };
1914 let mut ocpm_gen = OcpmEventGenerator::with_config(self.seed + 3000, ocpm_config);
1915
1916 let available_users: Vec<String> = self
1918 .master_data
1919 .employees
1920 .iter()
1921 .take(20)
1922 .map(|e| e.user_id.clone())
1923 .collect();
1924
1925 for chain in &flows.p2p_chains {
1927 let po = &chain.purchase_order;
1928 let documents = P2pDocuments::new(
1929 &po.header.document_id,
1930 &po.vendor_id,
1931 &po.header.company_code,
1932 po.total_net_amount,
1933 &po.header.currency,
1934 )
1935 .with_goods_receipt(
1936 chain
1937 .goods_receipts
1938 .first()
1939 .map(|gr| gr.header.document_id.as_str())
1940 .unwrap_or(""),
1941 )
1942 .with_invoice(
1943 chain
1944 .vendor_invoice
1945 .as_ref()
1946 .map(|vi| vi.header.document_id.as_str())
1947 .unwrap_or(""),
1948 )
1949 .with_payment(
1950 chain
1951 .payment
1952 .as_ref()
1953 .map(|p| p.header.document_id.as_str())
1954 .unwrap_or(""),
1955 );
1956
1957 let start_time =
1958 chrono::DateTime::from_naive_utc_and_offset(po.header.entry_timestamp, chrono::Utc);
1959 let result = ocpm_gen.generate_p2p_case(&documents, start_time, &available_users);
1960
1961 for event in result.events {
1963 event_log.add_event(event);
1964 }
1965 for object in result.objects {
1966 event_log.add_object(object);
1967 }
1968 for relationship in result.relationships {
1969 event_log.add_relationship(relationship);
1970 }
1971 event_log.add_case(result.case_trace);
1972
1973 if let Some(pb) = &pb {
1974 pb.inc(1);
1975 }
1976 }
1977
1978 for chain in &flows.o2c_chains {
1980 let so = &chain.sales_order;
1981 let documents = O2cDocuments::new(
1982 &so.header.document_id,
1983 &so.customer_id,
1984 &so.header.company_code,
1985 so.total_net_amount,
1986 &so.header.currency,
1987 )
1988 .with_delivery(
1989 chain
1990 .deliveries
1991 .first()
1992 .map(|d| d.header.document_id.as_str())
1993 .unwrap_or(""),
1994 )
1995 .with_invoice(
1996 chain
1997 .customer_invoice
1998 .as_ref()
1999 .map(|ci| ci.header.document_id.as_str())
2000 .unwrap_or(""),
2001 )
2002 .with_receipt(
2003 chain
2004 .customer_receipt
2005 .as_ref()
2006 .map(|r| r.header.document_id.as_str())
2007 .unwrap_or(""),
2008 );
2009
2010 let start_time =
2011 chrono::DateTime::from_naive_utc_and_offset(so.header.entry_timestamp, chrono::Utc);
2012 let result = ocpm_gen.generate_o2c_case(&documents, start_time, &available_users);
2013
2014 for event in result.events {
2016 event_log.add_event(event);
2017 }
2018 for object in result.objects {
2019 event_log.add_object(object);
2020 }
2021 for relationship in result.relationships {
2022 event_log.add_relationship(relationship);
2023 }
2024 event_log.add_case(result.case_trace);
2025
2026 if let Some(pb) = &pb {
2027 pb.inc(1);
2028 }
2029 }
2030
2031 event_log.compute_variants();
2033
2034 let summary = event_log.summary();
2035
2036 if let Some(pb) = pb {
2037 pb.finish_with_message(format!(
2038 "Generated {} OCPM events, {} objects",
2039 summary.event_count, summary.object_count
2040 ));
2041 }
2042
2043 Ok(OcpmSnapshot {
2044 event_count: summary.event_count,
2045 object_count: summary.object_count,
2046 case_count: summary.case_count,
2047 event_log: Some(event_log),
2048 })
2049 }
2050
2051 fn inject_anomalies(&mut self, entries: &mut [JournalEntry]) -> SynthResult<AnomalyLabels> {
2053 let pb = self.create_progress_bar(entries.len() as u64, "Injecting Anomalies");
2054
2055 let anomaly_config = AnomalyInjectorConfig {
2056 rates: AnomalyRateConfig {
2057 total_rate: 0.02,
2058 ..Default::default()
2059 },
2060 seed: self.seed + 5000,
2061 ..Default::default()
2062 };
2063
2064 let mut injector = AnomalyInjector::new(anomaly_config);
2065 let result = injector.process_entries(entries);
2066
2067 if let Some(pb) = &pb {
2068 pb.inc(entries.len() as u64);
2069 pb.finish_with_message("Anomaly injection complete");
2070 }
2071
2072 let mut by_type = HashMap::new();
2073 for label in &result.labels {
2074 *by_type
2075 .entry(format!("{:?}", label.anomaly_type))
2076 .or_insert(0) += 1;
2077 }
2078
2079 Ok(AnomalyLabels {
2080 labels: result.labels,
2081 summary: Some(result.summary),
2082 by_type,
2083 })
2084 }
2085
2086 fn validate_journal_entries(
2095 &mut self,
2096 entries: &[JournalEntry],
2097 ) -> SynthResult<BalanceValidationResult> {
2098 let clean_entries: Vec<&JournalEntry> = entries
2100 .iter()
2101 .filter(|e| {
2102 e.header
2103 .header_text
2104 .as_ref()
2105 .map(|t| !t.contains("[HUMAN_ERROR:"))
2106 .unwrap_or(true)
2107 })
2108 .collect();
2109
2110 let pb = self.create_progress_bar(clean_entries.len() as u64, "Validating Balances");
2111
2112 let config = BalanceTrackerConfig {
2114 validate_on_each_entry: false, track_history: false, fail_on_validation_error: false, ..Default::default()
2118 };
2119
2120 let mut tracker = RunningBalanceTracker::new(config);
2121
2122 let clean_refs: Vec<JournalEntry> = clean_entries.into_iter().cloned().collect();
2124 let errors = tracker.apply_entries(&clean_refs);
2125
2126 if let Some(pb) = &pb {
2127 pb.inc(entries.len() as u64);
2128 }
2129
2130 let has_unbalanced = tracker
2133 .get_validation_errors()
2134 .iter()
2135 .any(|e| e.error_type == datasynth_generators::ValidationErrorType::UnbalancedEntry);
2136
2137 let mut all_errors = errors;
2140 all_errors.extend(tracker.get_validation_errors().iter().cloned());
2141 let company_codes: Vec<String> = self
2142 .config
2143 .companies
2144 .iter()
2145 .map(|c| c.code.clone())
2146 .collect();
2147
2148 let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
2149 .map(|d| d + chrono::Months::new(self.config.global.period_months))
2150 .unwrap_or_else(|_| chrono::Local::now().date_naive());
2151
2152 for company_code in &company_codes {
2153 if let Err(e) = tracker.validate_balance_sheet(company_code, end_date, None) {
2154 all_errors.push(e);
2155 }
2156 }
2157
2158 let stats = tracker.get_statistics();
2160
2161 let is_balanced = all_errors.is_empty();
2163
2164 if let Some(pb) = pb {
2165 let msg = if is_balanced {
2166 "Balance validation passed"
2167 } else {
2168 "Balance validation completed with errors"
2169 };
2170 pb.finish_with_message(msg);
2171 }
2172
2173 Ok(BalanceValidationResult {
2174 validated: true,
2175 is_balanced,
2176 entries_processed: stats.entries_processed,
2177 total_debits: stats.total_debits,
2178 total_credits: stats.total_credits,
2179 accounts_tracked: stats.accounts_tracked,
2180 companies_tracked: stats.companies_tracked,
2181 validation_errors: all_errors,
2182 has_unbalanced_entries: has_unbalanced,
2183 })
2184 }
2185
2186 fn inject_data_quality(
2191 &mut self,
2192 entries: &mut [JournalEntry],
2193 ) -> SynthResult<DataQualityStats> {
2194 let pb = self.create_progress_bar(entries.len() as u64, "Injecting Data Quality Issues");
2195
2196 let config = DataQualityConfig::minimal();
2198 let mut injector = DataQualityInjector::new(config);
2199
2200 let context = HashMap::new();
2202
2203 for entry in entries.iter_mut() {
2204 if let Some(text) = &entry.header.header_text {
2206 let processed = injector.process_text_field(
2207 "header_text",
2208 text,
2209 &entry.header.document_id.to_string(),
2210 &context,
2211 );
2212 match processed {
2213 Some(new_text) if new_text != *text => {
2214 entry.header.header_text = Some(new_text);
2215 }
2216 None => {
2217 entry.header.header_text = None; }
2219 _ => {}
2220 }
2221 }
2222
2223 if let Some(ref_text) = &entry.header.reference {
2225 let processed = injector.process_text_field(
2226 "reference",
2227 ref_text,
2228 &entry.header.document_id.to_string(),
2229 &context,
2230 );
2231 match processed {
2232 Some(new_text) if new_text != *ref_text => {
2233 entry.header.reference = Some(new_text);
2234 }
2235 None => {
2236 entry.header.reference = None;
2237 }
2238 _ => {}
2239 }
2240 }
2241
2242 let user_persona = entry.header.user_persona.clone();
2244 if let Some(processed) = injector.process_text_field(
2245 "user_persona",
2246 &user_persona,
2247 &entry.header.document_id.to_string(),
2248 &context,
2249 ) {
2250 if processed != user_persona {
2251 entry.header.user_persona = processed;
2252 }
2253 }
2254
2255 for line in &mut entry.lines {
2257 if let Some(ref text) = line.line_text {
2259 let processed = injector.process_text_field(
2260 "line_text",
2261 text,
2262 &entry.header.document_id.to_string(),
2263 &context,
2264 );
2265 match processed {
2266 Some(new_text) if new_text != *text => {
2267 line.line_text = Some(new_text);
2268 }
2269 None => {
2270 line.line_text = None;
2271 }
2272 _ => {}
2273 }
2274 }
2275
2276 if let Some(cc) = &line.cost_center {
2278 let processed = injector.process_text_field(
2279 "cost_center",
2280 cc,
2281 &entry.header.document_id.to_string(),
2282 &context,
2283 );
2284 match processed {
2285 Some(new_cc) if new_cc != *cc => {
2286 line.cost_center = Some(new_cc);
2287 }
2288 None => {
2289 line.cost_center = None;
2290 }
2291 _ => {}
2292 }
2293 }
2294 }
2295
2296 if let Some(pb) = &pb {
2297 pb.inc(1);
2298 }
2299 }
2300
2301 if let Some(pb) = pb {
2302 pb.finish_with_message("Data quality injection complete");
2303 }
2304
2305 Ok(injector.stats().clone())
2306 }
2307
2308 fn generate_audit_data(&mut self, entries: &[JournalEntry]) -> SynthResult<AuditSnapshot> {
2319 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
2320 .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
2321 let fiscal_year = start_date.year() as u16;
2322 let period_end = start_date + chrono::Months::new(self.config.global.period_months);
2323
2324 let total_revenue: rust_decimal::Decimal = entries
2326 .iter()
2327 .flat_map(|e| e.lines.iter())
2328 .filter(|l| l.credit_amount > rust_decimal::Decimal::ZERO)
2329 .map(|l| l.credit_amount)
2330 .sum();
2331
2332 let total_items = (self.phase_config.audit_engagements * 50) as u64; let pb = self.create_progress_bar(total_items, "Generating Audit Data");
2334
2335 let mut snapshot = AuditSnapshot::default();
2336
2337 let mut engagement_gen = AuditEngagementGenerator::new(self.seed + 7000);
2339 let mut workpaper_gen = WorkpaperGenerator::new(self.seed + 7100);
2340 let mut evidence_gen = EvidenceGenerator::new(self.seed + 7200);
2341 let mut risk_gen = RiskAssessmentGenerator::new(self.seed + 7300);
2342 let mut finding_gen = FindingGenerator::new(self.seed + 7400);
2343 let mut judgment_gen = JudgmentGenerator::new(self.seed + 7500);
2344
2345 let accounts: Vec<String> = self
2347 .coa
2348 .as_ref()
2349 .map(|coa| {
2350 coa.get_postable_accounts()
2351 .iter()
2352 .map(|acc| acc.account_code().to_string())
2353 .collect()
2354 })
2355 .unwrap_or_default();
2356
2357 for (i, company) in self.config.companies.iter().enumerate() {
2359 let company_revenue = total_revenue
2361 * rust_decimal::Decimal::try_from(company.volume_weight).unwrap_or_default();
2362
2363 let engagements_for_company =
2365 self.phase_config.audit_engagements / self.config.companies.len().max(1);
2366 let extra = if i < self.phase_config.audit_engagements % self.config.companies.len() {
2367 1
2368 } else {
2369 0
2370 };
2371
2372 for _eng_idx in 0..(engagements_for_company + extra) {
2373 let engagement = engagement_gen.generate_engagement(
2375 &company.code,
2376 &company.name,
2377 fiscal_year,
2378 period_end,
2379 company_revenue,
2380 None, );
2382
2383 if let Some(pb) = &pb {
2384 pb.inc(1);
2385 }
2386
2387 let team_members: Vec<String> = engagement.team_member_ids.clone();
2389
2390 let workpapers =
2392 workpaper_gen.generate_complete_workpaper_set(&engagement, &team_members);
2393
2394 for wp in &workpapers {
2395 if let Some(pb) = &pb {
2396 pb.inc(1);
2397 }
2398
2399 let evidence = evidence_gen.generate_evidence_for_workpaper(
2401 wp,
2402 &team_members,
2403 wp.preparer_date,
2404 );
2405
2406 for _ in &evidence {
2407 if let Some(pb) = &pb {
2408 pb.inc(1);
2409 }
2410 }
2411
2412 snapshot.evidence.extend(evidence);
2413 }
2414
2415 let risks =
2417 risk_gen.generate_risks_for_engagement(&engagement, &team_members, &accounts);
2418
2419 for _ in &risks {
2420 if let Some(pb) = &pb {
2421 pb.inc(1);
2422 }
2423 }
2424 snapshot.risk_assessments.extend(risks);
2425
2426 let findings = finding_gen.generate_findings_for_engagement(
2428 &engagement,
2429 &workpapers,
2430 &team_members,
2431 );
2432
2433 for _ in &findings {
2434 if let Some(pb) = &pb {
2435 pb.inc(1);
2436 }
2437 }
2438 snapshot.findings.extend(findings);
2439
2440 let judgments =
2442 judgment_gen.generate_judgments_for_engagement(&engagement, &team_members);
2443
2444 for _ in &judgments {
2445 if let Some(pb) = &pb {
2446 pb.inc(1);
2447 }
2448 }
2449 snapshot.judgments.extend(judgments);
2450
2451 snapshot.workpapers.extend(workpapers);
2453 snapshot.engagements.push(engagement);
2454 }
2455 }
2456
2457 if let Some(pb) = pb {
2458 pb.finish_with_message(format!(
2459 "Audit data: {} engagements, {} workpapers, {} evidence",
2460 snapshot.engagements.len(),
2461 snapshot.workpapers.len(),
2462 snapshot.evidence.len()
2463 ));
2464 }
2465
2466 Ok(snapshot)
2467 }
2468
2469 fn export_graphs(
2476 &mut self,
2477 entries: &[JournalEntry],
2478 _coa: &Arc<ChartOfAccounts>,
2479 stats: &mut EnhancedGenerationStatistics,
2480 ) -> SynthResult<GraphExportSnapshot> {
2481 let pb = self.create_progress_bar(100, "Exporting Graphs");
2482
2483 let mut snapshot = GraphExportSnapshot::default();
2484
2485 let output_dir = self
2487 .output_path
2488 .clone()
2489 .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
2490 let graph_dir = output_dir.join(&self.config.graph_export.output_subdirectory);
2491
2492 for graph_type in &self.config.graph_export.graph_types {
2494 if let Some(pb) = &pb {
2495 pb.inc(10);
2496 }
2497
2498 let graph_config = TransactionGraphConfig {
2500 include_vendors: false,
2501 include_customers: false,
2502 create_debit_credit_edges: true,
2503 include_document_nodes: graph_type.include_document_nodes,
2504 min_edge_weight: graph_type.min_edge_weight,
2505 aggregate_parallel_edges: graph_type.aggregate_edges,
2506 };
2507
2508 let mut builder = TransactionGraphBuilder::new(graph_config);
2509 builder.add_journal_entries(entries);
2510 let graph = builder.build();
2511
2512 stats.graph_node_count += graph.node_count();
2514 stats.graph_edge_count += graph.edge_count();
2515
2516 if let Some(pb) = &pb {
2517 pb.inc(40);
2518 }
2519
2520 for format in &self.config.graph_export.formats {
2522 let format_dir = graph_dir.join(&graph_type.name).join(format_name(*format));
2523
2524 if let Err(e) = std::fs::create_dir_all(&format_dir) {
2526 warn!("Failed to create graph output directory: {}", e);
2527 continue;
2528 }
2529
2530 match format {
2531 datasynth_config::schema::GraphExportFormat::PytorchGeometric => {
2532 let pyg_config = PyGExportConfig {
2533 common: datasynth_graph::CommonExportConfig {
2534 export_node_features: true,
2535 export_edge_features: true,
2536 export_node_labels: true,
2537 export_edge_labels: true,
2538 export_masks: true,
2539 train_ratio: self.config.graph_export.train_ratio,
2540 val_ratio: self.config.graph_export.validation_ratio,
2541 seed: self.config.graph_export.split_seed.unwrap_or(self.seed),
2542 },
2543 one_hot_categoricals: false,
2544 };
2545
2546 let exporter = PyGExporter::new(pyg_config);
2547 match exporter.export(&graph, &format_dir) {
2548 Ok(metadata) => {
2549 snapshot.exports.insert(
2550 format!("{}_{}", graph_type.name, "pytorch_geometric"),
2551 GraphExportInfo {
2552 name: graph_type.name.clone(),
2553 format: "pytorch_geometric".to_string(),
2554 output_path: format_dir.clone(),
2555 node_count: metadata.num_nodes,
2556 edge_count: metadata.num_edges,
2557 },
2558 );
2559 snapshot.graph_count += 1;
2560 }
2561 Err(e) => {
2562 warn!("Failed to export PyTorch Geometric graph: {}", e);
2563 }
2564 }
2565 }
2566 datasynth_config::schema::GraphExportFormat::Neo4j => {
2567 debug!("Neo4j export not yet implemented for accounting networks");
2569 }
2570 datasynth_config::schema::GraphExportFormat::Dgl => {
2571 debug!("DGL export not yet implemented for accounting networks");
2573 }
2574 datasynth_config::schema::GraphExportFormat::RustGraph => {
2575 use datasynth_graph::{
2576 RustGraphExportConfig, RustGraphExporter, RustGraphOutputFormat,
2577 };
2578
2579 let rustgraph_config = RustGraphExportConfig {
2580 include_features: true,
2581 include_temporal: true,
2582 include_labels: true,
2583 source_name: "datasynth".to_string(),
2584 batch_id: None,
2585 output_format: RustGraphOutputFormat::JsonLines,
2586 export_node_properties: true,
2587 export_edge_properties: true,
2588 pretty_print: false,
2589 };
2590
2591 let exporter = RustGraphExporter::new(rustgraph_config);
2592 match exporter.export(&graph, &format_dir) {
2593 Ok(metadata) => {
2594 snapshot.exports.insert(
2595 format!("{}_{}", graph_type.name, "rustgraph"),
2596 GraphExportInfo {
2597 name: graph_type.name.clone(),
2598 format: "rustgraph".to_string(),
2599 output_path: format_dir.clone(),
2600 node_count: metadata.num_nodes,
2601 edge_count: metadata.num_edges,
2602 },
2603 );
2604 snapshot.graph_count += 1;
2605 }
2606 Err(e) => {
2607 warn!("Failed to export RustGraph: {}", e);
2608 }
2609 }
2610 }
2611 datasynth_config::schema::GraphExportFormat::RustGraphHypergraph => {
2612 debug!("RustGraphHypergraph format is handled in Phase 10b (hypergraph export)");
2614 }
2615 }
2616 }
2617
2618 if let Some(pb) = &pb {
2619 pb.inc(40);
2620 }
2621 }
2622
2623 stats.graph_export_count = snapshot.graph_count;
2624 snapshot.exported = snapshot.graph_count > 0;
2625
2626 if let Some(pb) = pb {
2627 pb.finish_with_message(format!(
2628 "Graphs exported: {} graphs ({} nodes, {} edges)",
2629 snapshot.graph_count, stats.graph_node_count, stats.graph_edge_count
2630 ));
2631 }
2632
2633 Ok(snapshot)
2634 }
2635
2636 fn export_hypergraph(
2643 &self,
2644 coa: &Arc<ChartOfAccounts>,
2645 entries: &[JournalEntry],
2646 document_flows: &DocumentFlowSnapshot,
2647 stats: &mut EnhancedGenerationStatistics,
2648 ) -> SynthResult<HypergraphExportInfo> {
2649 use datasynth_graph::builders::hypergraph::{HypergraphBuilder, HypergraphConfig};
2650 use datasynth_graph::exporters::hypergraph::{HypergraphExportConfig, HypergraphExporter};
2651 use datasynth_graph::exporters::unified::{RustGraphUnifiedExporter, UnifiedExportConfig};
2652 use datasynth_graph::models::hypergraph::AggregationStrategy;
2653
2654 let hg_settings = &self.config.graph_export.hypergraph;
2655
2656 let aggregation_strategy = match hg_settings.aggregation_strategy.as_str() {
2658 "truncate" => AggregationStrategy::Truncate,
2659 "pool_by_counterparty" => AggregationStrategy::PoolByCounterparty,
2660 "pool_by_time_period" => AggregationStrategy::PoolByTimePeriod,
2661 "importance_sample" => AggregationStrategy::ImportanceSample,
2662 _ => AggregationStrategy::PoolByCounterparty,
2663 };
2664
2665 let builder_config = HypergraphConfig {
2666 max_nodes: hg_settings.max_nodes,
2667 aggregation_strategy,
2668 include_coso: hg_settings.governance_layer.include_coso,
2669 include_controls: hg_settings.governance_layer.include_controls,
2670 include_sox: hg_settings.governance_layer.include_sox,
2671 include_vendors: hg_settings.governance_layer.include_vendors,
2672 include_customers: hg_settings.governance_layer.include_customers,
2673 include_employees: hg_settings.governance_layer.include_employees,
2674 include_p2p: hg_settings.process_layer.include_p2p,
2675 include_o2c: hg_settings.process_layer.include_o2c,
2676 events_as_hyperedges: hg_settings.process_layer.events_as_hyperedges,
2677 docs_per_counterparty_threshold: hg_settings
2678 .process_layer
2679 .docs_per_counterparty_threshold,
2680 include_accounts: hg_settings.accounting_layer.include_accounts,
2681 je_as_hyperedges: hg_settings.accounting_layer.je_as_hyperedges,
2682 include_cross_layer_edges: hg_settings.cross_layer.enabled,
2683 };
2684
2685 let mut builder = HypergraphBuilder::new(builder_config);
2686
2687 builder.add_coso_framework();
2689
2690 if hg_settings.governance_layer.include_controls && self.config.internal_controls.enabled {
2693 let controls = InternalControl::standard_controls();
2694 builder.add_controls(&controls);
2695 }
2696
2697 builder.add_vendors(&self.master_data.vendors);
2699 builder.add_customers(&self.master_data.customers);
2700 builder.add_employees(&self.master_data.employees);
2701
2702 builder.add_p2p_documents(
2704 &document_flows.purchase_orders,
2705 &document_flows.goods_receipts,
2706 &document_flows.vendor_invoices,
2707 &document_flows.payments,
2708 );
2709 builder.add_o2c_documents(
2710 &document_flows.sales_orders,
2711 &document_flows.deliveries,
2712 &document_flows.customer_invoices,
2713 );
2714
2715 builder.add_accounts(coa);
2717 builder.add_journal_entries_as_hyperedges(entries);
2718
2719 let hypergraph = builder.build();
2721
2722 let output_dir = self
2724 .output_path
2725 .clone()
2726 .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
2727 let hg_dir = output_dir
2728 .join(&self.config.graph_export.output_subdirectory)
2729 .join(&hg_settings.output_subdirectory);
2730
2731 let (num_nodes, num_edges, num_hyperedges) = match hg_settings.output_format.as_str() {
2733 "unified" => {
2734 let exporter = RustGraphUnifiedExporter::new(UnifiedExportConfig::default());
2735 let metadata = exporter.export(&hypergraph, &hg_dir).map_err(|e| {
2736 SynthError::generation(format!("Unified hypergraph export failed: {}", e))
2737 })?;
2738 (
2739 metadata.num_nodes,
2740 metadata.num_edges,
2741 metadata.num_hyperedges,
2742 )
2743 }
2744 _ => {
2745 let exporter = HypergraphExporter::new(HypergraphExportConfig::default());
2747 let metadata = exporter.export(&hypergraph, &hg_dir).map_err(|e| {
2748 SynthError::generation(format!("Hypergraph export failed: {}", e))
2749 })?;
2750 (
2751 metadata.num_nodes,
2752 metadata.num_edges,
2753 metadata.num_hyperedges,
2754 )
2755 }
2756 };
2757
2758 #[cfg(feature = "streaming")]
2760 if let Some(ref target_url) = hg_settings.stream_target {
2761 use crate::stream_client::{StreamClient, StreamConfig};
2762 use std::io::Write as _;
2763
2764 let api_key = std::env::var("RUSTGRAPH_API_KEY").ok();
2765 let stream_config = StreamConfig {
2766 target_url: target_url.clone(),
2767 batch_size: hg_settings.stream_batch_size,
2768 api_key,
2769 ..StreamConfig::default()
2770 };
2771
2772 match StreamClient::new(stream_config) {
2773 Ok(mut client) => {
2774 let exporter = RustGraphUnifiedExporter::new(UnifiedExportConfig::default());
2775 match exporter.export_to_writer(&hypergraph, &mut client) {
2776 Ok(_) => {
2777 if let Err(e) = client.flush() {
2778 warn!("Failed to flush stream client: {}", e);
2779 } else {
2780 info!("Streamed {} records to {}", client.total_sent(), target_url);
2781 }
2782 }
2783 Err(e) => {
2784 warn!("Streaming export failed: {}", e);
2785 }
2786 }
2787 }
2788 Err(e) => {
2789 warn!("Failed to create stream client: {}", e);
2790 }
2791 }
2792 }
2793
2794 stats.graph_node_count += num_nodes;
2796 stats.graph_edge_count += num_edges;
2797 stats.graph_export_count += 1;
2798
2799 Ok(HypergraphExportInfo {
2800 node_count: num_nodes,
2801 edge_count: num_edges,
2802 hyperedge_count: num_hyperedges,
2803 output_path: hg_dir,
2804 })
2805 }
2806
2807 fn generate_banking_data(&mut self) -> SynthResult<BankingSnapshot> {
2812 let pb = self.create_progress_bar(100, "Generating Banking Data");
2813
2814 let orchestrator = BankingOrchestratorBuilder::new()
2816 .config(self.config.banking.clone())
2817 .seed(self.seed + 9000)
2818 .build();
2819
2820 if let Some(pb) = &pb {
2821 pb.inc(10);
2822 }
2823
2824 let result = orchestrator.generate();
2826
2827 if let Some(pb) = &pb {
2828 pb.inc(90);
2829 pb.finish_with_message(format!(
2830 "Banking: {} customers, {} transactions",
2831 result.customers.len(),
2832 result.transactions.len()
2833 ));
2834 }
2835
2836 Ok(BankingSnapshot {
2837 customers: result.customers,
2838 accounts: result.accounts,
2839 transactions: result.transactions,
2840 suspicious_count: result.stats.suspicious_count,
2841 scenario_count: result.scenarios.len(),
2842 })
2843 }
2844
2845 fn calculate_total_transactions(&self) -> u64 {
2847 let months = self.config.global.period_months as f64;
2848 self.config
2849 .companies
2850 .iter()
2851 .map(|c| {
2852 let annual = c.annual_transaction_volume.count() as f64;
2853 let weighted = annual * c.volume_weight;
2854 (weighted * months / 12.0) as u64
2855 })
2856 .sum()
2857 }
2858
2859 fn create_progress_bar(&self, total: u64, message: &str) -> Option<ProgressBar> {
2861 if !self.phase_config.show_progress {
2862 return None;
2863 }
2864
2865 let pb = if let Some(mp) = &self.multi_progress {
2866 mp.add(ProgressBar::new(total))
2867 } else {
2868 ProgressBar::new(total)
2869 };
2870
2871 pb.set_style(
2872 ProgressStyle::default_bar()
2873 .template(&format!(
2874 "{{spinner:.green}} {} [{{elapsed_precise}}] [{{bar:40.cyan/blue}}] {{pos}}/{{len}} ({{per_sec}})",
2875 message
2876 ))
2877 .expect("Progress bar template should be valid - uses only standard indicatif placeholders")
2878 .progress_chars("#>-"),
2879 );
2880
2881 Some(pb)
2882 }
2883
2884 pub fn get_coa(&self) -> Option<Arc<ChartOfAccounts>> {
2886 self.coa.clone()
2887 }
2888
2889 pub fn get_master_data(&self) -> &MasterDataSnapshot {
2891 &self.master_data
2892 }
2893
2894 fn build_lineage_graph(&self) -> super::lineage::LineageGraph {
2896 use super::lineage::LineageGraphBuilder;
2897
2898 let mut builder = LineageGraphBuilder::new();
2899
2900 builder.add_config_section("config:global", "Global Config");
2902 builder.add_config_section("config:chart_of_accounts", "Chart of Accounts Config");
2903 builder.add_config_section("config:transactions", "Transaction Config");
2904
2905 builder.add_generator_phase("phase:coa", "Chart of Accounts Generation");
2907 builder.add_generator_phase("phase:je", "Journal Entry Generation");
2908
2909 builder.configured_by("phase:coa", "config:chart_of_accounts");
2911 builder.configured_by("phase:je", "config:transactions");
2912
2913 builder.add_output_file("output:je", "Journal Entries", "sample_entries.json");
2915 builder.produced_by("output:je", "phase:je");
2916
2917 if self.phase_config.generate_master_data {
2919 builder.add_config_section("config:master_data", "Master Data Config");
2920 builder.add_generator_phase("phase:master_data", "Master Data Generation");
2921 builder.configured_by("phase:master_data", "config:master_data");
2922 builder.input_to("phase:master_data", "phase:je");
2923 }
2924
2925 if self.phase_config.generate_document_flows {
2926 builder.add_config_section("config:document_flows", "Document Flow Config");
2927 builder.add_generator_phase("phase:p2p", "P2P Document Flow");
2928 builder.add_generator_phase("phase:o2c", "O2C Document Flow");
2929 builder.configured_by("phase:p2p", "config:document_flows");
2930 builder.configured_by("phase:o2c", "config:document_flows");
2931
2932 builder.add_output_file("output:po", "Purchase Orders", "purchase_orders.csv");
2933 builder.add_output_file("output:gr", "Goods Receipts", "goods_receipts.csv");
2934 builder.add_output_file("output:vi", "Vendor Invoices", "vendor_invoices.csv");
2935 builder.add_output_file("output:so", "Sales Orders", "sales_orders.csv");
2936 builder.add_output_file("output:ci", "Customer Invoices", "customer_invoices.csv");
2937
2938 builder.produced_by("output:po", "phase:p2p");
2939 builder.produced_by("output:gr", "phase:p2p");
2940 builder.produced_by("output:vi", "phase:p2p");
2941 builder.produced_by("output:so", "phase:o2c");
2942 builder.produced_by("output:ci", "phase:o2c");
2943 }
2944
2945 if self.phase_config.inject_anomalies {
2946 builder.add_config_section("config:fraud", "Fraud/Anomaly Config");
2947 builder.add_generator_phase("phase:anomaly", "Anomaly Injection");
2948 builder.configured_by("phase:anomaly", "config:fraud");
2949 builder.add_output_file(
2950 "output:labels",
2951 "Anomaly Labels",
2952 "labels/anomaly_labels.csv",
2953 );
2954 builder.produced_by("output:labels", "phase:anomaly");
2955 }
2956
2957 if self.phase_config.generate_audit {
2958 builder.add_config_section("config:audit", "Audit Config");
2959 builder.add_generator_phase("phase:audit", "Audit Data Generation");
2960 builder.configured_by("phase:audit", "config:audit");
2961 }
2962
2963 if self.phase_config.generate_banking {
2964 builder.add_config_section("config:banking", "Banking Config");
2965 builder.add_generator_phase("phase:banking", "Banking KYC/AML Generation");
2966 builder.configured_by("phase:banking", "config:banking");
2967 }
2968
2969 if self.config.llm.enabled {
2970 builder.add_config_section("config:llm", "LLM Enrichment Config");
2971 builder.add_generator_phase("phase:llm_enrichment", "LLM Enrichment");
2972 builder.configured_by("phase:llm_enrichment", "config:llm");
2973 }
2974
2975 if self.config.diffusion.enabled {
2976 builder.add_config_section("config:diffusion", "Diffusion Enhancement Config");
2977 builder.add_generator_phase("phase:diffusion", "Diffusion Enhancement");
2978 builder.configured_by("phase:diffusion", "config:diffusion");
2979 }
2980
2981 if self.config.causal.enabled {
2982 builder.add_config_section("config:causal", "Causal Generation Config");
2983 builder.add_generator_phase("phase:causal", "Causal Overlay");
2984 builder.configured_by("phase:causal", "config:causal");
2985 }
2986
2987 builder.build()
2988 }
2989}
2990
2991fn format_name(format: datasynth_config::schema::GraphExportFormat) -> &'static str {
2993 match format {
2994 datasynth_config::schema::GraphExportFormat::PytorchGeometric => "pytorch_geometric",
2995 datasynth_config::schema::GraphExportFormat::Neo4j => "neo4j",
2996 datasynth_config::schema::GraphExportFormat::Dgl => "dgl",
2997 datasynth_config::schema::GraphExportFormat::RustGraph => "rustgraph",
2998 datasynth_config::schema::GraphExportFormat::RustGraphHypergraph => "rustgraph_hypergraph",
2999 }
3000}
3001
3002#[cfg(test)]
3003#[allow(clippy::unwrap_used)]
3004mod tests {
3005 use super::*;
3006 use datasynth_config::schema::*;
3007
3008 fn create_test_config() -> GeneratorConfig {
3009 GeneratorConfig {
3010 global: GlobalConfig {
3011 industry: IndustrySector::Manufacturing,
3012 start_date: "2024-01-01".to_string(),
3013 period_months: 1,
3014 seed: Some(42),
3015 parallel: false,
3016 group_currency: "USD".to_string(),
3017 worker_threads: 0,
3018 memory_limit_mb: 0,
3019 },
3020 companies: vec![CompanyConfig {
3021 code: "1000".to_string(),
3022 name: "Test Company".to_string(),
3023 currency: "USD".to_string(),
3024 country: "US".to_string(),
3025 annual_transaction_volume: TransactionVolume::TenK,
3026 volume_weight: 1.0,
3027 fiscal_year_variant: "K4".to_string(),
3028 }],
3029 chart_of_accounts: ChartOfAccountsConfig {
3030 complexity: CoAComplexity::Small,
3031 industry_specific: true,
3032 custom_accounts: None,
3033 min_hierarchy_depth: 2,
3034 max_hierarchy_depth: 4,
3035 },
3036 transactions: TransactionConfig::default(),
3037 output: OutputConfig::default(),
3038 fraud: FraudConfig::default(),
3039 internal_controls: InternalControlsConfig::default(),
3040 business_processes: BusinessProcessConfig::default(),
3041 user_personas: UserPersonaConfig::default(),
3042 templates: TemplateConfig::default(),
3043 approval: ApprovalConfig::default(),
3044 departments: DepartmentConfig::default(),
3045 master_data: MasterDataConfig::default(),
3046 document_flows: DocumentFlowConfig::default(),
3047 intercompany: IntercompanyConfig::default(),
3048 balance: BalanceConfig::default(),
3049 ocpm: OcpmConfig::default(),
3050 audit: AuditGenerationConfig::default(),
3051 banking: datasynth_banking::BankingConfig::default(),
3052 data_quality: DataQualitySchemaConfig::default(),
3053 scenario: ScenarioConfig::default(),
3054 temporal: TemporalDriftConfig::default(),
3055 graph_export: GraphExportConfig::default(),
3056 streaming: StreamingSchemaConfig::default(),
3057 rate_limit: RateLimitSchemaConfig::default(),
3058 temporal_attributes: TemporalAttributeSchemaConfig::default(),
3059 relationships: RelationshipSchemaConfig::default(),
3060 accounting_standards: AccountingStandardsConfig::default(),
3061 audit_standards: AuditStandardsConfig::default(),
3062 distributions: Default::default(),
3063 temporal_patterns: Default::default(),
3064 vendor_network: VendorNetworkSchemaConfig::default(),
3065 customer_segmentation: CustomerSegmentationSchemaConfig::default(),
3066 relationship_strength: RelationshipStrengthSchemaConfig::default(),
3067 cross_process_links: CrossProcessLinksSchemaConfig::default(),
3068 organizational_events: OrganizationalEventsSchemaConfig::default(),
3069 behavioral_drift: BehavioralDriftSchemaConfig::default(),
3070 market_drift: MarketDriftSchemaConfig::default(),
3071 drift_labeling: DriftLabelingSchemaConfig::default(),
3072 anomaly_injection: Default::default(),
3073 industry_specific: Default::default(),
3074 fingerprint_privacy: Default::default(),
3075 quality_gates: Default::default(),
3076 compliance: Default::default(),
3077 webhooks: Default::default(),
3078 llm: Default::default(),
3079 diffusion: Default::default(),
3080 causal: Default::default(),
3081 }
3082 }
3083
3084 #[test]
3085 fn test_enhanced_orchestrator_creation() {
3086 let config = create_test_config();
3087 let orchestrator = EnhancedOrchestrator::with_defaults(config);
3088 assert!(orchestrator.is_ok());
3089 }
3090
3091 #[test]
3092 fn test_minimal_generation() {
3093 let config = create_test_config();
3094 let phase_config = PhaseConfig {
3095 generate_master_data: false,
3096 generate_document_flows: false,
3097 generate_journal_entries: true,
3098 inject_anomalies: false,
3099 show_progress: false,
3100 ..Default::default()
3101 };
3102
3103 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
3104 let result = orchestrator.generate();
3105
3106 assert!(result.is_ok());
3107 let result = result.unwrap();
3108 assert!(!result.journal_entries.is_empty());
3109 }
3110
3111 #[test]
3112 fn test_master_data_generation() {
3113 let config = create_test_config();
3114 let phase_config = PhaseConfig {
3115 generate_master_data: true,
3116 generate_document_flows: false,
3117 generate_journal_entries: false,
3118 inject_anomalies: false,
3119 show_progress: false,
3120 vendors_per_company: 5,
3121 customers_per_company: 5,
3122 materials_per_company: 10,
3123 assets_per_company: 5,
3124 employees_per_company: 10,
3125 ..Default::default()
3126 };
3127
3128 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
3129 let result = orchestrator.generate().unwrap();
3130
3131 assert!(!result.master_data.vendors.is_empty());
3132 assert!(!result.master_data.customers.is_empty());
3133 assert!(!result.master_data.materials.is_empty());
3134 }
3135
3136 #[test]
3137 fn test_document_flow_generation() {
3138 let config = create_test_config();
3139 let phase_config = PhaseConfig {
3140 generate_master_data: true,
3141 generate_document_flows: true,
3142 generate_journal_entries: false,
3143 inject_anomalies: false,
3144 inject_data_quality: false,
3145 validate_balances: false,
3146 generate_ocpm_events: false,
3147 show_progress: false,
3148 vendors_per_company: 5,
3149 customers_per_company: 5,
3150 materials_per_company: 10,
3151 assets_per_company: 5,
3152 employees_per_company: 10,
3153 p2p_chains: 5,
3154 o2c_chains: 5,
3155 ..Default::default()
3156 };
3157
3158 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
3159 let result = orchestrator.generate().unwrap();
3160
3161 assert!(!result.document_flows.p2p_chains.is_empty());
3163 assert!(!result.document_flows.o2c_chains.is_empty());
3164
3165 assert!(!result.document_flows.purchase_orders.is_empty());
3167 assert!(!result.document_flows.sales_orders.is_empty());
3168 }
3169
3170 #[test]
3171 fn test_anomaly_injection() {
3172 let config = create_test_config();
3173 let phase_config = PhaseConfig {
3174 generate_master_data: false,
3175 generate_document_flows: false,
3176 generate_journal_entries: true,
3177 inject_anomalies: true,
3178 show_progress: false,
3179 ..Default::default()
3180 };
3181
3182 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
3183 let result = orchestrator.generate().unwrap();
3184
3185 assert!(!result.journal_entries.is_empty());
3187
3188 assert!(result.anomaly_labels.summary.is_some());
3191 }
3192
3193 #[test]
3194 fn test_full_generation_pipeline() {
3195 let config = create_test_config();
3196 let phase_config = PhaseConfig {
3197 generate_master_data: true,
3198 generate_document_flows: true,
3199 generate_journal_entries: true,
3200 inject_anomalies: false,
3201 inject_data_quality: false,
3202 validate_balances: true,
3203 generate_ocpm_events: false,
3204 show_progress: false,
3205 vendors_per_company: 3,
3206 customers_per_company: 3,
3207 materials_per_company: 5,
3208 assets_per_company: 3,
3209 employees_per_company: 5,
3210 p2p_chains: 3,
3211 o2c_chains: 3,
3212 ..Default::default()
3213 };
3214
3215 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
3216 let result = orchestrator.generate().unwrap();
3217
3218 assert!(!result.master_data.vendors.is_empty());
3220 assert!(!result.master_data.customers.is_empty());
3221 assert!(!result.document_flows.p2p_chains.is_empty());
3222 assert!(!result.document_flows.o2c_chains.is_empty());
3223 assert!(!result.journal_entries.is_empty());
3224 assert!(result.statistics.accounts_count > 0);
3225
3226 assert!(!result.subledger.ap_invoices.is_empty());
3228 assert!(!result.subledger.ar_invoices.is_empty());
3229
3230 assert!(result.balance_validation.validated);
3232 assert!(result.balance_validation.entries_processed > 0);
3233 }
3234
3235 #[test]
3236 fn test_subledger_linking() {
3237 let config = create_test_config();
3238 let phase_config = PhaseConfig {
3239 generate_master_data: true,
3240 generate_document_flows: true,
3241 generate_journal_entries: false,
3242 inject_anomalies: false,
3243 inject_data_quality: false,
3244 validate_balances: false,
3245 generate_ocpm_events: false,
3246 show_progress: false,
3247 vendors_per_company: 5,
3248 customers_per_company: 5,
3249 materials_per_company: 10,
3250 assets_per_company: 3,
3251 employees_per_company: 5,
3252 p2p_chains: 5,
3253 o2c_chains: 5,
3254 ..Default::default()
3255 };
3256
3257 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
3258 let result = orchestrator.generate().unwrap();
3259
3260 assert!(!result.document_flows.vendor_invoices.is_empty());
3262 assert!(!result.document_flows.customer_invoices.is_empty());
3263
3264 assert!(!result.subledger.ap_invoices.is_empty());
3266 assert!(!result.subledger.ar_invoices.is_empty());
3267
3268 assert_eq!(
3270 result.subledger.ap_invoices.len(),
3271 result.document_flows.vendor_invoices.len()
3272 );
3273
3274 assert_eq!(
3276 result.subledger.ar_invoices.len(),
3277 result.document_flows.customer_invoices.len()
3278 );
3279
3280 assert_eq!(
3282 result.statistics.ap_invoice_count,
3283 result.subledger.ap_invoices.len()
3284 );
3285 assert_eq!(
3286 result.statistics.ar_invoice_count,
3287 result.subledger.ar_invoices.len()
3288 );
3289 }
3290
3291 #[test]
3292 fn test_balance_validation() {
3293 let config = create_test_config();
3294 let phase_config = PhaseConfig {
3295 generate_master_data: false,
3296 generate_document_flows: false,
3297 generate_journal_entries: true,
3298 inject_anomalies: false,
3299 validate_balances: true,
3300 show_progress: false,
3301 ..Default::default()
3302 };
3303
3304 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
3305 let result = orchestrator.generate().unwrap();
3306
3307 assert!(result.balance_validation.validated);
3309 assert!(result.balance_validation.entries_processed > 0);
3310
3311 assert!(!result.balance_validation.has_unbalanced_entries);
3313
3314 assert_eq!(
3316 result.balance_validation.total_debits,
3317 result.balance_validation.total_credits
3318 );
3319 }
3320
3321 #[test]
3322 fn test_statistics_accuracy() {
3323 let config = create_test_config();
3324 let phase_config = PhaseConfig {
3325 generate_master_data: true,
3326 generate_document_flows: false,
3327 generate_journal_entries: true,
3328 inject_anomalies: false,
3329 show_progress: false,
3330 vendors_per_company: 10,
3331 customers_per_company: 20,
3332 materials_per_company: 15,
3333 assets_per_company: 5,
3334 employees_per_company: 8,
3335 ..Default::default()
3336 };
3337
3338 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
3339 let result = orchestrator.generate().unwrap();
3340
3341 assert_eq!(
3343 result.statistics.vendor_count,
3344 result.master_data.vendors.len()
3345 );
3346 assert_eq!(
3347 result.statistics.customer_count,
3348 result.master_data.customers.len()
3349 );
3350 assert_eq!(
3351 result.statistics.material_count,
3352 result.master_data.materials.len()
3353 );
3354 assert_eq!(
3355 result.statistics.total_entries as usize,
3356 result.journal_entries.len()
3357 );
3358 }
3359
3360 #[test]
3361 fn test_phase_config_defaults() {
3362 let config = PhaseConfig::default();
3363 assert!(config.generate_master_data);
3364 assert!(config.generate_document_flows);
3365 assert!(config.generate_journal_entries);
3366 assert!(!config.inject_anomalies);
3367 assert!(config.validate_balances);
3368 assert!(config.show_progress);
3369 assert!(config.vendors_per_company > 0);
3370 assert!(config.customers_per_company > 0);
3371 }
3372
3373 #[test]
3374 fn test_get_coa_before_generation() {
3375 let config = create_test_config();
3376 let orchestrator = EnhancedOrchestrator::with_defaults(config).unwrap();
3377
3378 assert!(orchestrator.get_coa().is_none());
3380 }
3381
3382 #[test]
3383 fn test_get_coa_after_generation() {
3384 let config = create_test_config();
3385 let phase_config = PhaseConfig {
3386 generate_master_data: false,
3387 generate_document_flows: false,
3388 generate_journal_entries: true,
3389 inject_anomalies: false,
3390 show_progress: false,
3391 ..Default::default()
3392 };
3393
3394 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
3395 let _ = orchestrator.generate().unwrap();
3396
3397 assert!(orchestrator.get_coa().is_some());
3399 }
3400
3401 #[test]
3402 fn test_get_master_data() {
3403 let config = create_test_config();
3404 let phase_config = PhaseConfig {
3405 generate_master_data: true,
3406 generate_document_flows: false,
3407 generate_journal_entries: false,
3408 inject_anomalies: false,
3409 show_progress: false,
3410 vendors_per_company: 5,
3411 customers_per_company: 5,
3412 materials_per_company: 5,
3413 assets_per_company: 5,
3414 employees_per_company: 5,
3415 ..Default::default()
3416 };
3417
3418 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
3419 let _ = orchestrator.generate().unwrap();
3420
3421 let master_data = orchestrator.get_master_data();
3422 assert!(!master_data.vendors.is_empty());
3423 }
3424
3425 #[test]
3426 fn test_with_progress_builder() {
3427 let config = create_test_config();
3428 let orchestrator = EnhancedOrchestrator::with_defaults(config)
3429 .unwrap()
3430 .with_progress(false);
3431
3432 assert!(!orchestrator.phase_config.show_progress);
3434 }
3435
3436 #[test]
3437 fn test_multi_company_generation() {
3438 let mut config = create_test_config();
3439 config.companies.push(CompanyConfig {
3440 code: "2000".to_string(),
3441 name: "Subsidiary".to_string(),
3442 currency: "EUR".to_string(),
3443 country: "DE".to_string(),
3444 annual_transaction_volume: TransactionVolume::TenK,
3445 volume_weight: 0.5,
3446 fiscal_year_variant: "K4".to_string(),
3447 });
3448
3449 let phase_config = PhaseConfig {
3450 generate_master_data: true,
3451 generate_document_flows: false,
3452 generate_journal_entries: true,
3453 inject_anomalies: false,
3454 show_progress: false,
3455 vendors_per_company: 5,
3456 customers_per_company: 5,
3457 materials_per_company: 5,
3458 assets_per_company: 5,
3459 employees_per_company: 5,
3460 ..Default::default()
3461 };
3462
3463 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
3464 let result = orchestrator.generate().unwrap();
3465
3466 assert!(result.statistics.vendor_count >= 10); assert!(result.statistics.customer_count >= 10);
3469 assert!(result.statistics.companies_count == 2);
3470 }
3471
#[test]
fn test_empty_master_data_skips_document_flows() {
    let cfg = create_test_config();
    // Ask for document flows while master data generation stays switched off.
    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: true,
        generate_journal_entries: false,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let output = engine.generate().unwrap();

    // With no master data available, neither P2P nor O2C chains may be produced.
    let flows = &output.document_flows;
    assert!(flows.p2p_chains.is_empty());
    assert!(flows.o2c_chains.is_empty());
}
3491
#[test]
fn test_journal_entry_line_item_count() {
    let cfg = create_test_config();
    // Only the journal entry phase is enabled for this run.
    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let output = engine.generate().unwrap();

    // Recount the line items directly from the generated entries and compare
    // the total against the figure reported in the statistics.
    let mut recounted: u64 = 0;
    for entry in output.journal_entries.iter() {
        recounted += entry.line_count() as u64;
    }
    assert_eq!(output.statistics.total_line_items, recounted);
}
3515
#[test]
fn test_audit_generation() {
    let cfg = create_test_config();
    // Journal entries plus the audit phase, with explicit per-engagement volumes.
    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        generate_audit: true,
        audit_engagements: 2,
        workpapers_per_engagement: 5,
        evidence_per_workpaper: 2,
        risks_per_engagement: 3,
        findings_per_engagement: 2,
        judgments_per_engagement: 2,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let output = engine.generate().unwrap();

    let audit = &output.audit;
    let stats = &output.statistics;

    // The engagement count must match the configured `audit_engagements`, and
    // every dependent audit collection must contain at least one record.
    assert_eq!(audit.engagements.len(), 2);
    assert!(!audit.workpapers.is_empty());
    assert!(!audit.evidence.is_empty());
    assert!(!audit.risk_assessments.is_empty());
    assert!(!audit.findings.is_empty());
    assert!(!audit.judgments.is_empty());

    // Each statistics counter must agree with the size of its collection.
    assert_eq!(stats.audit_engagement_count, audit.engagements.len());
    assert_eq!(stats.audit_workpaper_count, audit.workpapers.len());
    assert_eq!(stats.audit_evidence_count, audit.evidence.len());
    assert_eq!(stats.audit_risk_count, audit.risk_assessments.len());
    assert_eq!(stats.audit_finding_count, audit.findings.len());
    assert_eq!(stats.audit_judgment_count, audit.judgments.len());
}
3572
#[test]
fn test_new_phases_disabled_by_default() {
    let cfg = create_test_config();
    // The test configuration must leave the LLM, diffusion and causal phases off.
    assert!(!cfg.llm.enabled);
    assert!(!cfg.diffusion.enabled);
    assert!(!cfg.causal.enabled);

    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let output = engine.generate().unwrap();

    // With those phases disabled, their counters and timings stay at zero and
    // no causal validation verdict is recorded.
    let stats = &output.statistics;
    assert_eq!(stats.llm_enrichment_ms, 0);
    assert_eq!(stats.llm_vendors_enriched, 0);
    assert_eq!(stats.diffusion_enhancement_ms, 0);
    assert_eq!(stats.diffusion_samples_generated, 0);
    assert_eq!(stats.causal_generation_ms, 0);
    assert_eq!(stats.causal_samples_generated, 0);
    assert!(stats.causal_validation_passed.is_none());
}
3602
#[test]
fn test_llm_enrichment_enabled() {
    // Turn on LLM enrichment and cap it at three vendors.
    let mut cfg = create_test_config();
    cfg.llm.enabled = true;
    cfg.llm.max_vendor_enrichments = 3;

    // Master data only, with five vendors so the cap of three is below the supply.
    let phases = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: false,
        inject_anomalies: false,
        show_progress: false,
        vendors_per_company: 5,
        customers_per_company: 3,
        materials_per_company: 3,
        assets_per_company: 3,
        employees_per_company: 3,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let output = engine.generate().unwrap();

    // At least one vendor is enriched, and never more than the configured cap.
    let enriched = output.statistics.llm_vendors_enriched;
    assert!(enriched > 0);
    assert!(enriched <= 3);
}
3630
#[test]
fn test_diffusion_enhancement_enabled() {
    // Enable the diffusion phase with an explicit step count and sample size.
    let mut cfg = create_test_config();
    cfg.diffusion.enabled = true;
    cfg.diffusion.n_steps = 50;
    cfg.diffusion.sample_size = 20;

    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let output = engine.generate().unwrap();

    // The reported sample count must equal the configured `sample_size`.
    let stats = &output.statistics;
    assert_eq!(stats.diffusion_samples_generated, 20);
}
3653
#[test]
fn test_causal_overlay_enabled() {
    // Enable the causal phase with the "fraud_detection" template and validation on.
    let mut cfg = create_test_config();
    cfg.causal.enabled = true;
    cfg.causal.template = "fraud_detection".to_string();
    cfg.causal.sample_size = 100;
    cfg.causal.validate = true;

    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let output = engine.generate().unwrap();

    // The sample count mirrors `sample_size`; because validation was requested,
    // a verdict (pass or fail) must be present.
    let stats = &output.statistics;
    assert_eq!(stats.causal_samples_generated, 100);
    assert!(stats.causal_validation_passed.is_some());
}
3679
#[test]
fn test_causal_overlay_revenue_cycle_template() {
    // Same causal phase, but with the "revenue_cycle" template and validation off.
    let mut cfg = create_test_config();
    cfg.causal.enabled = true;
    cfg.causal.template = "revenue_cycle".to_string();
    cfg.causal.sample_size = 50;
    cfg.causal.validate = false;

    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let output = engine.generate().unwrap();

    // The sample count mirrors `sample_size`; with validation disabled, no
    // verdict may be recorded.
    let stats = &output.statistics;
    assert_eq!(stats.causal_samples_generated, 50);
    assert!(stats.causal_validation_passed.is_none());
}
3705
#[test]
fn test_all_new_phases_enabled_together() {
    // Switch on LLM enrichment, diffusion and the causal overlay in a single run.
    // The causal template is not overridden here.
    let mut cfg = create_test_config();
    cfg.llm.enabled = true;
    cfg.llm.max_vendor_enrichments = 2;
    cfg.diffusion.enabled = true;
    cfg.diffusion.n_steps = 20;
    cfg.diffusion.sample_size = 10;
    cfg.causal.enabled = true;
    cfg.causal.sample_size = 50;
    cfg.causal.validate = true;

    // Master data is generated alongside journal entries in this run.
    let phases = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        vendors_per_company: 5,
        customers_per_company: 3,
        materials_per_company: 3,
        assets_per_company: 3,
        employees_per_company: 3,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let output = engine.generate().unwrap();

    // Every phase must report output that matches its own configuration.
    let stats = &output.statistics;
    assert!(stats.llm_vendors_enriched > 0);
    assert_eq!(stats.diffusion_samples_generated, 10);
    assert_eq!(stats.causal_samples_generated, 50);
    assert!(stats.causal_validation_passed.is_some());
}
3741
#[test]
fn test_statistics_serialization_with_new_fields() {
    // Populate the LLM / diffusion / causal fields with distinct non-default values.
    let original = EnhancedGenerationStatistics {
        total_entries: 100,
        total_line_items: 500,
        llm_enrichment_ms: 42,
        llm_vendors_enriched: 10,
        diffusion_enhancement_ms: 100,
        diffusion_samples_generated: 50,
        causal_generation_ms: 200,
        causal_samples_generated: 100,
        causal_validation_passed: Some(true),
        ..Default::default()
    };

    // Round-trip through JSON.
    let encoded = serde_json::to_string(&original).unwrap();
    let decoded: EnhancedGenerationStatistics = serde_json::from_str(&encoded).unwrap();

    // Each of those fields must come back with the value it was given.
    assert_eq!(decoded.llm_enrichment_ms, 42);
    assert_eq!(decoded.llm_vendors_enriched, 10);
    assert_eq!(decoded.diffusion_enhancement_ms, 100);
    assert_eq!(decoded.diffusion_samples_generated, 50);
    assert_eq!(decoded.causal_generation_ms, 200);
    assert_eq!(decoded.causal_samples_generated, 100);
    assert_eq!(decoded.causal_validation_passed, Some(true));
}
3768
#[test]
fn test_statistics_backward_compat_deserialization() {
    // A statistics payload that carries none of the LLM / diffusion / causal keys.
    let legacy_payload = r#"{
        "total_entries": 100,
        "total_line_items": 500,
        "accounts_count": 50,
        "companies_count": 1,
        "period_months": 12,
        "vendor_count": 10,
        "customer_count": 20,
        "material_count": 15,
        "asset_count": 5,
        "employee_count": 8,
        "p2p_chain_count": 5,
        "o2c_chain_count": 5,
        "ap_invoice_count": 5,
        "ar_invoice_count": 5,
        "ocpm_event_count": 0,
        "ocpm_object_count": 0,
        "ocpm_case_count": 0,
        "audit_engagement_count": 0,
        "audit_workpaper_count": 0,
        "audit_evidence_count": 0,
        "audit_risk_count": 0,
        "audit_finding_count": 0,
        "audit_judgment_count": 0,
        "anomalies_injected": 0,
        "data_quality_issues": 0,
        "banking_customer_count": 0,
        "banking_account_count": 0,
        "banking_transaction_count": 0,
        "banking_suspicious_count": 0,
        "graph_export_count": 0,
        "graph_node_count": 0,
        "graph_edge_count": 0
    }"#;

    // Parsing must succeed even though the newer keys are missing.
    let parsed: EnhancedGenerationStatistics = serde_json::from_str(legacy_payload).unwrap();

    // The missing fields must come back as zero / `None`.
    assert_eq!(parsed.llm_enrichment_ms, 0);
    assert_eq!(parsed.llm_vendors_enriched, 0);
    assert_eq!(parsed.diffusion_enhancement_ms, 0);
    assert_eq!(parsed.diffusion_samples_generated, 0);
    assert_eq!(parsed.causal_generation_ms, 0);
    assert_eq!(parsed.causal_samples_generated, 0);
    assert!(parsed.causal_validation_passed.is_none());
}
3818}