1use std::collections::HashMap;
22use std::path::PathBuf;
23use std::sync::Arc;
24
25use chrono::{Datelike, NaiveDate};
26use indicatif::{MultiProgress, ProgressBar, ProgressStyle};
27use serde::{Deserialize, Serialize};
28use tracing::{debug, info, warn};
29
30use datasynth_banking::{
31 models::{BankAccount, BankTransaction, BankingCustomer},
32 BankingOrchestratorBuilder,
33};
34use datasynth_config::schema::GeneratorConfig;
35use datasynth_core::error::{SynthError, SynthResult};
36use datasynth_core::models::audit::{
37 AuditEngagement, AuditEvidence, AuditFinding, ProfessionalJudgment, RiskAssessment, Workpaper,
38};
39use datasynth_core::models::sourcing::{
40 BidEvaluation, CatalogItem, ProcurementContract, RfxEvent, SourcingProject, SpendAnalysis,
41 SupplierBid, SupplierQualification, SupplierScorecard,
42};
43use datasynth_core::models::subledger::ap::APInvoice;
44use datasynth_core::models::subledger::ar::ARInvoice;
45use datasynth_core::models::*;
46use datasynth_core::{DegradationActions, DegradationLevel, ResourceGuard, ResourceGuardBuilder};
47use datasynth_fingerprint::{
48 io::FingerprintReader,
49 models::Fingerprint,
50 synthesis::{ConfigSynthesizer, CopulaGeneratorSpec, SynthesisOptions},
51};
52use datasynth_generators::{
53 AnomalyInjector,
55 AnomalyInjectorConfig,
56 AssetGenerator,
57 AuditEngagementGenerator,
59 BalanceTrackerConfig,
60 BidEvaluationGenerator,
62 BidGenerator,
63 CatalogGenerator,
64 ChartOfAccountsGenerator,
66 ContractGenerator,
67 CustomerGenerator,
68 DataQualityConfig,
69 DataQualityInjector,
71 DataQualityStats,
72 DocumentFlowJeConfig,
74 DocumentFlowJeGenerator,
75 DocumentFlowLinker,
77 EmployeeGenerator,
78 EvidenceGenerator,
79 FinancialStatementGenerator,
81 FindingGenerator,
82 JournalEntryGenerator,
83 JudgmentGenerator,
84 LatePaymentDistribution,
85 MaterialGenerator,
86 O2CDocumentChain,
87 O2CGenerator,
88 O2CGeneratorConfig,
89 O2CPaymentBehavior,
90 P2PDocumentChain,
91 P2PGenerator,
93 P2PGeneratorConfig,
94 P2PPaymentBehavior,
95 QualificationGenerator,
96 RfxGenerator,
97 RiskAssessmentGenerator,
98 RunningBalanceTracker,
100 ScorecardGenerator,
101 SourcingProjectGenerator,
102 SpendAnalysisGenerator,
103 ValidationError,
104 VendorGenerator,
106 WorkpaperGenerator,
107};
108use datasynth_graph::{
109 PyGExportConfig, PyGExporter, TransactionGraphBuilder, TransactionGraphConfig,
110};
111use datasynth_ocpm::{
112 EventLogMetadata, O2cDocuments, OcpmEventGenerator, OcpmEventLog, OcpmGeneratorConfig,
113 P2pDocuments,
114};
115
116use datasynth_config::schema::{O2CFlowConfig, P2PFlowConfig};
117use datasynth_core::causal::{CausalGraph, CausalValidator, StructuralCausalModel};
118use datasynth_core::diffusion::{DiffusionBackend, DiffusionConfig, StatisticalDiffusionBackend};
119use datasynth_core::llm::MockLlmProvider;
120use datasynth_core::models::documents::PaymentMethod;
121use datasynth_generators::llm_enrichment::VendorLlmEnricher;
122
123fn convert_p2p_config(schema_config: &P2PFlowConfig) -> P2PGeneratorConfig {
129 let payment_behavior = &schema_config.payment_behavior;
130 let late_dist = &payment_behavior.late_payment_days_distribution;
131
132 P2PGeneratorConfig {
133 three_way_match_rate: schema_config.three_way_match_rate,
134 partial_delivery_rate: schema_config.partial_delivery_rate,
135 over_delivery_rate: 0.02, price_variance_rate: schema_config.price_variance_rate,
137 max_price_variance_percent: schema_config.max_price_variance_percent,
138 avg_days_po_to_gr: schema_config.average_po_to_gr_days,
139 avg_days_gr_to_invoice: schema_config.average_gr_to_invoice_days,
140 avg_days_invoice_to_payment: schema_config.average_invoice_to_payment_days,
141 payment_method_distribution: vec![
142 (PaymentMethod::BankTransfer, 0.60),
143 (PaymentMethod::Check, 0.25),
144 (PaymentMethod::Wire, 0.10),
145 (PaymentMethod::CreditCard, 0.05),
146 ],
147 early_payment_discount_rate: 0.30, payment_behavior: P2PPaymentBehavior {
149 late_payment_rate: payment_behavior.late_payment_rate,
150 late_payment_distribution: LatePaymentDistribution {
151 slightly_late_1_to_7: late_dist.slightly_late_1_to_7,
152 late_8_to_14: late_dist.late_8_to_14,
153 very_late_15_to_30: late_dist.very_late_15_to_30,
154 severely_late_31_to_60: late_dist.severely_late_31_to_60,
155 extremely_late_over_60: late_dist.extremely_late_over_60,
156 },
157 partial_payment_rate: payment_behavior.partial_payment_rate,
158 payment_correction_rate: payment_behavior.payment_correction_rate,
159 },
160 }
161}
162
163fn convert_o2c_config(schema_config: &O2CFlowConfig) -> O2CGeneratorConfig {
165 let payment_behavior = &schema_config.payment_behavior;
166
167 O2CGeneratorConfig {
168 credit_check_failure_rate: schema_config.credit_check_failure_rate,
169 partial_shipment_rate: schema_config.partial_shipment_rate,
170 avg_days_so_to_delivery: schema_config.average_so_to_delivery_days,
171 avg_days_delivery_to_invoice: schema_config.average_delivery_to_invoice_days,
172 avg_days_invoice_to_payment: schema_config.average_invoice_to_receipt_days,
173 late_payment_rate: 0.15, bad_debt_rate: schema_config.bad_debt_rate,
175 returns_rate: schema_config.return_rate,
176 cash_discount_take_rate: schema_config.cash_discount.taken_rate,
177 payment_method_distribution: vec![
178 (PaymentMethod::BankTransfer, 0.50),
179 (PaymentMethod::Check, 0.30),
180 (PaymentMethod::Wire, 0.15),
181 (PaymentMethod::CreditCard, 0.05),
182 ],
183 payment_behavior: O2CPaymentBehavior {
184 partial_payment_rate: payment_behavior.partial_payments.rate,
185 short_payment_rate: payment_behavior.short_payments.rate,
186 max_short_percent: payment_behavior.short_payments.max_short_percent,
187 on_account_rate: payment_behavior.on_account_payments.rate,
188 payment_correction_rate: payment_behavior.payment_corrections.rate,
189 avg_days_until_remainder: payment_behavior.partial_payments.avg_days_until_remainder,
190 },
191 }
192}
193
/// Switches and volume settings for the enhanced orchestrator's phases.
///
/// The phase methods in this file read `generate_master_data`,
/// `generate_document_flows`, `generate_ocpm_events`,
/// `generate_journal_entries`, `inject_anomalies`, `inject_data_quality` and
/// `validate_balances` to decide whether the corresponding phase runs. The
/// remaining flags and all count fields are consumed by code outside this
/// section; their descriptions below simply restate the field names.
#[derive(Debug, Clone)]
pub struct PhaseConfig {
    /// Run the master-data phase.
    pub generate_master_data: bool,
    /// Run the document-flow phase (P2P / O2C chains).
    pub generate_document_flows: bool,
    /// Run the OCPM event phase.
    pub generate_ocpm_events: bool,
    /// Run standalone journal-entry generation.
    pub generate_journal_entries: bool,
    /// Run the anomaly-injection phase.
    pub inject_anomalies: bool,
    /// Run the data-quality injection phase.
    pub inject_data_quality: bool,
    /// Run balance validation over the generated journal entries.
    pub validate_balances: bool,
    /// Progress display flag (set by `EnhancedOrchestrator::with_progress`).
    pub show_progress: bool,
    /// Vendors per company.
    pub vendors_per_company: usize,
    /// Customers per company.
    pub customers_per_company: usize,
    /// Materials per company.
    pub materials_per_company: usize,
    /// Fixed assets per company.
    pub assets_per_company: usize,
    /// Employees per company.
    pub employees_per_company: usize,
    /// Number of P2P document chains.
    pub p2p_chains: usize,
    /// Number of O2C document chains.
    pub o2c_chains: usize,
    /// Audit data flag.
    pub generate_audit: bool,
    /// Number of audit engagements.
    pub audit_engagements: usize,
    /// Workpapers per engagement.
    pub workpapers_per_engagement: usize,
    /// Evidence items per workpaper.
    pub evidence_per_workpaper: usize,
    /// Risk assessments per engagement.
    pub risks_per_engagement: usize,
    /// Findings per engagement.
    pub findings_per_engagement: usize,
    /// Professional judgments per engagement.
    pub judgments_per_engagement: usize,
    /// Banking data flag.
    pub generate_banking: bool,
    /// Graph export flag.
    pub generate_graph_export: bool,
    /// Sourcing data flag.
    pub generate_sourcing: bool,
    /// Bank reconciliation flag.
    pub generate_bank_reconciliation: bool,
    /// Financial statements flag.
    pub generate_financial_statements: bool,
    /// Accounting standards flag.
    pub generate_accounting_standards: bool,
    /// Manufacturing data flag.
    pub generate_manufacturing: bool,
    /// Sales / KPI / budget data flag.
    pub generate_sales_kpi_budgets: bool,
}

impl Default for PhaseConfig {
    /// Defaults: master data, document flows, journal entries, balance
    /// validation and progress display are on; every other flag is off.
    fn default() -> Self {
        Self {
            // Core phases.
            generate_master_data: true,
            generate_document_flows: true,
            generate_ocpm_events: false,
            generate_journal_entries: true,
            inject_anomalies: false,
            inject_data_quality: false,
            validate_balances: true,
            show_progress: true,

            // Master-data and document-chain volumes.
            vendors_per_company: 50,
            customers_per_company: 100,
            materials_per_company: 200,
            assets_per_company: 50,
            employees_per_company: 100,
            p2p_chains: 100,
            o2c_chains: 100,

            // Audit volumes (audit itself is off by default).
            generate_audit: false,
            audit_engagements: 5,
            workpapers_per_engagement: 20,
            evidence_per_workpaper: 5,
            risks_per_engagement: 15,
            findings_per_engagement: 8,
            judgments_per_engagement: 10,

            // Optional domains, all off by default.
            generate_banking: false,
            generate_graph_export: false,
            generate_sourcing: false,
            generate_bank_reconciliation: false,
            generate_financial_statements: false,
            generate_accounting_standards: false,
            generate_manufacturing: false,
            generate_sales_kpi_budgets: false,
        }
    }
}
295
296#[derive(Debug, Clone, Default)]
298pub struct MasterDataSnapshot {
299 pub vendors: Vec<Vendor>,
301 pub customers: Vec<Customer>,
303 pub materials: Vec<Material>,
305 pub assets: Vec<FixedAsset>,
307 pub employees: Vec<Employee>,
309}
310
/// Summary of a hypergraph export: element counts plus the output location.
#[derive(Debug, Clone)]
pub struct HypergraphExportInfo {
    /// Number of nodes.
    pub node_count: usize,
    /// Number of edges.
    pub edge_count: usize,
    /// Number of hyperedges.
    pub hyperedge_count: usize,
    /// Path associated with the export output.
    pub output_path: PathBuf,
}
323
324#[derive(Debug, Clone, Default)]
326pub struct DocumentFlowSnapshot {
327 pub p2p_chains: Vec<P2PDocumentChain>,
329 pub o2c_chains: Vec<O2CDocumentChain>,
331 pub purchase_orders: Vec<documents::PurchaseOrder>,
333 pub goods_receipts: Vec<documents::GoodsReceipt>,
335 pub vendor_invoices: Vec<documents::VendorInvoice>,
337 pub sales_orders: Vec<documents::SalesOrder>,
339 pub deliveries: Vec<documents::Delivery>,
341 pub customer_invoices: Vec<documents::CustomerInvoice>,
343 pub payments: Vec<documents::Payment>,
345}
346
347#[derive(Debug, Clone, Default)]
349pub struct SubledgerSnapshot {
350 pub ap_invoices: Vec<APInvoice>,
352 pub ar_invoices: Vec<ARInvoice>,
354}
355
356#[derive(Debug, Clone, Default)]
358pub struct OcpmSnapshot {
359 pub event_log: Option<OcpmEventLog>,
361 pub event_count: usize,
363 pub object_count: usize,
365 pub case_count: usize,
367}
368
369#[derive(Debug, Clone, Default)]
371pub struct AuditSnapshot {
372 pub engagements: Vec<AuditEngagement>,
374 pub workpapers: Vec<Workpaper>,
376 pub evidence: Vec<AuditEvidence>,
378 pub risk_assessments: Vec<RiskAssessment>,
380 pub findings: Vec<AuditFinding>,
382 pub judgments: Vec<ProfessionalJudgment>,
384}
385
386#[derive(Debug, Clone, Default)]
388pub struct BankingSnapshot {
389 pub customers: Vec<BankingCustomer>,
391 pub accounts: Vec<BankAccount>,
393 pub transactions: Vec<BankTransaction>,
395 pub suspicious_count: usize,
397 pub scenario_count: usize,
399}
400
401#[derive(Debug, Clone, Default)]
403pub struct GraphExportSnapshot {
404 pub exported: bool,
406 pub graph_count: usize,
408 pub exports: HashMap<String, GraphExportInfo>,
410}
411
/// Details of a single graph export.
#[derive(Debug, Clone)]
pub struct GraphExportInfo {
    /// Export name.
    pub name: String,
    /// Export format label.
    pub format: String,
    /// Path associated with the export output.
    pub output_path: PathBuf,
    /// Number of nodes.
    pub node_count: usize,
    /// Number of edges.
    pub edge_count: usize,
}
426
427#[derive(Debug, Clone, Default)]
429pub struct SourcingSnapshot {
430 pub spend_analyses: Vec<SpendAnalysis>,
432 pub sourcing_projects: Vec<SourcingProject>,
434 pub qualifications: Vec<SupplierQualification>,
436 pub rfx_events: Vec<RfxEvent>,
438 pub bids: Vec<SupplierBid>,
440 pub bid_evaluations: Vec<BidEvaluation>,
442 pub contracts: Vec<ProcurementContract>,
444 pub catalog_items: Vec<CatalogItem>,
446 pub scorecards: Vec<SupplierScorecard>,
448}
449
450#[derive(Debug, Clone, Default)]
452pub struct FinancialReportingSnapshot {
453 pub financial_statements: Vec<FinancialStatement>,
455 pub bank_reconciliations: Vec<BankReconciliation>,
457}
458
/// HR record counts; `Default` is all zeros.
#[derive(Debug, Clone, Default)]
pub struct HrSnapshot {
    /// Payroll run count.
    pub payroll_run_count: usize,
    /// Payroll line-item count.
    pub payroll_line_item_count: usize,
    /// Time-entry count.
    pub time_entry_count: usize,
    /// Expense-report count.
    pub expense_report_count: usize,
}
471
/// Accounting-standards record counts; `Default` is all zeros.
#[derive(Debug, Clone, Default)]
pub struct AccountingStandardsSnapshot {
    /// Revenue contract count.
    pub revenue_contract_count: usize,
    /// Impairment test count.
    pub impairment_test_count: usize,
}
480
/// Manufacturing record counts; `Default` is all zeros.
#[derive(Debug, Clone, Default)]
pub struct ManufacturingSnapshot {
    /// Production order count.
    pub production_order_count: usize,
    /// Quality inspection count.
    pub quality_inspection_count: usize,
    /// Cycle count count.
    pub cycle_count_count: usize,
}
491
/// Sales-quote, KPI and budget record counts; `Default` is all zeros.
#[derive(Debug, Clone, Default)]
pub struct SalesKpiBudgetsSnapshot {
    /// Sales quote count.
    pub sales_quote_count: usize,
    /// KPI count.
    pub kpi_count: usize,
    /// Budget line count.
    pub budget_line_count: usize,
}
502
503#[derive(Debug, Clone, Default)]
505pub struct AnomalyLabels {
506 pub labels: Vec<LabeledAnomaly>,
508 pub summary: Option<AnomalySummary>,
510 pub by_type: HashMap<String, usize>,
512}
513
514#[derive(Debug, Clone, Default)]
516pub struct BalanceValidationResult {
517 pub validated: bool,
519 pub is_balanced: bool,
521 pub entries_processed: u64,
523 pub total_debits: rust_decimal::Decimal,
525 pub total_credits: rust_decimal::Decimal,
527 pub accounts_tracked: usize,
529 pub companies_tracked: usize,
531 pub validation_errors: Vec<ValidationError>,
533 pub has_unbalanced_entries: bool,
535}
536
537#[derive(Debug)]
539pub struct EnhancedGenerationResult {
540 pub chart_of_accounts: ChartOfAccounts,
542 pub master_data: MasterDataSnapshot,
544 pub document_flows: DocumentFlowSnapshot,
546 pub subledger: SubledgerSnapshot,
548 pub ocpm: OcpmSnapshot,
550 pub audit: AuditSnapshot,
552 pub banking: BankingSnapshot,
554 pub graph_export: GraphExportSnapshot,
556 pub sourcing: SourcingSnapshot,
558 pub financial_reporting: FinancialReportingSnapshot,
560 pub hr: HrSnapshot,
562 pub accounting_standards: AccountingStandardsSnapshot,
564 pub manufacturing: ManufacturingSnapshot,
566 pub sales_kpi_budgets: SalesKpiBudgetsSnapshot,
568 pub journal_entries: Vec<JournalEntry>,
570 pub anomaly_labels: AnomalyLabels,
572 pub balance_validation: BalanceValidationResult,
574 pub data_quality_stats: DataQualityStats,
576 pub statistics: EnhancedGenerationStatistics,
578 pub lineage: Option<super::lineage::LineageGraph>,
580 pub gate_result: Option<datasynth_eval::gates::GateResult>,
582}
583
584#[derive(Debug, Clone, Default, Serialize, Deserialize)]
586pub struct EnhancedGenerationStatistics {
587 pub total_entries: u64,
589 pub total_line_items: u64,
591 pub accounts_count: usize,
593 pub companies_count: usize,
595 pub period_months: u32,
597 pub vendor_count: usize,
599 pub customer_count: usize,
600 pub material_count: usize,
601 pub asset_count: usize,
602 pub employee_count: usize,
603 pub p2p_chain_count: usize,
605 pub o2c_chain_count: usize,
606 pub ap_invoice_count: usize,
608 pub ar_invoice_count: usize,
609 pub ocpm_event_count: usize,
611 pub ocpm_object_count: usize,
612 pub ocpm_case_count: usize,
613 pub audit_engagement_count: usize,
615 pub audit_workpaper_count: usize,
616 pub audit_evidence_count: usize,
617 pub audit_risk_count: usize,
618 pub audit_finding_count: usize,
619 pub audit_judgment_count: usize,
620 pub anomalies_injected: usize,
622 pub data_quality_issues: usize,
624 pub banking_customer_count: usize,
626 pub banking_account_count: usize,
627 pub banking_transaction_count: usize,
628 pub banking_suspicious_count: usize,
629 pub graph_export_count: usize,
631 pub graph_node_count: usize,
632 pub graph_edge_count: usize,
633 #[serde(default)]
635 pub llm_enrichment_ms: u64,
636 #[serde(default)]
638 pub llm_vendors_enriched: usize,
639 #[serde(default)]
641 pub diffusion_enhancement_ms: u64,
642 #[serde(default)]
644 pub diffusion_samples_generated: usize,
645 #[serde(default)]
647 pub causal_generation_ms: u64,
648 #[serde(default)]
650 pub causal_samples_generated: usize,
651 #[serde(default)]
653 pub causal_validation_passed: Option<bool>,
654 #[serde(default)]
656 pub sourcing_project_count: usize,
657 #[serde(default)]
658 pub rfx_event_count: usize,
659 #[serde(default)]
660 pub bid_count: usize,
661 #[serde(default)]
662 pub contract_count: usize,
663 #[serde(default)]
664 pub catalog_item_count: usize,
665 #[serde(default)]
666 pub scorecard_count: usize,
667 #[serde(default)]
669 pub financial_statement_count: usize,
670 #[serde(default)]
671 pub bank_reconciliation_count: usize,
672 #[serde(default)]
674 pub payroll_run_count: usize,
675 #[serde(default)]
676 pub time_entry_count: usize,
677 #[serde(default)]
678 pub expense_report_count: usize,
679 #[serde(default)]
681 pub revenue_contract_count: usize,
682 #[serde(default)]
683 pub impairment_test_count: usize,
684 #[serde(default)]
686 pub production_order_count: usize,
687 #[serde(default)]
688 pub quality_inspection_count: usize,
689 #[serde(default)]
690 pub cycle_count_count: usize,
691 #[serde(default)]
693 pub sales_quote_count: usize,
694 #[serde(default)]
695 pub kpi_count: usize,
696 #[serde(default)]
697 pub budget_line_count: usize,
698}
699
700pub struct EnhancedOrchestrator {
702 config: GeneratorConfig,
703 phase_config: PhaseConfig,
704 coa: Option<Arc<ChartOfAccounts>>,
705 master_data: MasterDataSnapshot,
706 seed: u64,
707 multi_progress: Option<MultiProgress>,
708 resource_guard: ResourceGuard,
710 output_path: Option<PathBuf>,
712 copula_generators: Vec<CopulaGeneratorSpec>,
714}
715
716impl EnhancedOrchestrator {
717 pub fn new(config: GeneratorConfig, phase_config: PhaseConfig) -> SynthResult<Self> {
719 datasynth_config::validate_config(&config)?;
720
721 let seed = config.global.seed.unwrap_or_else(rand::random);
722
723 let resource_guard = Self::build_resource_guard(&config, None);
725
726 Ok(Self {
727 config,
728 phase_config,
729 coa: None,
730 master_data: MasterDataSnapshot::default(),
731 seed,
732 multi_progress: None,
733 resource_guard,
734 output_path: None,
735 copula_generators: Vec::new(),
736 })
737 }
738
739 pub fn with_defaults(config: GeneratorConfig) -> SynthResult<Self> {
741 Self::new(config, PhaseConfig::default())
742 }
743
744 pub fn with_progress(mut self, show: bool) -> Self {
746 self.phase_config.show_progress = show;
747 if show {
748 self.multi_progress = Some(MultiProgress::new());
749 }
750 self
751 }
752
753 pub fn with_output_path<P: Into<PathBuf>>(mut self, path: P) -> Self {
755 let path = path.into();
756 self.output_path = Some(path.clone());
757 self.resource_guard = Self::build_resource_guard(&self.config, Some(path));
759 self
760 }
761
762 pub fn has_copulas(&self) -> bool {
767 !self.copula_generators.is_empty()
768 }
769
770 pub fn copulas(&self) -> &[CopulaGeneratorSpec] {
776 &self.copula_generators
777 }
778
779 pub fn copulas_mut(&mut self) -> &mut [CopulaGeneratorSpec] {
783 &mut self.copula_generators
784 }
785
786 pub fn sample_from_copula(&mut self, copula_name: &str) -> Option<Vec<f64>> {
790 self.copula_generators
791 .iter_mut()
792 .find(|c| c.name == copula_name)
793 .map(|c| c.generator.sample())
794 }
795
796 pub fn from_fingerprint(
819 fingerprint_path: &std::path::Path,
820 phase_config: PhaseConfig,
821 scale: f64,
822 ) -> SynthResult<Self> {
823 info!("Loading fingerprint from: {}", fingerprint_path.display());
824
825 let reader = FingerprintReader::new();
827 let fingerprint = reader
828 .read_from_file(fingerprint_path)
829 .map_err(|e| SynthError::config(format!("Failed to read fingerprint: {}", e)))?;
830
831 Self::from_fingerprint_data(fingerprint, phase_config, scale)
832 }
833
834 pub fn from_fingerprint_data(
841 fingerprint: Fingerprint,
842 phase_config: PhaseConfig,
843 scale: f64,
844 ) -> SynthResult<Self> {
845 info!(
846 "Synthesizing config from fingerprint (version: {}, tables: {})",
847 fingerprint.manifest.version,
848 fingerprint.schema.tables.len()
849 );
850
851 let seed: u64 = rand::random();
853
854 let options = SynthesisOptions {
856 scale,
857 seed: Some(seed),
858 preserve_correlations: true,
859 inject_anomalies: true,
860 };
861 let synthesizer = ConfigSynthesizer::with_options(options);
862
863 let synthesis_result = synthesizer
865 .synthesize_full(&fingerprint, seed)
866 .map_err(|e| {
867 SynthError::config(format!(
868 "Failed to synthesize config from fingerprint: {}",
869 e
870 ))
871 })?;
872
873 let mut config = if let Some(ref industry) = fingerprint.manifest.source.industry {
875 Self::base_config_for_industry(industry)
876 } else {
877 Self::base_config_for_industry("manufacturing")
878 };
879
880 config = Self::apply_config_patch(config, &synthesis_result.config_patch);
882
883 info!(
885 "Config synthesized: {} tables, scale={:.2}, copula generators: {}",
886 fingerprint.schema.tables.len(),
887 scale,
888 synthesis_result.copula_generators.len()
889 );
890
891 if !synthesis_result.copula_generators.is_empty() {
892 for spec in &synthesis_result.copula_generators {
893 info!(
894 " Copula '{}' for table '{}': {} columns",
895 spec.name,
896 spec.table,
897 spec.columns.len()
898 );
899 }
900 }
901
902 let mut orchestrator = Self::new(config, phase_config)?;
904
905 orchestrator.copula_generators = synthesis_result.copula_generators;
907
908 Ok(orchestrator)
909 }
910
911 fn base_config_for_industry(industry: &str) -> GeneratorConfig {
913 use datasynth_config::presets::create_preset;
914 use datasynth_config::TransactionVolume;
915 use datasynth_core::models::{CoAComplexity, IndustrySector};
916
917 let sector = match industry.to_lowercase().as_str() {
918 "manufacturing" => IndustrySector::Manufacturing,
919 "retail" => IndustrySector::Retail,
920 "financial" | "financial_services" => IndustrySector::FinancialServices,
921 "healthcare" => IndustrySector::Healthcare,
922 "technology" | "tech" => IndustrySector::Technology,
923 _ => IndustrySector::Manufacturing,
924 };
925
926 create_preset(
928 sector,
929 1, 12, CoAComplexity::Medium,
932 TransactionVolume::TenK,
933 )
934 }
935
936 fn apply_config_patch(
938 mut config: GeneratorConfig,
939 patch: &datasynth_fingerprint::synthesis::ConfigPatch,
940 ) -> GeneratorConfig {
941 use datasynth_fingerprint::synthesis::ConfigValue;
942
943 for (key, value) in patch.values() {
944 match (key.as_str(), value) {
945 ("transactions.count", ConfigValue::Integer(n)) => {
948 info!(
949 "Fingerprint suggests {} transactions (apply via company volumes)",
950 n
951 );
952 }
953 ("global.period_months", ConfigValue::Integer(n)) => {
954 config.global.period_months = *n as u32;
955 }
956 ("global.start_date", ConfigValue::String(s)) => {
957 config.global.start_date = s.clone();
958 }
959 ("global.seed", ConfigValue::Integer(n)) => {
960 config.global.seed = Some(*n as u64);
961 }
962 ("fraud.enabled", ConfigValue::Bool(b)) => {
963 config.fraud.enabled = *b;
964 }
965 ("fraud.fraud_rate", ConfigValue::Float(f)) => {
966 config.fraud.fraud_rate = *f;
967 }
968 ("data_quality.enabled", ConfigValue::Bool(b)) => {
969 config.data_quality.enabled = *b;
970 }
971 ("anomaly_injection.enabled", ConfigValue::Bool(b)) => {
973 config.fraud.enabled = *b;
974 }
975 ("anomaly_injection.overall_rate", ConfigValue::Float(f)) => {
976 config.fraud.fraud_rate = *f;
977 }
978 _ => {
979 debug!("Ignoring unknown config patch key: {}", key);
980 }
981 }
982 }
983
984 config
985 }
986
987 fn build_resource_guard(
989 config: &GeneratorConfig,
990 output_path: Option<PathBuf>,
991 ) -> ResourceGuard {
992 let mut builder = ResourceGuardBuilder::new();
993
994 if config.global.memory_limit_mb > 0 {
996 builder = builder.memory_limit(config.global.memory_limit_mb);
997 }
998
999 if let Some(path) = output_path {
1001 builder = builder.output_path(path).min_free_disk(100); }
1003
1004 builder = builder.conservative();
1006
1007 builder.build()
1008 }
1009
1010 fn check_resources(&self) -> SynthResult<DegradationLevel> {
1015 self.resource_guard.check()
1016 }
1017
1018 fn check_resources_with_log(&self, phase: &str) -> SynthResult<DegradationLevel> {
1020 let level = self.resource_guard.check()?;
1021
1022 if level != DegradationLevel::Normal {
1023 warn!(
1024 "Resource degradation at {}: level={}, memory={}MB, disk={}MB",
1025 phase,
1026 level,
1027 self.resource_guard.current_memory_mb(),
1028 self.resource_guard.available_disk_mb()
1029 );
1030 }
1031
1032 Ok(level)
1033 }
1034
1035 fn get_degradation_actions(&self) -> DegradationActions {
1037 self.resource_guard.get_actions()
1038 }
1039
1040 fn check_memory_limit(&self) -> SynthResult<()> {
1042 self.check_resources()?;
1043 Ok(())
1044 }
1045
1046 pub fn generate(&mut self) -> SynthResult<EnhancedGenerationResult> {
1048 info!("Starting enhanced generation workflow");
1049 info!(
1050 "Config: industry={:?}, period_months={}, companies={}",
1051 self.config.global.industry,
1052 self.config.global.period_months,
1053 self.config.companies.len()
1054 );
1055
1056 let initial_level = self.check_resources_with_log("initial")?;
1058 if initial_level == DegradationLevel::Emergency {
1059 return Err(SynthError::resource(
1060 "Insufficient resources to start generation",
1061 ));
1062 }
1063
1064 let mut stats = EnhancedGenerationStatistics {
1065 companies_count: self.config.companies.len(),
1066 period_months: self.config.global.period_months,
1067 ..Default::default()
1068 };
1069
1070 let coa = self.phase_chart_of_accounts(&mut stats)?;
1072
1073 self.phase_master_data(&mut stats)?;
1075
1076 let (document_flows, subledger) = self.phase_document_flows(&mut stats)?;
1078
1079 let ocpm = self.phase_ocpm_events(&document_flows, &mut stats)?;
1081
1082 let mut entries = self.phase_journal_entries(&coa, &document_flows, &mut stats)?;
1084
1085 let actions = self.get_degradation_actions();
1087
1088 let anomaly_labels = self.phase_anomaly_injection(&mut entries, &actions, &mut stats)?;
1090
1091 let balance_validation = self.phase_balance_validation(&entries)?;
1093
1094 let data_quality_stats =
1096 self.phase_data_quality_injection(&mut entries, &actions, &mut stats)?;
1097
1098 let audit = self.phase_audit_data(&entries, &mut stats)?;
1100
1101 let banking = self.phase_banking_data(&mut stats)?;
1103
1104 let graph_export = self.phase_graph_export(&entries, &coa, &mut stats)?;
1106
1107 self.phase_hypergraph_export(&coa, &entries, &document_flows, &mut stats)?;
1109
1110 self.phase_llm_enrichment(&mut stats);
1112
1113 self.phase_diffusion_enhancement(&mut stats);
1115
1116 self.phase_causal_overlay(&mut stats);
1118
1119 let sourcing = self.phase_sourcing_data(&mut stats)?;
1121
1122 let financial_reporting = self.phase_financial_reporting(&document_flows, &mut stats)?;
1124
1125 let hr = self.phase_hr_data(&mut stats)?;
1127
1128 let accounting_standards = self.phase_accounting_standards(&mut stats)?;
1130
1131 let manufacturing_snap = self.phase_manufacturing(&mut stats)?;
1133
1134 let sales_kpi_budgets = self.phase_sales_kpi_budgets(&coa, &mut stats)?;
1136
1137 let resource_stats = self.resource_guard.stats();
1139 info!(
1140 "Generation workflow complete. Resource stats: memory_peak={}MB, disk_written={}bytes, degradation_level={}",
1141 resource_stats.memory.peak_resident_bytes / (1024 * 1024),
1142 resource_stats.disk.estimated_bytes_written,
1143 resource_stats.degradation_level
1144 );
1145
1146 let lineage = self.build_lineage_graph();
1148
1149 Ok(EnhancedGenerationResult {
1150 chart_of_accounts: (*coa).clone(),
1151 master_data: self.master_data.clone(),
1152 document_flows,
1153 subledger,
1154 ocpm,
1155 audit,
1156 banking,
1157 graph_export,
1158 sourcing,
1159 financial_reporting,
1160 hr,
1161 accounting_standards,
1162 manufacturing: manufacturing_snap,
1163 sales_kpi_budgets,
1164 journal_entries: entries,
1165 anomaly_labels,
1166 balance_validation,
1167 data_quality_stats,
1168 statistics: stats,
1169 lineage: Some(lineage),
1170 gate_result: None,
1171 })
1172 }
1173
1174 fn phase_chart_of_accounts(
1180 &mut self,
1181 stats: &mut EnhancedGenerationStatistics,
1182 ) -> SynthResult<Arc<ChartOfAccounts>> {
1183 info!("Phase 1: Generating Chart of Accounts");
1184 let coa = self.generate_coa()?;
1185 stats.accounts_count = coa.account_count();
1186 info!(
1187 "Chart of Accounts generated: {} accounts",
1188 stats.accounts_count
1189 );
1190 self.check_resources_with_log("post-coa")?;
1191 Ok(coa)
1192 }
1193
1194 fn phase_master_data(&mut self, stats: &mut EnhancedGenerationStatistics) -> SynthResult<()> {
1196 if self.phase_config.generate_master_data {
1197 info!("Phase 2: Generating Master Data");
1198 self.generate_master_data()?;
1199 stats.vendor_count = self.master_data.vendors.len();
1200 stats.customer_count = self.master_data.customers.len();
1201 stats.material_count = self.master_data.materials.len();
1202 stats.asset_count = self.master_data.assets.len();
1203 stats.employee_count = self.master_data.employees.len();
1204 info!(
1205 "Master data generated: {} vendors, {} customers, {} materials, {} assets, {} employees",
1206 stats.vendor_count, stats.customer_count, stats.material_count,
1207 stats.asset_count, stats.employee_count
1208 );
1209 self.check_resources_with_log("post-master-data")?;
1210 } else {
1211 debug!("Phase 2: Skipped (master data generation disabled)");
1212 }
1213 Ok(())
1214 }
1215
1216 fn phase_document_flows(
1218 &mut self,
1219 stats: &mut EnhancedGenerationStatistics,
1220 ) -> SynthResult<(DocumentFlowSnapshot, SubledgerSnapshot)> {
1221 let mut document_flows = DocumentFlowSnapshot::default();
1222 let mut subledger = SubledgerSnapshot::default();
1223
1224 if self.phase_config.generate_document_flows && !self.master_data.vendors.is_empty() {
1225 info!("Phase 3: Generating Document Flows");
1226 self.generate_document_flows(&mut document_flows)?;
1227 stats.p2p_chain_count = document_flows.p2p_chains.len();
1228 stats.o2c_chain_count = document_flows.o2c_chains.len();
1229 info!(
1230 "Document flows generated: {} P2P chains, {} O2C chains",
1231 stats.p2p_chain_count, stats.o2c_chain_count
1232 );
1233
1234 debug!("Phase 3b: Linking document flows to subledgers");
1236 subledger = self.link_document_flows_to_subledgers(&document_flows)?;
1237 stats.ap_invoice_count = subledger.ap_invoices.len();
1238 stats.ar_invoice_count = subledger.ar_invoices.len();
1239 debug!(
1240 "Subledgers linked: {} AP invoices, {} AR invoices",
1241 stats.ap_invoice_count, stats.ar_invoice_count
1242 );
1243
1244 self.check_resources_with_log("post-document-flows")?;
1245 } else {
1246 debug!("Phase 3: Skipped (document flow generation disabled or no master data)");
1247 }
1248
1249 Ok((document_flows, subledger))
1250 }
1251
1252 fn phase_ocpm_events(
1254 &mut self,
1255 document_flows: &DocumentFlowSnapshot,
1256 stats: &mut EnhancedGenerationStatistics,
1257 ) -> SynthResult<OcpmSnapshot> {
1258 if self.phase_config.generate_ocpm_events && !document_flows.p2p_chains.is_empty() {
1259 info!("Phase 3c: Generating OCPM Events");
1260 let ocpm_snapshot = self.generate_ocpm_events(document_flows)?;
1261 stats.ocpm_event_count = ocpm_snapshot.event_count;
1262 stats.ocpm_object_count = ocpm_snapshot.object_count;
1263 stats.ocpm_case_count = ocpm_snapshot.case_count;
1264 info!(
1265 "OCPM events generated: {} events, {} objects, {} cases",
1266 stats.ocpm_event_count, stats.ocpm_object_count, stats.ocpm_case_count
1267 );
1268 self.check_resources_with_log("post-ocpm")?;
1269 Ok(ocpm_snapshot)
1270 } else {
1271 debug!("Phase 3c: Skipped (OCPM generation disabled or no document flows)");
1272 Ok(OcpmSnapshot::default())
1273 }
1274 }
1275
1276 fn phase_journal_entries(
1278 &mut self,
1279 coa: &Arc<ChartOfAccounts>,
1280 document_flows: &DocumentFlowSnapshot,
1281 stats: &mut EnhancedGenerationStatistics,
1282 ) -> SynthResult<Vec<JournalEntry>> {
1283 let mut entries = Vec::new();
1284
1285 if self.phase_config.generate_document_flows && !document_flows.p2p_chains.is_empty() {
1287 debug!("Phase 4a: Generating JEs from document flows");
1288 let flow_entries = self.generate_jes_from_document_flows(document_flows)?;
1289 debug!("Generated {} JEs from document flows", flow_entries.len());
1290 entries.extend(flow_entries);
1291 }
1292
1293 if self.phase_config.generate_journal_entries {
1295 info!("Phase 4: Generating Journal Entries");
1296 let je_entries = self.generate_journal_entries(coa)?;
1297 info!("Generated {} standalone journal entries", je_entries.len());
1298 entries.extend(je_entries);
1299 } else {
1300 debug!("Phase 4: Skipped (journal entry generation disabled)");
1301 }
1302
1303 if !entries.is_empty() {
1304 stats.total_entries = entries.len() as u64;
1305 stats.total_line_items = entries.iter().map(|e| e.line_count() as u64).sum();
1306 info!(
1307 "Total entries: {}, total line items: {}",
1308 stats.total_entries, stats.total_line_items
1309 );
1310 self.check_resources_with_log("post-journal-entries")?;
1311 }
1312
1313 Ok(entries)
1314 }
1315
1316 fn phase_anomaly_injection(
1318 &mut self,
1319 entries: &mut [JournalEntry],
1320 actions: &DegradationActions,
1321 stats: &mut EnhancedGenerationStatistics,
1322 ) -> SynthResult<AnomalyLabels> {
1323 if self.phase_config.inject_anomalies
1324 && !entries.is_empty()
1325 && !actions.skip_anomaly_injection
1326 {
1327 info!("Phase 5: Injecting Anomalies");
1328 let result = self.inject_anomalies(entries)?;
1329 stats.anomalies_injected = result.labels.len();
1330 info!("Injected {} anomalies", stats.anomalies_injected);
1331 self.check_resources_with_log("post-anomaly-injection")?;
1332 Ok(result)
1333 } else if actions.skip_anomaly_injection {
1334 warn!("Phase 5: Skipped due to resource degradation");
1335 Ok(AnomalyLabels::default())
1336 } else {
1337 debug!("Phase 5: Skipped (anomaly injection disabled or no entries)");
1338 Ok(AnomalyLabels::default())
1339 }
1340 }
1341
1342 fn phase_balance_validation(
1344 &mut self,
1345 entries: &[JournalEntry],
1346 ) -> SynthResult<BalanceValidationResult> {
1347 if self.phase_config.validate_balances && !entries.is_empty() {
1348 debug!("Phase 6: Validating Balances");
1349 let balance_validation = self.validate_journal_entries(entries)?;
1350 if balance_validation.is_balanced {
1351 debug!("Balance validation passed");
1352 } else {
1353 warn!(
1354 "Balance validation found {} errors",
1355 balance_validation.validation_errors.len()
1356 );
1357 }
1358 Ok(balance_validation)
1359 } else {
1360 Ok(BalanceValidationResult::default())
1361 }
1362 }
1363
1364 fn phase_data_quality_injection(
1366 &mut self,
1367 entries: &mut [JournalEntry],
1368 actions: &DegradationActions,
1369 stats: &mut EnhancedGenerationStatistics,
1370 ) -> SynthResult<DataQualityStats> {
1371 if self.phase_config.inject_data_quality
1372 && !entries.is_empty()
1373 && !actions.skip_data_quality
1374 {
1375 info!("Phase 7: Injecting Data Quality Variations");
1376 let dq_stats = self.inject_data_quality(entries)?;
1377 stats.data_quality_issues = dq_stats.records_with_issues;
1378 info!("Injected {} data quality issues", stats.data_quality_issues);
1379 self.check_resources_with_log("post-data-quality")?;
1380 Ok(dq_stats)
1381 } else if actions.skip_data_quality {
1382 warn!("Phase 7: Skipped due to resource degradation");
1383 Ok(DataQualityStats::default())
1384 } else {
1385 debug!("Phase 7: Skipped (data quality injection disabled or no entries)");
1386 Ok(DataQualityStats::default())
1387 }
1388 }
1389
1390 fn phase_audit_data(
1392 &mut self,
1393 entries: &[JournalEntry],
1394 stats: &mut EnhancedGenerationStatistics,
1395 ) -> SynthResult<AuditSnapshot> {
1396 if self.phase_config.generate_audit {
1397 info!("Phase 8: Generating Audit Data");
1398 let audit_snapshot = self.generate_audit_data(entries)?;
1399 stats.audit_engagement_count = audit_snapshot.engagements.len();
1400 stats.audit_workpaper_count = audit_snapshot.workpapers.len();
1401 stats.audit_evidence_count = audit_snapshot.evidence.len();
1402 stats.audit_risk_count = audit_snapshot.risk_assessments.len();
1403 stats.audit_finding_count = audit_snapshot.findings.len();
1404 stats.audit_judgment_count = audit_snapshot.judgments.len();
1405 info!(
1406 "Audit data generated: {} engagements, {} workpapers, {} evidence, {} risks, {} findings, {} judgments",
1407 stats.audit_engagement_count, stats.audit_workpaper_count,
1408 stats.audit_evidence_count, stats.audit_risk_count,
1409 stats.audit_finding_count, stats.audit_judgment_count
1410 );
1411 self.check_resources_with_log("post-audit")?;
1412 Ok(audit_snapshot)
1413 } else {
1414 debug!("Phase 8: Skipped (audit generation disabled)");
1415 Ok(AuditSnapshot::default())
1416 }
1417 }
1418
1419 fn phase_banking_data(
1421 &mut self,
1422 stats: &mut EnhancedGenerationStatistics,
1423 ) -> SynthResult<BankingSnapshot> {
1424 if self.phase_config.generate_banking && self.config.banking.enabled {
1425 info!("Phase 9: Generating Banking KYC/AML Data");
1426 let banking_snapshot = self.generate_banking_data()?;
1427 stats.banking_customer_count = banking_snapshot.customers.len();
1428 stats.banking_account_count = banking_snapshot.accounts.len();
1429 stats.banking_transaction_count = banking_snapshot.transactions.len();
1430 stats.banking_suspicious_count = banking_snapshot.suspicious_count;
1431 info!(
1432 "Banking data generated: {} customers, {} accounts, {} transactions ({} suspicious)",
1433 stats.banking_customer_count, stats.banking_account_count,
1434 stats.banking_transaction_count, stats.banking_suspicious_count
1435 );
1436 self.check_resources_with_log("post-banking")?;
1437 Ok(banking_snapshot)
1438 } else {
1439 debug!("Phase 9: Skipped (banking generation disabled)");
1440 Ok(BankingSnapshot::default())
1441 }
1442 }
1443
1444 fn phase_graph_export(
1446 &mut self,
1447 entries: &[JournalEntry],
1448 coa: &Arc<ChartOfAccounts>,
1449 stats: &mut EnhancedGenerationStatistics,
1450 ) -> SynthResult<GraphExportSnapshot> {
1451 if (self.phase_config.generate_graph_export || self.config.graph_export.enabled)
1452 && !entries.is_empty()
1453 {
1454 info!("Phase 10: Exporting Accounting Network Graphs");
1455 match self.export_graphs(entries, coa, stats) {
1456 Ok(snapshot) => {
1457 info!(
1458 "Graph export complete: {} graphs ({} nodes, {} edges)",
1459 snapshot.graph_count, stats.graph_node_count, stats.graph_edge_count
1460 );
1461 Ok(snapshot)
1462 }
1463 Err(e) => {
1464 warn!("Phase 10: Graph export failed: {}", e);
1465 Ok(GraphExportSnapshot::default())
1466 }
1467 }
1468 } else {
1469 debug!("Phase 10: Skipped (graph export disabled or no entries)");
1470 Ok(GraphExportSnapshot::default())
1471 }
1472 }
1473
1474 fn phase_hypergraph_export(
1476 &self,
1477 coa: &Arc<ChartOfAccounts>,
1478 entries: &[JournalEntry],
1479 document_flows: &DocumentFlowSnapshot,
1480 stats: &mut EnhancedGenerationStatistics,
1481 ) -> SynthResult<()> {
1482 if self.config.graph_export.hypergraph.enabled && !entries.is_empty() {
1483 info!("Phase 10b: Exporting Multi-Layer Hypergraph");
1484 match self.export_hypergraph(coa, entries, document_flows, stats) {
1485 Ok(info) => {
1486 info!(
1487 "Hypergraph export complete: {} nodes, {} edges, {} hyperedges",
1488 info.node_count, info.edge_count, info.hyperedge_count
1489 );
1490 }
1491 Err(e) => {
1492 warn!("Phase 10b: Hypergraph export failed: {}", e);
1493 }
1494 }
1495 } else {
1496 debug!("Phase 10b: Skipped (hypergraph export disabled or no entries)");
1497 }
1498 Ok(())
1499 }
1500
1501 fn phase_llm_enrichment(&mut self, stats: &mut EnhancedGenerationStatistics) {
1507 if !self.config.llm.enabled {
1508 debug!("Phase 11: Skipped (LLM enrichment disabled)");
1509 return;
1510 }
1511
1512 info!("Phase 11: Starting LLM Enrichment");
1513 let start = std::time::Instant::now();
1514
1515 let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
1516 let provider = Arc::new(MockLlmProvider::new(self.seed));
1517 let enricher = VendorLlmEnricher::new(provider);
1518
1519 let industry = format!("{:?}", self.config.global.industry);
1520 let max_enrichments = self
1521 .config
1522 .llm
1523 .max_vendor_enrichments
1524 .min(self.master_data.vendors.len());
1525
1526 let mut enriched_count = 0usize;
1527 for vendor in self.master_data.vendors.iter_mut().take(max_enrichments) {
1528 match enricher.enrich_vendor_name(&industry, "general", &vendor.country) {
1529 Ok(name) => {
1530 vendor.name = name;
1531 enriched_count += 1;
1532 }
1533 Err(e) => {
1534 warn!(
1535 "LLM vendor enrichment failed for {}: {}",
1536 vendor.vendor_id, e
1537 );
1538 }
1539 }
1540 }
1541
1542 enriched_count
1543 }));
1544
1545 match result {
1546 Ok(enriched_count) => {
1547 stats.llm_vendors_enriched = enriched_count;
1548 let elapsed = start.elapsed();
1549 stats.llm_enrichment_ms = elapsed.as_millis() as u64;
1550 info!(
1551 "Phase 11 complete: {} vendors enriched in {}ms",
1552 enriched_count, stats.llm_enrichment_ms
1553 );
1554 }
1555 Err(_) => {
1556 let elapsed = start.elapsed();
1557 stats.llm_enrichment_ms = elapsed.as_millis() as u64;
1558 warn!("Phase 11: LLM enrichment failed (panic caught), continuing");
1559 }
1560 }
1561 }
1562
1563 fn phase_diffusion_enhancement(&self, stats: &mut EnhancedGenerationStatistics) {
1569 if !self.config.diffusion.enabled {
1570 debug!("Phase 12: Skipped (diffusion enhancement disabled)");
1571 return;
1572 }
1573
1574 info!("Phase 12: Starting Diffusion Enhancement");
1575 let start = std::time::Instant::now();
1576
1577 let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
1578 let means = vec![5000.0, 3.0, 2.0]; let stds = vec![2000.0, 1.5, 1.0];
1581
1582 let diffusion_config = DiffusionConfig {
1583 n_steps: self.config.diffusion.n_steps,
1584 seed: self.seed,
1585 ..Default::default()
1586 };
1587
1588 let backend = StatisticalDiffusionBackend::new(means, stds, diffusion_config);
1589
1590 let n_samples = self.config.diffusion.sample_size;
1591 let n_features = 3; let samples = backend.generate(n_samples, n_features, self.seed);
1593
1594 samples.len()
1595 }));
1596
1597 match result {
1598 Ok(sample_count) => {
1599 stats.diffusion_samples_generated = sample_count;
1600 let elapsed = start.elapsed();
1601 stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
1602 info!(
1603 "Phase 12 complete: {} diffusion samples generated in {}ms",
1604 sample_count, stats.diffusion_enhancement_ms
1605 );
1606 }
1607 Err(_) => {
1608 let elapsed = start.elapsed();
1609 stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
1610 warn!("Phase 12: Diffusion enhancement failed (panic caught), continuing");
1611 }
1612 }
1613 }
1614
1615 fn phase_causal_overlay(&self, stats: &mut EnhancedGenerationStatistics) {
1622 if !self.config.causal.enabled {
1623 debug!("Phase 13: Skipped (causal generation disabled)");
1624 return;
1625 }
1626
1627 info!("Phase 13: Starting Causal Overlay");
1628 let start = std::time::Instant::now();
1629
1630 let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
1631 let graph = match self.config.causal.template.as_str() {
1633 "revenue_cycle" => CausalGraph::revenue_cycle_template(),
1634 _ => CausalGraph::fraud_detection_template(),
1635 };
1636
1637 let scm = StructuralCausalModel::new(graph.clone())
1638 .map_err(|e| SynthError::generation(format!("Failed to build SCM: {}", e)))?;
1639
1640 let n_samples = self.config.causal.sample_size;
1641 let samples = scm
1642 .generate(n_samples, self.seed)
1643 .map_err(|e| SynthError::generation(format!("SCM generation failed: {}", e)))?;
1644
1645 let validation_passed = if self.config.causal.validate {
1647 let report = CausalValidator::validate_causal_structure(&samples, &graph);
1648 if report.valid {
1649 info!(
1650 "Causal validation passed: all {} checks OK",
1651 report.checks.len()
1652 );
1653 } else {
1654 warn!(
1655 "Causal validation: {} violations detected: {:?}",
1656 report.violations.len(),
1657 report.violations
1658 );
1659 }
1660 Some(report.valid)
1661 } else {
1662 None
1663 };
1664
1665 Ok::<(usize, Option<bool>), SynthError>((samples.len(), validation_passed))
1666 }));
1667
1668 match result {
1669 Ok(Ok((sample_count, validation_passed))) => {
1670 stats.causal_samples_generated = sample_count;
1671 stats.causal_validation_passed = validation_passed;
1672 let elapsed = start.elapsed();
1673 stats.causal_generation_ms = elapsed.as_millis() as u64;
1674 info!(
1675 "Phase 13 complete: {} causal samples generated in {}ms (validation: {:?})",
1676 sample_count, stats.causal_generation_ms, validation_passed,
1677 );
1678 }
1679 Ok(Err(e)) => {
1680 let elapsed = start.elapsed();
1681 stats.causal_generation_ms = elapsed.as_millis() as u64;
1682 warn!("Phase 13: Causal generation failed: {}", e);
1683 }
1684 Err(_) => {
1685 let elapsed = start.elapsed();
1686 stats.causal_generation_ms = elapsed.as_millis() as u64;
1687 warn!("Phase 13: Causal generation failed (panic caught), continuing");
1688 }
1689 }
1690 }
1691
1692 fn phase_sourcing_data(
1694 &mut self,
1695 stats: &mut EnhancedGenerationStatistics,
1696 ) -> SynthResult<SourcingSnapshot> {
1697 if !self.phase_config.generate_sourcing && !self.config.source_to_pay.enabled {
1698 debug!("Phase 14: Skipped (sourcing generation disabled)");
1699 return Ok(SourcingSnapshot::default());
1700 }
1701
1702 info!("Phase 14: Generating S2C Sourcing Data");
1703 let seed = self.seed;
1704
1705 let vendor_ids: Vec<String> = self
1707 .master_data
1708 .vendors
1709 .iter()
1710 .map(|v| v.vendor_id.clone())
1711 .collect();
1712 if vendor_ids.is_empty() {
1713 debug!("Phase 14: Skipped (no vendors available)");
1714 return Ok(SourcingSnapshot::default());
1715 }
1716
1717 let categories: Vec<(String, String)> = vec![
1718 ("CAT-RAW".to_string(), "Raw Materials".to_string()),
1719 ("CAT-OFF".to_string(), "Office Supplies".to_string()),
1720 ("CAT-IT".to_string(), "IT Equipment".to_string()),
1721 ("CAT-SVC".to_string(), "Professional Services".to_string()),
1722 ("CAT-LOG".to_string(), "Logistics".to_string()),
1723 ];
1724 let categories_with_spend: Vec<(String, String, rust_decimal::Decimal)> = categories
1725 .iter()
1726 .map(|(id, name)| {
1727 (
1728 id.clone(),
1729 name.clone(),
1730 rust_decimal::Decimal::from(100_000),
1731 )
1732 })
1733 .collect();
1734
1735 let company_code = self
1736 .config
1737 .companies
1738 .first()
1739 .map(|c| c.code.as_str())
1740 .unwrap_or("1000");
1741 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
1742 .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
1743 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
1744 let fiscal_year = start_date.year() as u16;
1745 let owner_ids: Vec<String> = self
1746 .master_data
1747 .employees
1748 .iter()
1749 .take(5)
1750 .map(|e| e.employee_id.clone())
1751 .collect();
1752 let owner_id = owner_ids.first().map(|s| s.as_str()).unwrap_or("BUYER-001");
1753
1754 let mut spend_gen = SpendAnalysisGenerator::new(seed);
1756 let spend_analyses =
1757 spend_gen.generate(company_code, &vendor_ids, &categories, fiscal_year);
1758
1759 let mut project_gen = SourcingProjectGenerator::new(seed + 1);
1761 let sourcing_projects = if owner_ids.is_empty() {
1762 Vec::new()
1763 } else {
1764 project_gen.generate(
1765 company_code,
1766 &categories_with_spend,
1767 &owner_ids,
1768 start_date,
1769 self.config.global.period_months,
1770 )
1771 };
1772 stats.sourcing_project_count = sourcing_projects.len();
1773
1774 let qual_vendor_ids: Vec<String> = vendor_ids.iter().take(20).cloned().collect();
1776 let mut qual_gen = QualificationGenerator::new(seed + 2);
1777 let qualifications = qual_gen.generate(
1778 company_code,
1779 &qual_vendor_ids,
1780 sourcing_projects.first().map(|p| p.project_id.as_str()),
1781 owner_id,
1782 start_date,
1783 );
1784
1785 let mut rfx_gen = RfxGenerator::new(seed + 3);
1787 let rfx_events: Vec<RfxEvent> = sourcing_projects
1788 .iter()
1789 .map(|proj| {
1790 let qualified_vids: Vec<String> = vendor_ids.iter().take(5).cloned().collect();
1791 rfx_gen.generate(
1792 company_code,
1793 &proj.project_id,
1794 &proj.category_id,
1795 &qualified_vids,
1796 owner_id,
1797 start_date,
1798 50000.0,
1799 )
1800 })
1801 .collect();
1802 stats.rfx_event_count = rfx_events.len();
1803
1804 let mut bid_gen = BidGenerator::new(seed + 4);
1806 let mut all_bids = Vec::new();
1807 for rfx in &rfx_events {
1808 let bidder_count = vendor_ids.len().clamp(2, 5);
1809 let responding: Vec<String> = vendor_ids.iter().take(bidder_count).cloned().collect();
1810 let bids = bid_gen.generate(rfx, &responding, start_date);
1811 all_bids.extend(bids);
1812 }
1813 stats.bid_count = all_bids.len();
1814
1815 let mut eval_gen = BidEvaluationGenerator::new(seed + 5);
1817 let bid_evaluations: Vec<BidEvaluation> = rfx_events
1818 .iter()
1819 .map(|rfx| {
1820 let rfx_bids: Vec<SupplierBid> = all_bids
1821 .iter()
1822 .filter(|b| b.rfx_id == rfx.rfx_id)
1823 .cloned()
1824 .collect();
1825 eval_gen.evaluate(rfx, &rfx_bids, owner_id)
1826 })
1827 .collect();
1828
1829 let mut contract_gen = ContractGenerator::new(seed + 6);
1831 let contracts: Vec<ProcurementContract> = bid_evaluations
1832 .iter()
1833 .zip(rfx_events.iter())
1834 .filter_map(|(eval, rfx)| {
1835 eval.ranked_bids.first().and_then(|winner| {
1836 all_bids
1837 .iter()
1838 .find(|b| b.bid_id == winner.bid_id)
1839 .map(|winning_bid| {
1840 contract_gen.generate_from_bid(
1841 winning_bid,
1842 Some(&rfx.sourcing_project_id),
1843 &rfx.category_id,
1844 owner_id,
1845 start_date,
1846 )
1847 })
1848 })
1849 })
1850 .collect();
1851 stats.contract_count = contracts.len();
1852
1853 let mut catalog_gen = CatalogGenerator::new(seed + 7);
1855 let catalog_items = catalog_gen.generate(&contracts);
1856 stats.catalog_item_count = catalog_items.len();
1857
1858 let mut scorecard_gen = ScorecardGenerator::new(seed + 8);
1860 let vendor_contracts: Vec<(String, Vec<&ProcurementContract>)> = contracts
1861 .iter()
1862 .fold(
1863 std::collections::HashMap::<String, Vec<&ProcurementContract>>::new(),
1864 |mut acc, c| {
1865 acc.entry(c.vendor_id.clone()).or_default().push(c);
1866 acc
1867 },
1868 )
1869 .into_iter()
1870 .collect();
1871 let scorecards = scorecard_gen.generate(
1872 company_code,
1873 &vendor_contracts,
1874 start_date,
1875 end_date,
1876 owner_id,
1877 );
1878 stats.scorecard_count = scorecards.len();
1879
1880 info!(
1881 "S2C sourcing generated: {} projects, {} RFx, {} bids, {} contracts, {} catalog items, {} scorecards",
1882 stats.sourcing_project_count, stats.rfx_event_count, stats.bid_count,
1883 stats.contract_count, stats.catalog_item_count, stats.scorecard_count
1884 );
1885 self.check_resources_with_log("post-sourcing")?;
1886
1887 Ok(SourcingSnapshot {
1888 spend_analyses,
1889 sourcing_projects,
1890 qualifications,
1891 rfx_events,
1892 bids: all_bids,
1893 bid_evaluations,
1894 contracts,
1895 catalog_items,
1896 scorecards,
1897 })
1898 }
1899
1900 fn phase_financial_reporting(
1902 &mut self,
1903 document_flows: &DocumentFlowSnapshot,
1904 stats: &mut EnhancedGenerationStatistics,
1905 ) -> SynthResult<FinancialReportingSnapshot> {
1906 let fs_enabled = self.phase_config.generate_financial_statements
1907 || self.config.financial_reporting.enabled;
1908 let br_enabled = self.phase_config.generate_bank_reconciliation;
1909
1910 if !fs_enabled && !br_enabled {
1911 debug!("Phase 15: Skipped (financial reporting disabled)");
1912 return Ok(FinancialReportingSnapshot::default());
1913 }
1914
1915 info!("Phase 15: Generating Financial Reporting Data");
1916
1917 let seed = self.seed;
1918 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
1919 .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
1920
1921 let mut financial_statements = Vec::new();
1922 let bank_reconciliations = Vec::new();
1923
1924 if fs_enabled {
1926 let company_code = self
1927 .config
1928 .companies
1929 .first()
1930 .map(|c| c.code.as_str())
1931 .unwrap_or("1000");
1932 let currency = self
1933 .config
1934 .companies
1935 .first()
1936 .map(|c| c.currency.as_str())
1937 .unwrap_or("USD");
1938 let mut fs_gen = FinancialStatementGenerator::new(seed + 20);
1939
1940 for period in 0..self.config.global.period_months {
1942 let period_start = start_date + chrono::Months::new(period);
1943 let period_end =
1944 start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
1945 let fiscal_year = period_end.year() as u16;
1946 let fiscal_period = period_end.month() as u8;
1947
1948 let tb_entries = self.build_trial_balance_from_flows(document_flows, &period_end);
1950
1951 let stmts = fs_gen.generate(
1952 company_code,
1953 currency,
1954 &tb_entries,
1955 period_start,
1956 period_end,
1957 fiscal_year,
1958 fiscal_period,
1959 None,
1960 "SYS-AUTOCLOSE",
1961 );
1962 financial_statements.extend(stmts);
1963 }
1964 stats.financial_statement_count = financial_statements.len();
1965 info!(
1966 "Financial statements generated: {} statements",
1967 stats.financial_statement_count
1968 );
1969 }
1970
1971 stats.bank_reconciliation_count = bank_reconciliations.len();
1972 self.check_resources_with_log("post-financial-reporting")?;
1973
1974 Ok(FinancialReportingSnapshot {
1975 financial_statements,
1976 bank_reconciliations,
1977 })
1978 }
1979
1980 fn build_trial_balance_from_flows(
1982 &self,
1983 flows: &DocumentFlowSnapshot,
1984 _period_end: &NaiveDate,
1985 ) -> Vec<datasynth_generators::TrialBalanceEntry> {
1986 use rust_decimal::Decimal;
1987
1988 let mut entries = Vec::new();
1989
1990 let ar_total: Decimal = flows
1992 .customer_invoices
1993 .iter()
1994 .map(|ci| ci.total_gross_amount)
1995 .sum();
1996 if !ar_total.is_zero() {
1997 entries.push(datasynth_generators::TrialBalanceEntry {
1998 account_code: "1100".to_string(),
1999 account_name: "Accounts Receivable".to_string(),
2000 category: "Receivables".to_string(),
2001 debit_balance: ar_total,
2002 credit_balance: Decimal::ZERO,
2003 });
2004 }
2005
2006 let ap_total: Decimal = flows
2008 .vendor_invoices
2009 .iter()
2010 .map(|vi| vi.payable_amount)
2011 .sum();
2012 if !ap_total.is_zero() {
2013 entries.push(datasynth_generators::TrialBalanceEntry {
2014 account_code: "2000".to_string(),
2015 account_name: "Accounts Payable".to_string(),
2016 category: "Payables".to_string(),
2017 debit_balance: Decimal::ZERO,
2018 credit_balance: ap_total,
2019 });
2020 }
2021
2022 let revenue: Decimal = flows
2024 .customer_invoices
2025 .iter()
2026 .map(|ci| ci.total_gross_amount)
2027 .sum();
2028 if !revenue.is_zero() {
2029 entries.push(datasynth_generators::TrialBalanceEntry {
2030 account_code: "4000".to_string(),
2031 account_name: "Revenue".to_string(),
2032 category: "Revenue".to_string(),
2033 debit_balance: Decimal::ZERO,
2034 credit_balance: revenue,
2035 });
2036 }
2037
2038 let cogs: Decimal = flows
2040 .purchase_orders
2041 .iter()
2042 .map(|po| po.total_net_amount)
2043 .sum();
2044 if !cogs.is_zero() {
2045 entries.push(datasynth_generators::TrialBalanceEntry {
2046 account_code: "5000".to_string(),
2047 account_name: "Cost of Goods Sold".to_string(),
2048 category: "CostOfSales".to_string(),
2049 debit_balance: cogs,
2050 credit_balance: Decimal::ZERO,
2051 });
2052 }
2053
2054 let payments_out: Decimal = flows.payments.iter().map(|p| p.amount).sum();
2056 if !payments_out.is_zero() {
2057 entries.push(datasynth_generators::TrialBalanceEntry {
2058 account_code: "1000".to_string(),
2059 account_name: "Cash".to_string(),
2060 category: "Cash".to_string(),
2061 debit_balance: payments_out,
2062 credit_balance: Decimal::ZERO,
2063 });
2064 }
2065
2066 entries
2067 }
2068
2069 fn phase_hr_data(
2071 &mut self,
2072 stats: &mut EnhancedGenerationStatistics,
2073 ) -> SynthResult<HrSnapshot> {
2074 if !self.config.hr.enabled {
2075 debug!("Phase 16: Skipped (HR generation disabled)");
2076 return Ok(HrSnapshot::default());
2077 }
2078
2079 info!("Phase 16: Generating HR Data (Payroll, Time Entries, Expenses)");
2080
2081 let seed = self.seed;
2082 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
2083 .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
2084 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
2085 let company_code = self
2086 .config
2087 .companies
2088 .first()
2089 .map(|c| c.code.as_str())
2090 .unwrap_or("1000");
2091 let currency = self
2092 .config
2093 .companies
2094 .first()
2095 .map(|c| c.currency.as_str())
2096 .unwrap_or("USD");
2097
2098 let employee_ids: Vec<String> = self
2099 .master_data
2100 .employees
2101 .iter()
2102 .map(|e| e.employee_id.clone())
2103 .collect();
2104
2105 if employee_ids.is_empty() {
2106 debug!("Phase 16: Skipped (no employees available)");
2107 return Ok(HrSnapshot::default());
2108 }
2109
2110 let mut snapshot = HrSnapshot::default();
2111
2112 if self.config.hr.payroll.enabled {
2114 let mut payroll_gen = datasynth_generators::PayrollGenerator::new(seed + 30);
2115 let employees_with_salary: Vec<(
2116 String,
2117 rust_decimal::Decimal,
2118 Option<String>,
2119 Option<String>,
2120 )> = self
2121 .master_data
2122 .employees
2123 .iter()
2124 .map(|e| {
2125 (
2126 e.employee_id.clone(),
2127 rust_decimal::Decimal::from(5000), e.cost_center.clone(),
2129 e.department_id.clone(),
2130 )
2131 })
2132 .collect();
2133
2134 for month in 0..self.config.global.period_months {
2135 let period_start = start_date + chrono::Months::new(month);
2136 let period_end = start_date + chrono::Months::new(month + 1) - chrono::Days::new(1);
2137 let (run, items) = payroll_gen.generate(
2138 company_code,
2139 &employees_with_salary,
2140 period_start,
2141 period_end,
2142 currency,
2143 );
2144 let _ = run; snapshot.payroll_run_count += 1;
2146 snapshot.payroll_line_item_count += items.len();
2147 }
2148 }
2149
2150 if self.config.hr.time_attendance.enabled {
2152 let mut time_gen = datasynth_generators::TimeEntryGenerator::new(seed + 31);
2153 let entries = time_gen.generate(
2154 &employee_ids,
2155 start_date,
2156 end_date,
2157 &self.config.hr.time_attendance,
2158 );
2159 snapshot.time_entry_count = entries.len();
2160 }
2161
2162 if self.config.hr.expenses.enabled {
2164 let mut expense_gen = datasynth_generators::ExpenseReportGenerator::new(seed + 32);
2165 let reports = expense_gen.generate(
2166 &employee_ids,
2167 start_date,
2168 end_date,
2169 &self.config.hr.expenses,
2170 );
2171 snapshot.expense_report_count = reports.len();
2172 }
2173
2174 stats.payroll_run_count = snapshot.payroll_run_count;
2175 stats.time_entry_count = snapshot.time_entry_count;
2176 stats.expense_report_count = snapshot.expense_report_count;
2177
2178 info!(
2179 "HR data generated: {} payroll runs ({} line items), {} time entries, {} expense reports",
2180 snapshot.payroll_run_count, snapshot.payroll_line_item_count,
2181 snapshot.time_entry_count, snapshot.expense_report_count
2182 );
2183 self.check_resources_with_log("post-hr")?;
2184
2185 Ok(snapshot)
2186 }
2187
2188 fn phase_accounting_standards(
2190 &mut self,
2191 stats: &mut EnhancedGenerationStatistics,
2192 ) -> SynthResult<AccountingStandardsSnapshot> {
2193 if !self.phase_config.generate_accounting_standards
2194 || !self.config.accounting_standards.enabled
2195 {
2196 debug!("Phase 17: Skipped (accounting standards generation disabled)");
2197 return Ok(AccountingStandardsSnapshot::default());
2198 }
2199 info!("Phase 17: Generating Accounting Standards Data");
2200
2201 let seed = self.seed;
2202 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
2203 .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
2204 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
2205 let company_code = self
2206 .config
2207 .companies
2208 .first()
2209 .map(|c| c.code.as_str())
2210 .unwrap_or("1000");
2211 let currency = self
2212 .config
2213 .companies
2214 .first()
2215 .map(|c| c.currency.as_str())
2216 .unwrap_or("USD");
2217
2218 let framework = match self.config.accounting_standards.framework {
2220 datasynth_config::schema::AccountingFrameworkConfig::UsGaap => {
2221 datasynth_standards::framework::AccountingFramework::UsGaap
2222 }
2223 datasynth_config::schema::AccountingFrameworkConfig::Ifrs => {
2224 datasynth_standards::framework::AccountingFramework::Ifrs
2225 }
2226 datasynth_config::schema::AccountingFrameworkConfig::DualReporting => {
2227 datasynth_standards::framework::AccountingFramework::DualReporting
2228 }
2229 };
2230
2231 let mut snapshot = AccountingStandardsSnapshot::default();
2232
2233 if self.config.accounting_standards.revenue_recognition.enabled {
2235 let customer_ids: Vec<String> = self
2236 .master_data
2237 .customers
2238 .iter()
2239 .map(|c| c.customer_id.clone())
2240 .collect();
2241
2242 if !customer_ids.is_empty() {
2243 let mut rev_gen = datasynth_generators::RevenueRecognitionGenerator::new(seed + 40);
2244 let contracts = rev_gen.generate(
2245 company_code,
2246 &customer_ids,
2247 start_date,
2248 end_date,
2249 currency,
2250 &self.config.accounting_standards.revenue_recognition,
2251 framework,
2252 );
2253 snapshot.revenue_contract_count = contracts.len();
2254 }
2255 }
2256
2257 if self.config.accounting_standards.impairment.enabled {
2259 let asset_data: Vec<(String, String, rust_decimal::Decimal)> = self
2260 .master_data
2261 .assets
2262 .iter()
2263 .map(|a| {
2264 (
2265 a.asset_id.clone(),
2266 a.description.clone(),
2267 a.acquisition_cost,
2268 )
2269 })
2270 .collect();
2271
2272 if !asset_data.is_empty() {
2273 let mut imp_gen = datasynth_generators::ImpairmentGenerator::new(seed + 41);
2274 let tests = imp_gen.generate(
2275 company_code,
2276 &asset_data,
2277 end_date,
2278 &self.config.accounting_standards.impairment,
2279 framework,
2280 );
2281 snapshot.impairment_test_count = tests.len();
2282 }
2283 }
2284
2285 stats.revenue_contract_count = snapshot.revenue_contract_count;
2286 stats.impairment_test_count = snapshot.impairment_test_count;
2287
2288 info!(
2289 "Accounting standards data generated: {} revenue contracts, {} impairment tests",
2290 snapshot.revenue_contract_count, snapshot.impairment_test_count
2291 );
2292 self.check_resources_with_log("post-accounting-standards")?;
2293
2294 Ok(snapshot)
2295 }
2296
2297 fn phase_manufacturing(
2299 &mut self,
2300 stats: &mut EnhancedGenerationStatistics,
2301 ) -> SynthResult<ManufacturingSnapshot> {
2302 if !self.phase_config.generate_manufacturing || !self.config.manufacturing.enabled {
2303 debug!("Phase 18: Skipped (manufacturing generation disabled)");
2304 return Ok(ManufacturingSnapshot::default());
2305 }
2306 info!("Phase 18: Generating Manufacturing Data");
2307
2308 let seed = self.seed;
2309 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
2310 .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
2311 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
2312 let company_code = self
2313 .config
2314 .companies
2315 .first()
2316 .map(|c| c.code.as_str())
2317 .unwrap_or("1000");
2318
2319 let material_data: Vec<(String, String)> = self
2320 .master_data
2321 .materials
2322 .iter()
2323 .map(|m| (m.material_id.clone(), m.description.clone()))
2324 .collect();
2325
2326 if material_data.is_empty() {
2327 debug!("Phase 18: Skipped (no materials available)");
2328 return Ok(ManufacturingSnapshot::default());
2329 }
2330
2331 let mut snapshot = ManufacturingSnapshot::default();
2332
2333 let mut prod_gen = datasynth_generators::ProductionOrderGenerator::new(seed + 50);
2335 let production_orders = prod_gen.generate(
2336 company_code,
2337 &material_data,
2338 start_date,
2339 end_date,
2340 &self.config.manufacturing.production_orders,
2341 &self.config.manufacturing.costing,
2342 &self.config.manufacturing.routing,
2343 );
2344 snapshot.production_order_count = production_orders.len();
2345
2346 let inspection_data: Vec<(String, String, String)> = production_orders
2348 .iter()
2349 .map(|po| {
2350 (
2351 po.order_id.clone(),
2352 po.material_id.clone(),
2353 po.material_description.clone(),
2354 )
2355 })
2356 .collect();
2357
2358 if !inspection_data.is_empty() {
2359 let mut qi_gen = datasynth_generators::QualityInspectionGenerator::new(seed + 51);
2360 let inspections = qi_gen.generate(company_code, &inspection_data, end_date);
2361 snapshot.quality_inspection_count = inspections.len();
2362 }
2363
2364 let storage_locations: Vec<(String, String)> = material_data
2366 .iter()
2367 .enumerate()
2368 .map(|(i, (mid, _))| (mid.clone(), format!("SL-{:03}", (i % 10) + 1)))
2369 .collect();
2370
2371 let mut cc_gen = datasynth_generators::CycleCountGenerator::new(seed + 52);
2372 let mut cycle_count_total = 0usize;
2373 for month in 0..self.config.global.period_months {
2374 let count_date = start_date + chrono::Months::new(month);
2375 let items_per_count = storage_locations.len().clamp(10, 50);
2376 let _cc = cc_gen.generate(
2377 company_code,
2378 &storage_locations,
2379 count_date,
2380 items_per_count,
2381 );
2382 cycle_count_total += 1;
2383 }
2384 snapshot.cycle_count_count = cycle_count_total;
2385
2386 stats.production_order_count = snapshot.production_order_count;
2387 stats.quality_inspection_count = snapshot.quality_inspection_count;
2388 stats.cycle_count_count = snapshot.cycle_count_count;
2389
2390 info!(
2391 "Manufacturing data generated: {} production orders, {} quality inspections, {} cycle counts",
2392 snapshot.production_order_count, snapshot.quality_inspection_count, snapshot.cycle_count_count
2393 );
2394 self.check_resources_with_log("post-manufacturing")?;
2395
2396 Ok(snapshot)
2397 }
2398
2399 fn phase_sales_kpi_budgets(
2401 &mut self,
2402 coa: &Arc<ChartOfAccounts>,
2403 stats: &mut EnhancedGenerationStatistics,
2404 ) -> SynthResult<SalesKpiBudgetsSnapshot> {
2405 if !self.phase_config.generate_sales_kpi_budgets {
2406 debug!("Phase 19: Skipped (sales/KPI/budget generation disabled)");
2407 return Ok(SalesKpiBudgetsSnapshot::default());
2408 }
2409 info!("Phase 19: Generating Sales Quotes, KPIs, and Budgets");
2410
2411 let seed = self.seed;
2412 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
2413 .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
2414 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
2415 let company_code = self
2416 .config
2417 .companies
2418 .first()
2419 .map(|c| c.code.as_str())
2420 .unwrap_or("1000");
2421
2422 let mut snapshot = SalesKpiBudgetsSnapshot::default();
2423
2424 if self.config.sales_quotes.enabled {
2426 let customer_data: Vec<(String, String)> = self
2427 .master_data
2428 .customers
2429 .iter()
2430 .map(|c| (c.customer_id.clone(), c.name.clone()))
2431 .collect();
2432 let material_data: Vec<(String, String)> = self
2433 .master_data
2434 .materials
2435 .iter()
2436 .map(|m| (m.material_id.clone(), m.description.clone()))
2437 .collect();
2438
2439 if !customer_data.is_empty() && !material_data.is_empty() {
2440 let mut quote_gen = datasynth_generators::SalesQuoteGenerator::new(seed + 60);
2441 let quotes = quote_gen.generate(
2442 company_code,
2443 &customer_data,
2444 &material_data,
2445 start_date,
2446 end_date,
2447 &self.config.sales_quotes,
2448 );
2449 snapshot.sales_quote_count = quotes.len();
2450 }
2451 }
2452
2453 if self.config.financial_reporting.management_kpis.enabled {
2455 let mut kpi_gen = datasynth_generators::KpiGenerator::new(seed + 61);
2456 let kpis = kpi_gen.generate(
2457 company_code,
2458 start_date,
2459 end_date,
2460 &self.config.financial_reporting.management_kpis,
2461 );
2462 snapshot.kpi_count = kpis.len();
2463 }
2464
2465 if self.config.financial_reporting.budgets.enabled {
2467 let account_data: Vec<(String, String)> = coa
2468 .accounts
2469 .iter()
2470 .map(|a| (a.account_number.clone(), a.short_description.clone()))
2471 .collect();
2472
2473 if !account_data.is_empty() {
2474 let fiscal_year = start_date.year() as u32;
2475 let mut budget_gen = datasynth_generators::BudgetGenerator::new(seed + 62);
2476 let budget = budget_gen.generate(
2477 company_code,
2478 fiscal_year,
2479 &account_data,
2480 &self.config.financial_reporting.budgets,
2481 );
2482 snapshot.budget_line_count = budget.line_items.len();
2483 }
2484 }
2485
2486 stats.sales_quote_count = snapshot.sales_quote_count;
2487 stats.kpi_count = snapshot.kpi_count;
2488 stats.budget_line_count = snapshot.budget_line_count;
2489
2490 info!(
2491 "Sales/KPI/Budget data generated: {} quotes, {} KPIs, {} budget lines",
2492 snapshot.sales_quote_count, snapshot.kpi_count, snapshot.budget_line_count
2493 );
2494 self.check_resources_with_log("post-sales-kpi-budgets")?;
2495
2496 Ok(snapshot)
2497 }
2498
2499 fn generate_coa(&mut self) -> SynthResult<Arc<ChartOfAccounts>> {
2501 let pb = self.create_progress_bar(1, "Generating Chart of Accounts");
2502
2503 let mut gen = ChartOfAccountsGenerator::new(
2504 self.config.chart_of_accounts.complexity,
2505 self.config.global.industry,
2506 self.seed,
2507 );
2508
2509 let coa = Arc::new(gen.generate());
2510 self.coa = Some(Arc::clone(&coa));
2511
2512 if let Some(pb) = pb {
2513 pb.finish_with_message("Chart of Accounts complete");
2514 }
2515
2516 Ok(coa)
2517 }
2518
2519 fn generate_master_data(&mut self) -> SynthResult<()> {
2521 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
2522 .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
2523 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
2524
2525 let total = self.config.companies.len() as u64 * 5; let pb = self.create_progress_bar(total, "Generating Master Data");
2527
2528 for (i, company) in self.config.companies.iter().enumerate() {
2529 let company_seed = self.seed.wrapping_add(i as u64 * 1000);
2530
2531 let mut vendor_gen = VendorGenerator::new(company_seed);
2533 let vendor_pool = vendor_gen.generate_vendor_pool(
2534 self.phase_config.vendors_per_company,
2535 &company.code,
2536 start_date,
2537 );
2538 self.master_data.vendors.extend(vendor_pool.vendors);
2539 if let Some(pb) = &pb {
2540 pb.inc(1);
2541 }
2542
2543 let mut customer_gen = CustomerGenerator::new(company_seed + 100);
2545 let customer_pool = customer_gen.generate_customer_pool(
2546 self.phase_config.customers_per_company,
2547 &company.code,
2548 start_date,
2549 );
2550 self.master_data.customers.extend(customer_pool.customers);
2551 if let Some(pb) = &pb {
2552 pb.inc(1);
2553 }
2554
2555 let mut material_gen = MaterialGenerator::new(company_seed + 200);
2557 let material_pool = material_gen.generate_material_pool(
2558 self.phase_config.materials_per_company,
2559 &company.code,
2560 start_date,
2561 );
2562 self.master_data.materials.extend(material_pool.materials);
2563 if let Some(pb) = &pb {
2564 pb.inc(1);
2565 }
2566
2567 let mut asset_gen = AssetGenerator::new(company_seed + 300);
2569 let asset_pool = asset_gen.generate_asset_pool(
2570 self.phase_config.assets_per_company,
2571 &company.code,
2572 (start_date, end_date),
2573 );
2574 self.master_data.assets.extend(asset_pool.assets);
2575 if let Some(pb) = &pb {
2576 pb.inc(1);
2577 }
2578
2579 let mut employee_gen = EmployeeGenerator::new(company_seed + 400);
2581 let employee_pool =
2582 employee_gen.generate_company_pool(&company.code, (start_date, end_date));
2583 self.master_data.employees.extend(employee_pool.employees);
2584 if let Some(pb) = &pb {
2585 pb.inc(1);
2586 }
2587 }
2588
2589 if let Some(pb) = pb {
2590 pb.finish_with_message("Master data generation complete");
2591 }
2592
2593 Ok(())
2594 }
2595
2596 fn generate_document_flows(&mut self, flows: &mut DocumentFlowSnapshot) -> SynthResult<()> {
2598 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
2599 .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
2600
2601 let p2p_count = self
2603 .phase_config
2604 .p2p_chains
2605 .min(self.master_data.vendors.len() * 2);
2606 let pb = self.create_progress_bar(p2p_count as u64, "Generating P2P Document Flows");
2607
2608 let p2p_config = convert_p2p_config(&self.config.document_flows.p2p);
2610 let mut p2p_gen = P2PGenerator::with_config(self.seed + 1000, p2p_config);
2611
2612 for i in 0..p2p_count {
2613 let vendor = &self.master_data.vendors[i % self.master_data.vendors.len()];
2614 let materials: Vec<&Material> = self
2615 .master_data
2616 .materials
2617 .iter()
2618 .skip(i % self.master_data.materials.len().max(1))
2619 .take(2.min(self.master_data.materials.len()))
2620 .collect();
2621
2622 if materials.is_empty() {
2623 continue;
2624 }
2625
2626 let company = &self.config.companies[i % self.config.companies.len()];
2627 let po_date = start_date + chrono::Duration::days((i * 3) as i64 % 365);
2628 let fiscal_period = po_date.month() as u8;
2629 let created_by = self
2630 .master_data
2631 .employees
2632 .first()
2633 .map(|e| e.user_id.as_str())
2634 .unwrap_or("SYSTEM");
2635
2636 let chain = p2p_gen.generate_chain(
2637 &company.code,
2638 vendor,
2639 &materials,
2640 po_date,
2641 start_date.year() as u16,
2642 fiscal_period,
2643 created_by,
2644 );
2645
2646 flows.purchase_orders.push(chain.purchase_order.clone());
2648 flows.goods_receipts.extend(chain.goods_receipts.clone());
2649 if let Some(vi) = &chain.vendor_invoice {
2650 flows.vendor_invoices.push(vi.clone());
2651 }
2652 if let Some(payment) = &chain.payment {
2653 flows.payments.push(payment.clone());
2654 }
2655 flows.p2p_chains.push(chain);
2656
2657 if let Some(pb) = &pb {
2658 pb.inc(1);
2659 }
2660 }
2661
2662 if let Some(pb) = pb {
2663 pb.finish_with_message("P2P document flows complete");
2664 }
2665
2666 let o2c_count = self
2668 .phase_config
2669 .o2c_chains
2670 .min(self.master_data.customers.len() * 2);
2671 let pb = self.create_progress_bar(o2c_count as u64, "Generating O2C Document Flows");
2672
2673 let o2c_config = convert_o2c_config(&self.config.document_flows.o2c);
2675 let mut o2c_gen = O2CGenerator::with_config(self.seed + 2000, o2c_config);
2676
2677 for i in 0..o2c_count {
2678 let customer = &self.master_data.customers[i % self.master_data.customers.len()];
2679 let materials: Vec<&Material> = self
2680 .master_data
2681 .materials
2682 .iter()
2683 .skip(i % self.master_data.materials.len().max(1))
2684 .take(2.min(self.master_data.materials.len()))
2685 .collect();
2686
2687 if materials.is_empty() {
2688 continue;
2689 }
2690
2691 let company = &self.config.companies[i % self.config.companies.len()];
2692 let so_date = start_date + chrono::Duration::days((i * 2) as i64 % 365);
2693 let fiscal_period = so_date.month() as u8;
2694 let created_by = self
2695 .master_data
2696 .employees
2697 .first()
2698 .map(|e| e.user_id.as_str())
2699 .unwrap_or("SYSTEM");
2700
2701 let chain = o2c_gen.generate_chain(
2702 &company.code,
2703 customer,
2704 &materials,
2705 so_date,
2706 start_date.year() as u16,
2707 fiscal_period,
2708 created_by,
2709 );
2710
2711 flows.sales_orders.push(chain.sales_order.clone());
2713 flows.deliveries.extend(chain.deliveries.clone());
2714 if let Some(ci) = &chain.customer_invoice {
2715 flows.customer_invoices.push(ci.clone());
2716 }
2717 if let Some(receipt) = &chain.customer_receipt {
2718 flows.payments.push(receipt.clone());
2719 }
2720 flows.o2c_chains.push(chain);
2721
2722 if let Some(pb) = &pb {
2723 pb.inc(1);
2724 }
2725 }
2726
2727 if let Some(pb) = pb {
2728 pb.finish_with_message("O2C document flows complete");
2729 }
2730
2731 Ok(())
2732 }
2733
2734 fn generate_journal_entries(
2736 &mut self,
2737 coa: &Arc<ChartOfAccounts>,
2738 ) -> SynthResult<Vec<JournalEntry>> {
2739 let total = self.calculate_total_transactions();
2740 let pb = self.create_progress_bar(total, "Generating Journal Entries");
2741
2742 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
2743 .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
2744 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
2745
2746 let company_codes: Vec<String> = self
2747 .config
2748 .companies
2749 .iter()
2750 .map(|c| c.code.clone())
2751 .collect();
2752
2753 let generator = JournalEntryGenerator::new_with_params(
2754 self.config.transactions.clone(),
2755 Arc::clone(coa),
2756 company_codes,
2757 start_date,
2758 end_date,
2759 self.seed,
2760 );
2761
2762 let mut generator = generator
2766 .with_master_data(
2767 &self.master_data.vendors,
2768 &self.master_data.customers,
2769 &self.master_data.materials,
2770 )
2771 .with_persona_errors(true)
2772 .with_fraud_config(self.config.fraud.clone());
2773
2774 if self.config.temporal.enabled {
2776 let drift_config = self.config.temporal.to_core_config();
2777 generator = generator.with_drift_config(drift_config, self.seed + 100);
2778 }
2779
2780 let mut entries = Vec::with_capacity(total as usize);
2781
2782 self.check_memory_limit()?;
2784
2785 const MEMORY_CHECK_INTERVAL: u64 = 1000;
2787
2788 for i in 0..total {
2789 let entry = generator.generate();
2790 entries.push(entry);
2791 if let Some(pb) = &pb {
2792 pb.inc(1);
2793 }
2794
2795 if (i + 1) % MEMORY_CHECK_INTERVAL == 0 {
2797 self.check_memory_limit()?;
2798 }
2799 }
2800
2801 if let Some(pb) = pb {
2802 pb.finish_with_message("Journal entries complete");
2803 }
2804
2805 Ok(entries)
2806 }
2807
2808 fn generate_jes_from_document_flows(
2813 &mut self,
2814 flows: &DocumentFlowSnapshot,
2815 ) -> SynthResult<Vec<JournalEntry>> {
2816 let total_chains = flows.p2p_chains.len() + flows.o2c_chains.len();
2817 let pb = self.create_progress_bar(total_chains as u64, "Generating Document Flow JEs");
2818
2819 let mut generator = DocumentFlowJeGenerator::with_config_and_seed(
2820 DocumentFlowJeConfig::default(),
2821 self.seed,
2822 );
2823 let mut entries = Vec::new();
2824
2825 for chain in &flows.p2p_chains {
2827 let chain_entries = generator.generate_from_p2p_chain(chain);
2828 entries.extend(chain_entries);
2829 if let Some(pb) = &pb {
2830 pb.inc(1);
2831 }
2832 }
2833
2834 for chain in &flows.o2c_chains {
2836 let chain_entries = generator.generate_from_o2c_chain(chain);
2837 entries.extend(chain_entries);
2838 if let Some(pb) = &pb {
2839 pb.inc(1);
2840 }
2841 }
2842
2843 if let Some(pb) = pb {
2844 pb.finish_with_message(format!(
2845 "Generated {} JEs from document flows",
2846 entries.len()
2847 ));
2848 }
2849
2850 Ok(entries)
2851 }
2852
2853 fn link_document_flows_to_subledgers(
2858 &mut self,
2859 flows: &DocumentFlowSnapshot,
2860 ) -> SynthResult<SubledgerSnapshot> {
2861 let total = flows.vendor_invoices.len() + flows.customer_invoices.len();
2862 let pb = self.create_progress_bar(total as u64, "Linking Subledgers");
2863
2864 let mut linker = DocumentFlowLinker::new();
2865
2866 let ap_invoices = linker.batch_create_ap_invoices(&flows.vendor_invoices);
2868 if let Some(pb) = &pb {
2869 pb.inc(flows.vendor_invoices.len() as u64);
2870 }
2871
2872 let ar_invoices = linker.batch_create_ar_invoices(&flows.customer_invoices);
2874 if let Some(pb) = &pb {
2875 pb.inc(flows.customer_invoices.len() as u64);
2876 }
2877
2878 if let Some(pb) = pb {
2879 pb.finish_with_message(format!(
2880 "Linked {} AP and {} AR invoices",
2881 ap_invoices.len(),
2882 ar_invoices.len()
2883 ));
2884 }
2885
2886 Ok(SubledgerSnapshot {
2887 ap_invoices,
2888 ar_invoices,
2889 })
2890 }
2891
2892 fn generate_ocpm_events(&mut self, flows: &DocumentFlowSnapshot) -> SynthResult<OcpmSnapshot> {
2897 let total_chains = flows.p2p_chains.len() + flows.o2c_chains.len();
2898 let pb = self.create_progress_bar(total_chains as u64, "Generating OCPM Events");
2899
2900 let metadata = EventLogMetadata::new("SyntheticData OCPM Log");
2902 let mut event_log = OcpmEventLog::with_metadata(metadata).with_standard_types();
2903
2904 let ocpm_config = OcpmGeneratorConfig {
2906 generate_p2p: true,
2907 generate_o2c: true,
2908 happy_path_rate: 0.75,
2909 exception_path_rate: 0.20,
2910 error_path_rate: 0.05,
2911 add_duration_variability: true,
2912 duration_std_dev_factor: 0.3,
2913 };
2914 let mut ocpm_gen = OcpmEventGenerator::with_config(self.seed + 3000, ocpm_config);
2915
2916 let available_users: Vec<String> = self
2918 .master_data
2919 .employees
2920 .iter()
2921 .take(20)
2922 .map(|e| e.user_id.clone())
2923 .collect();
2924
2925 for chain in &flows.p2p_chains {
2927 let po = &chain.purchase_order;
2928 let documents = P2pDocuments::new(
2929 &po.header.document_id,
2930 &po.vendor_id,
2931 &po.header.company_code,
2932 po.total_net_amount,
2933 &po.header.currency,
2934 )
2935 .with_goods_receipt(
2936 chain
2937 .goods_receipts
2938 .first()
2939 .map(|gr| gr.header.document_id.as_str())
2940 .unwrap_or(""),
2941 )
2942 .with_invoice(
2943 chain
2944 .vendor_invoice
2945 .as_ref()
2946 .map(|vi| vi.header.document_id.as_str())
2947 .unwrap_or(""),
2948 )
2949 .with_payment(
2950 chain
2951 .payment
2952 .as_ref()
2953 .map(|p| p.header.document_id.as_str())
2954 .unwrap_or(""),
2955 );
2956
2957 let start_time =
2958 chrono::DateTime::from_naive_utc_and_offset(po.header.entry_timestamp, chrono::Utc);
2959 let result = ocpm_gen.generate_p2p_case(&documents, start_time, &available_users);
2960
2961 for event in result.events {
2963 event_log.add_event(event);
2964 }
2965 for object in result.objects {
2966 event_log.add_object(object);
2967 }
2968 for relationship in result.relationships {
2969 event_log.add_relationship(relationship);
2970 }
2971 event_log.add_case(result.case_trace);
2972
2973 if let Some(pb) = &pb {
2974 pb.inc(1);
2975 }
2976 }
2977
2978 for chain in &flows.o2c_chains {
2980 let so = &chain.sales_order;
2981 let documents = O2cDocuments::new(
2982 &so.header.document_id,
2983 &so.customer_id,
2984 &so.header.company_code,
2985 so.total_net_amount,
2986 &so.header.currency,
2987 )
2988 .with_delivery(
2989 chain
2990 .deliveries
2991 .first()
2992 .map(|d| d.header.document_id.as_str())
2993 .unwrap_or(""),
2994 )
2995 .with_invoice(
2996 chain
2997 .customer_invoice
2998 .as_ref()
2999 .map(|ci| ci.header.document_id.as_str())
3000 .unwrap_or(""),
3001 )
3002 .with_receipt(
3003 chain
3004 .customer_receipt
3005 .as_ref()
3006 .map(|r| r.header.document_id.as_str())
3007 .unwrap_or(""),
3008 );
3009
3010 let start_time =
3011 chrono::DateTime::from_naive_utc_and_offset(so.header.entry_timestamp, chrono::Utc);
3012 let result = ocpm_gen.generate_o2c_case(&documents, start_time, &available_users);
3013
3014 for event in result.events {
3016 event_log.add_event(event);
3017 }
3018 for object in result.objects {
3019 event_log.add_object(object);
3020 }
3021 for relationship in result.relationships {
3022 event_log.add_relationship(relationship);
3023 }
3024 event_log.add_case(result.case_trace);
3025
3026 if let Some(pb) = &pb {
3027 pb.inc(1);
3028 }
3029 }
3030
3031 event_log.compute_variants();
3033
3034 let summary = event_log.summary();
3035
3036 if let Some(pb) = pb {
3037 pb.finish_with_message(format!(
3038 "Generated {} OCPM events, {} objects",
3039 summary.event_count, summary.object_count
3040 ));
3041 }
3042
3043 Ok(OcpmSnapshot {
3044 event_count: summary.event_count,
3045 object_count: summary.object_count,
3046 case_count: summary.case_count,
3047 event_log: Some(event_log),
3048 })
3049 }
3050
3051 fn inject_anomalies(&mut self, entries: &mut [JournalEntry]) -> SynthResult<AnomalyLabels> {
3053 let pb = self.create_progress_bar(entries.len() as u64, "Injecting Anomalies");
3054
3055 let anomaly_config = AnomalyInjectorConfig {
3056 rates: AnomalyRateConfig {
3057 total_rate: 0.02,
3058 ..Default::default()
3059 },
3060 seed: self.seed + 5000,
3061 ..Default::default()
3062 };
3063
3064 let mut injector = AnomalyInjector::new(anomaly_config);
3065 let result = injector.process_entries(entries);
3066
3067 if let Some(pb) = &pb {
3068 pb.inc(entries.len() as u64);
3069 pb.finish_with_message("Anomaly injection complete");
3070 }
3071
3072 let mut by_type = HashMap::new();
3073 for label in &result.labels {
3074 *by_type
3075 .entry(format!("{:?}", label.anomaly_type))
3076 .or_insert(0) += 1;
3077 }
3078
3079 Ok(AnomalyLabels {
3080 labels: result.labels,
3081 summary: Some(result.summary),
3082 by_type,
3083 })
3084 }
3085
3086 fn validate_journal_entries(
3095 &mut self,
3096 entries: &[JournalEntry],
3097 ) -> SynthResult<BalanceValidationResult> {
3098 let clean_entries: Vec<&JournalEntry> = entries
3100 .iter()
3101 .filter(|e| {
3102 e.header
3103 .header_text
3104 .as_ref()
3105 .map(|t| !t.contains("[HUMAN_ERROR:"))
3106 .unwrap_or(true)
3107 })
3108 .collect();
3109
3110 let pb = self.create_progress_bar(clean_entries.len() as u64, "Validating Balances");
3111
3112 let config = BalanceTrackerConfig {
3114 validate_on_each_entry: false, track_history: false, fail_on_validation_error: false, ..Default::default()
3118 };
3119
3120 let mut tracker = RunningBalanceTracker::new(config);
3121
3122 let clean_refs: Vec<JournalEntry> = clean_entries.into_iter().cloned().collect();
3124 let errors = tracker.apply_entries(&clean_refs);
3125
3126 if let Some(pb) = &pb {
3127 pb.inc(entries.len() as u64);
3128 }
3129
3130 let has_unbalanced = tracker
3133 .get_validation_errors()
3134 .iter()
3135 .any(|e| e.error_type == datasynth_generators::ValidationErrorType::UnbalancedEntry);
3136
3137 let mut all_errors = errors;
3140 all_errors.extend(tracker.get_validation_errors().iter().cloned());
3141 let company_codes: Vec<String> = self
3142 .config
3143 .companies
3144 .iter()
3145 .map(|c| c.code.clone())
3146 .collect();
3147
3148 let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3149 .map(|d| d + chrono::Months::new(self.config.global.period_months))
3150 .unwrap_or_else(|_| chrono::Local::now().date_naive());
3151
3152 for company_code in &company_codes {
3153 if let Err(e) = tracker.validate_balance_sheet(company_code, end_date, None) {
3154 all_errors.push(e);
3155 }
3156 }
3157
3158 let stats = tracker.get_statistics();
3160
3161 let is_balanced = all_errors.is_empty();
3163
3164 if let Some(pb) = pb {
3165 let msg = if is_balanced {
3166 "Balance validation passed"
3167 } else {
3168 "Balance validation completed with errors"
3169 };
3170 pb.finish_with_message(msg);
3171 }
3172
3173 Ok(BalanceValidationResult {
3174 validated: true,
3175 is_balanced,
3176 entries_processed: stats.entries_processed,
3177 total_debits: stats.total_debits,
3178 total_credits: stats.total_credits,
3179 accounts_tracked: stats.accounts_tracked,
3180 companies_tracked: stats.companies_tracked,
3181 validation_errors: all_errors,
3182 has_unbalanced_entries: has_unbalanced,
3183 })
3184 }
3185
3186 fn inject_data_quality(
3191 &mut self,
3192 entries: &mut [JournalEntry],
3193 ) -> SynthResult<DataQualityStats> {
3194 let pb = self.create_progress_bar(entries.len() as u64, "Injecting Data Quality Issues");
3195
3196 let config = DataQualityConfig::minimal();
3198 let mut injector = DataQualityInjector::new(config);
3199
3200 let context = HashMap::new();
3202
3203 for entry in entries.iter_mut() {
3204 if let Some(text) = &entry.header.header_text {
3206 let processed = injector.process_text_field(
3207 "header_text",
3208 text,
3209 &entry.header.document_id.to_string(),
3210 &context,
3211 );
3212 match processed {
3213 Some(new_text) if new_text != *text => {
3214 entry.header.header_text = Some(new_text);
3215 }
3216 None => {
3217 entry.header.header_text = None; }
3219 _ => {}
3220 }
3221 }
3222
3223 if let Some(ref_text) = &entry.header.reference {
3225 let processed = injector.process_text_field(
3226 "reference",
3227 ref_text,
3228 &entry.header.document_id.to_string(),
3229 &context,
3230 );
3231 match processed {
3232 Some(new_text) if new_text != *ref_text => {
3233 entry.header.reference = Some(new_text);
3234 }
3235 None => {
3236 entry.header.reference = None;
3237 }
3238 _ => {}
3239 }
3240 }
3241
3242 let user_persona = entry.header.user_persona.clone();
3244 if let Some(processed) = injector.process_text_field(
3245 "user_persona",
3246 &user_persona,
3247 &entry.header.document_id.to_string(),
3248 &context,
3249 ) {
3250 if processed != user_persona {
3251 entry.header.user_persona = processed;
3252 }
3253 }
3254
3255 for line in &mut entry.lines {
3257 if let Some(ref text) = line.line_text {
3259 let processed = injector.process_text_field(
3260 "line_text",
3261 text,
3262 &entry.header.document_id.to_string(),
3263 &context,
3264 );
3265 match processed {
3266 Some(new_text) if new_text != *text => {
3267 line.line_text = Some(new_text);
3268 }
3269 None => {
3270 line.line_text = None;
3271 }
3272 _ => {}
3273 }
3274 }
3275
3276 if let Some(cc) = &line.cost_center {
3278 let processed = injector.process_text_field(
3279 "cost_center",
3280 cc,
3281 &entry.header.document_id.to_string(),
3282 &context,
3283 );
3284 match processed {
3285 Some(new_cc) if new_cc != *cc => {
3286 line.cost_center = Some(new_cc);
3287 }
3288 None => {
3289 line.cost_center = None;
3290 }
3291 _ => {}
3292 }
3293 }
3294 }
3295
3296 if let Some(pb) = &pb {
3297 pb.inc(1);
3298 }
3299 }
3300
3301 if let Some(pb) = pb {
3302 pb.finish_with_message("Data quality injection complete");
3303 }
3304
3305 Ok(injector.stats().clone())
3306 }
3307
3308 fn generate_audit_data(&mut self, entries: &[JournalEntry]) -> SynthResult<AuditSnapshot> {
3319 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3320 .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
3321 let fiscal_year = start_date.year() as u16;
3322 let period_end = start_date + chrono::Months::new(self.config.global.period_months);
3323
3324 let total_revenue: rust_decimal::Decimal = entries
3326 .iter()
3327 .flat_map(|e| e.lines.iter())
3328 .filter(|l| l.credit_amount > rust_decimal::Decimal::ZERO)
3329 .map(|l| l.credit_amount)
3330 .sum();
3331
3332 let total_items = (self.phase_config.audit_engagements * 50) as u64; let pb = self.create_progress_bar(total_items, "Generating Audit Data");
3334
3335 let mut snapshot = AuditSnapshot::default();
3336
3337 let mut engagement_gen = AuditEngagementGenerator::new(self.seed + 7000);
3339 let mut workpaper_gen = WorkpaperGenerator::new(self.seed + 7100);
3340 let mut evidence_gen = EvidenceGenerator::new(self.seed + 7200);
3341 let mut risk_gen = RiskAssessmentGenerator::new(self.seed + 7300);
3342 let mut finding_gen = FindingGenerator::new(self.seed + 7400);
3343 let mut judgment_gen = JudgmentGenerator::new(self.seed + 7500);
3344
3345 let accounts: Vec<String> = self
3347 .coa
3348 .as_ref()
3349 .map(|coa| {
3350 coa.get_postable_accounts()
3351 .iter()
3352 .map(|acc| acc.account_code().to_string())
3353 .collect()
3354 })
3355 .unwrap_or_default();
3356
3357 for (i, company) in self.config.companies.iter().enumerate() {
3359 let company_revenue = total_revenue
3361 * rust_decimal::Decimal::try_from(company.volume_weight).unwrap_or_default();
3362
3363 let engagements_for_company =
3365 self.phase_config.audit_engagements / self.config.companies.len().max(1);
3366 let extra = if i < self.phase_config.audit_engagements % self.config.companies.len() {
3367 1
3368 } else {
3369 0
3370 };
3371
3372 for _eng_idx in 0..(engagements_for_company + extra) {
3373 let engagement = engagement_gen.generate_engagement(
3375 &company.code,
3376 &company.name,
3377 fiscal_year,
3378 period_end,
3379 company_revenue,
3380 None, );
3382
3383 if let Some(pb) = &pb {
3384 pb.inc(1);
3385 }
3386
3387 let team_members: Vec<String> = engagement.team_member_ids.clone();
3389
3390 let workpapers =
3392 workpaper_gen.generate_complete_workpaper_set(&engagement, &team_members);
3393
3394 for wp in &workpapers {
3395 if let Some(pb) = &pb {
3396 pb.inc(1);
3397 }
3398
3399 let evidence = evidence_gen.generate_evidence_for_workpaper(
3401 wp,
3402 &team_members,
3403 wp.preparer_date,
3404 );
3405
3406 for _ in &evidence {
3407 if let Some(pb) = &pb {
3408 pb.inc(1);
3409 }
3410 }
3411
3412 snapshot.evidence.extend(evidence);
3413 }
3414
3415 let risks =
3417 risk_gen.generate_risks_for_engagement(&engagement, &team_members, &accounts);
3418
3419 for _ in &risks {
3420 if let Some(pb) = &pb {
3421 pb.inc(1);
3422 }
3423 }
3424 snapshot.risk_assessments.extend(risks);
3425
3426 let findings = finding_gen.generate_findings_for_engagement(
3428 &engagement,
3429 &workpapers,
3430 &team_members,
3431 );
3432
3433 for _ in &findings {
3434 if let Some(pb) = &pb {
3435 pb.inc(1);
3436 }
3437 }
3438 snapshot.findings.extend(findings);
3439
3440 let judgments =
3442 judgment_gen.generate_judgments_for_engagement(&engagement, &team_members);
3443
3444 for _ in &judgments {
3445 if let Some(pb) = &pb {
3446 pb.inc(1);
3447 }
3448 }
3449 snapshot.judgments.extend(judgments);
3450
3451 snapshot.workpapers.extend(workpapers);
3453 snapshot.engagements.push(engagement);
3454 }
3455 }
3456
3457 if let Some(pb) = pb {
3458 pb.finish_with_message(format!(
3459 "Audit data: {} engagements, {} workpapers, {} evidence",
3460 snapshot.engagements.len(),
3461 snapshot.workpapers.len(),
3462 snapshot.evidence.len()
3463 ));
3464 }
3465
3466 Ok(snapshot)
3467 }
3468
3469 fn export_graphs(
3476 &mut self,
3477 entries: &[JournalEntry],
3478 _coa: &Arc<ChartOfAccounts>,
3479 stats: &mut EnhancedGenerationStatistics,
3480 ) -> SynthResult<GraphExportSnapshot> {
3481 let pb = self.create_progress_bar(100, "Exporting Graphs");
3482
3483 let mut snapshot = GraphExportSnapshot::default();
3484
3485 let output_dir = self
3487 .output_path
3488 .clone()
3489 .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
3490 let graph_dir = output_dir.join(&self.config.graph_export.output_subdirectory);
3491
3492 for graph_type in &self.config.graph_export.graph_types {
3494 if let Some(pb) = &pb {
3495 pb.inc(10);
3496 }
3497
3498 let graph_config = TransactionGraphConfig {
3500 include_vendors: false,
3501 include_customers: false,
3502 create_debit_credit_edges: true,
3503 include_document_nodes: graph_type.include_document_nodes,
3504 min_edge_weight: graph_type.min_edge_weight,
3505 aggregate_parallel_edges: graph_type.aggregate_edges,
3506 };
3507
3508 let mut builder = TransactionGraphBuilder::new(graph_config);
3509 builder.add_journal_entries(entries);
3510 let graph = builder.build();
3511
3512 stats.graph_node_count += graph.node_count();
3514 stats.graph_edge_count += graph.edge_count();
3515
3516 if let Some(pb) = &pb {
3517 pb.inc(40);
3518 }
3519
3520 for format in &self.config.graph_export.formats {
3522 let format_dir = graph_dir.join(&graph_type.name).join(format_name(*format));
3523
3524 if let Err(e) = std::fs::create_dir_all(&format_dir) {
3526 warn!("Failed to create graph output directory: {}", e);
3527 continue;
3528 }
3529
3530 match format {
3531 datasynth_config::schema::GraphExportFormat::PytorchGeometric => {
3532 let pyg_config = PyGExportConfig {
3533 common: datasynth_graph::CommonExportConfig {
3534 export_node_features: true,
3535 export_edge_features: true,
3536 export_node_labels: true,
3537 export_edge_labels: true,
3538 export_masks: true,
3539 train_ratio: self.config.graph_export.train_ratio,
3540 val_ratio: self.config.graph_export.validation_ratio,
3541 seed: self.config.graph_export.split_seed.unwrap_or(self.seed),
3542 },
3543 one_hot_categoricals: false,
3544 };
3545
3546 let exporter = PyGExporter::new(pyg_config);
3547 match exporter.export(&graph, &format_dir) {
3548 Ok(metadata) => {
3549 snapshot.exports.insert(
3550 format!("{}_{}", graph_type.name, "pytorch_geometric"),
3551 GraphExportInfo {
3552 name: graph_type.name.clone(),
3553 format: "pytorch_geometric".to_string(),
3554 output_path: format_dir.clone(),
3555 node_count: metadata.num_nodes,
3556 edge_count: metadata.num_edges,
3557 },
3558 );
3559 snapshot.graph_count += 1;
3560 }
3561 Err(e) => {
3562 warn!("Failed to export PyTorch Geometric graph: {}", e);
3563 }
3564 }
3565 }
3566 datasynth_config::schema::GraphExportFormat::Neo4j => {
3567 debug!("Neo4j export not yet implemented for accounting networks");
3569 }
3570 datasynth_config::schema::GraphExportFormat::Dgl => {
3571 debug!("DGL export not yet implemented for accounting networks");
3573 }
3574 datasynth_config::schema::GraphExportFormat::RustGraph => {
3575 use datasynth_graph::{
3576 RustGraphExportConfig, RustGraphExporter, RustGraphOutputFormat,
3577 };
3578
3579 let rustgraph_config = RustGraphExportConfig {
3580 include_features: true,
3581 include_temporal: true,
3582 include_labels: true,
3583 source_name: "datasynth".to_string(),
3584 batch_id: None,
3585 output_format: RustGraphOutputFormat::JsonLines,
3586 export_node_properties: true,
3587 export_edge_properties: true,
3588 pretty_print: false,
3589 };
3590
3591 let exporter = RustGraphExporter::new(rustgraph_config);
3592 match exporter.export(&graph, &format_dir) {
3593 Ok(metadata) => {
3594 snapshot.exports.insert(
3595 format!("{}_{}", graph_type.name, "rustgraph"),
3596 GraphExportInfo {
3597 name: graph_type.name.clone(),
3598 format: "rustgraph".to_string(),
3599 output_path: format_dir.clone(),
3600 node_count: metadata.num_nodes,
3601 edge_count: metadata.num_edges,
3602 },
3603 );
3604 snapshot.graph_count += 1;
3605 }
3606 Err(e) => {
3607 warn!("Failed to export RustGraph: {}", e);
3608 }
3609 }
3610 }
3611 datasynth_config::schema::GraphExportFormat::RustGraphHypergraph => {
3612 debug!("RustGraphHypergraph format is handled in Phase 10b (hypergraph export)");
3614 }
3615 }
3616 }
3617
3618 if let Some(pb) = &pb {
3619 pb.inc(40);
3620 }
3621 }
3622
3623 stats.graph_export_count = snapshot.graph_count;
3624 snapshot.exported = snapshot.graph_count > 0;
3625
3626 if let Some(pb) = pb {
3627 pb.finish_with_message(format!(
3628 "Graphs exported: {} graphs ({} nodes, {} edges)",
3629 snapshot.graph_count, stats.graph_node_count, stats.graph_edge_count
3630 ));
3631 }
3632
3633 Ok(snapshot)
3634 }
3635
3636 fn export_hypergraph(
3643 &self,
3644 coa: &Arc<ChartOfAccounts>,
3645 entries: &[JournalEntry],
3646 document_flows: &DocumentFlowSnapshot,
3647 stats: &mut EnhancedGenerationStatistics,
3648 ) -> SynthResult<HypergraphExportInfo> {
3649 use datasynth_graph::builders::hypergraph::{HypergraphBuilder, HypergraphConfig};
3650 use datasynth_graph::exporters::hypergraph::{HypergraphExportConfig, HypergraphExporter};
3651 use datasynth_graph::exporters::unified::{RustGraphUnifiedExporter, UnifiedExportConfig};
3652 use datasynth_graph::models::hypergraph::AggregationStrategy;
3653
3654 let hg_settings = &self.config.graph_export.hypergraph;
3655
3656 let aggregation_strategy = match hg_settings.aggregation_strategy.as_str() {
3658 "truncate" => AggregationStrategy::Truncate,
3659 "pool_by_counterparty" => AggregationStrategy::PoolByCounterparty,
3660 "pool_by_time_period" => AggregationStrategy::PoolByTimePeriod,
3661 "importance_sample" => AggregationStrategy::ImportanceSample,
3662 _ => AggregationStrategy::PoolByCounterparty,
3663 };
3664
3665 let builder_config = HypergraphConfig {
3666 max_nodes: hg_settings.max_nodes,
3667 aggregation_strategy,
3668 include_coso: hg_settings.governance_layer.include_coso,
3669 include_controls: hg_settings.governance_layer.include_controls,
3670 include_sox: hg_settings.governance_layer.include_sox,
3671 include_vendors: hg_settings.governance_layer.include_vendors,
3672 include_customers: hg_settings.governance_layer.include_customers,
3673 include_employees: hg_settings.governance_layer.include_employees,
3674 include_p2p: hg_settings.process_layer.include_p2p,
3675 include_o2c: hg_settings.process_layer.include_o2c,
3676 events_as_hyperedges: hg_settings.process_layer.events_as_hyperedges,
3677 docs_per_counterparty_threshold: hg_settings
3678 .process_layer
3679 .docs_per_counterparty_threshold,
3680 include_accounts: hg_settings.accounting_layer.include_accounts,
3681 je_as_hyperedges: hg_settings.accounting_layer.je_as_hyperedges,
3682 include_cross_layer_edges: hg_settings.cross_layer.enabled,
3683 };
3684
3685 let mut builder = HypergraphBuilder::new(builder_config);
3686
3687 builder.add_coso_framework();
3689
3690 if hg_settings.governance_layer.include_controls && self.config.internal_controls.enabled {
3693 let controls = InternalControl::standard_controls();
3694 builder.add_controls(&controls);
3695 }
3696
3697 builder.add_vendors(&self.master_data.vendors);
3699 builder.add_customers(&self.master_data.customers);
3700 builder.add_employees(&self.master_data.employees);
3701
3702 builder.add_p2p_documents(
3704 &document_flows.purchase_orders,
3705 &document_flows.goods_receipts,
3706 &document_flows.vendor_invoices,
3707 &document_flows.payments,
3708 );
3709 builder.add_o2c_documents(
3710 &document_flows.sales_orders,
3711 &document_flows.deliveries,
3712 &document_flows.customer_invoices,
3713 );
3714
3715 builder.add_accounts(coa);
3717 builder.add_journal_entries_as_hyperedges(entries);
3718
3719 let hypergraph = builder.build();
3721
3722 let output_dir = self
3724 .output_path
3725 .clone()
3726 .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
3727 let hg_dir = output_dir
3728 .join(&self.config.graph_export.output_subdirectory)
3729 .join(&hg_settings.output_subdirectory);
3730
3731 let (num_nodes, num_edges, num_hyperedges) = match hg_settings.output_format.as_str() {
3733 "unified" => {
3734 let exporter = RustGraphUnifiedExporter::new(UnifiedExportConfig::default());
3735 let metadata = exporter.export(&hypergraph, &hg_dir).map_err(|e| {
3736 SynthError::generation(format!("Unified hypergraph export failed: {}", e))
3737 })?;
3738 (
3739 metadata.num_nodes,
3740 metadata.num_edges,
3741 metadata.num_hyperedges,
3742 )
3743 }
3744 _ => {
3745 let exporter = HypergraphExporter::new(HypergraphExportConfig::default());
3747 let metadata = exporter.export(&hypergraph, &hg_dir).map_err(|e| {
3748 SynthError::generation(format!("Hypergraph export failed: {}", e))
3749 })?;
3750 (
3751 metadata.num_nodes,
3752 metadata.num_edges,
3753 metadata.num_hyperedges,
3754 )
3755 }
3756 };
3757
3758 #[cfg(feature = "streaming")]
3760 if let Some(ref target_url) = hg_settings.stream_target {
3761 use crate::stream_client::{StreamClient, StreamConfig};
3762 use std::io::Write as _;
3763
3764 let api_key = std::env::var("RUSTGRAPH_API_KEY").ok();
3765 let stream_config = StreamConfig {
3766 target_url: target_url.clone(),
3767 batch_size: hg_settings.stream_batch_size,
3768 api_key,
3769 ..StreamConfig::default()
3770 };
3771
3772 match StreamClient::new(stream_config) {
3773 Ok(mut client) => {
3774 let exporter = RustGraphUnifiedExporter::new(UnifiedExportConfig::default());
3775 match exporter.export_to_writer(&hypergraph, &mut client) {
3776 Ok(_) => {
3777 if let Err(e) = client.flush() {
3778 warn!("Failed to flush stream client: {}", e);
3779 } else {
3780 info!("Streamed {} records to {}", client.total_sent(), target_url);
3781 }
3782 }
3783 Err(e) => {
3784 warn!("Streaming export failed: {}", e);
3785 }
3786 }
3787 }
3788 Err(e) => {
3789 warn!("Failed to create stream client: {}", e);
3790 }
3791 }
3792 }
3793
3794 stats.graph_node_count += num_nodes;
3796 stats.graph_edge_count += num_edges;
3797 stats.graph_export_count += 1;
3798
3799 Ok(HypergraphExportInfo {
3800 node_count: num_nodes,
3801 edge_count: num_edges,
3802 hyperedge_count: num_hyperedges,
3803 output_path: hg_dir,
3804 })
3805 }
3806
3807 fn generate_banking_data(&mut self) -> SynthResult<BankingSnapshot> {
3812 let pb = self.create_progress_bar(100, "Generating Banking Data");
3813
3814 let orchestrator = BankingOrchestratorBuilder::new()
3816 .config(self.config.banking.clone())
3817 .seed(self.seed + 9000)
3818 .build();
3819
3820 if let Some(pb) = &pb {
3821 pb.inc(10);
3822 }
3823
3824 let result = orchestrator.generate();
3826
3827 if let Some(pb) = &pb {
3828 pb.inc(90);
3829 pb.finish_with_message(format!(
3830 "Banking: {} customers, {} transactions",
3831 result.customers.len(),
3832 result.transactions.len()
3833 ));
3834 }
3835
3836 Ok(BankingSnapshot {
3837 customers: result.customers,
3838 accounts: result.accounts,
3839 transactions: result.transactions,
3840 suspicious_count: result.stats.suspicious_count,
3841 scenario_count: result.scenarios.len(),
3842 })
3843 }
3844
3845 fn calculate_total_transactions(&self) -> u64 {
3847 let months = self.config.global.period_months as f64;
3848 self.config
3849 .companies
3850 .iter()
3851 .map(|c| {
3852 let annual = c.annual_transaction_volume.count() as f64;
3853 let weighted = annual * c.volume_weight;
3854 (weighted * months / 12.0) as u64
3855 })
3856 .sum()
3857 }
3858
3859 fn create_progress_bar(&self, total: u64, message: &str) -> Option<ProgressBar> {
3861 if !self.phase_config.show_progress {
3862 return None;
3863 }
3864
3865 let pb = if let Some(mp) = &self.multi_progress {
3866 mp.add(ProgressBar::new(total))
3867 } else {
3868 ProgressBar::new(total)
3869 };
3870
3871 pb.set_style(
3872 ProgressStyle::default_bar()
3873 .template(&format!(
3874 "{{spinner:.green}} {} [{{elapsed_precise}}] [{{bar:40.cyan/blue}}] {{pos}}/{{len}} ({{per_sec}})",
3875 message
3876 ))
3877 .expect("Progress bar template should be valid - uses only standard indicatif placeholders")
3878 .progress_chars("#>-"),
3879 );
3880
3881 Some(pb)
3882 }
3883
3884 pub fn get_coa(&self) -> Option<Arc<ChartOfAccounts>> {
3886 self.coa.clone()
3887 }
3888
3889 pub fn get_master_data(&self) -> &MasterDataSnapshot {
3891 &self.master_data
3892 }
3893
3894 fn build_lineage_graph(&self) -> super::lineage::LineageGraph {
3896 use super::lineage::LineageGraphBuilder;
3897
3898 let mut builder = LineageGraphBuilder::new();
3899
3900 builder.add_config_section("config:global", "Global Config");
3902 builder.add_config_section("config:chart_of_accounts", "Chart of Accounts Config");
3903 builder.add_config_section("config:transactions", "Transaction Config");
3904
3905 builder.add_generator_phase("phase:coa", "Chart of Accounts Generation");
3907 builder.add_generator_phase("phase:je", "Journal Entry Generation");
3908
3909 builder.configured_by("phase:coa", "config:chart_of_accounts");
3911 builder.configured_by("phase:je", "config:transactions");
3912
3913 builder.add_output_file("output:je", "Journal Entries", "sample_entries.json");
3915 builder.produced_by("output:je", "phase:je");
3916
3917 if self.phase_config.generate_master_data {
3919 builder.add_config_section("config:master_data", "Master Data Config");
3920 builder.add_generator_phase("phase:master_data", "Master Data Generation");
3921 builder.configured_by("phase:master_data", "config:master_data");
3922 builder.input_to("phase:master_data", "phase:je");
3923 }
3924
3925 if self.phase_config.generate_document_flows {
3926 builder.add_config_section("config:document_flows", "Document Flow Config");
3927 builder.add_generator_phase("phase:p2p", "P2P Document Flow");
3928 builder.add_generator_phase("phase:o2c", "O2C Document Flow");
3929 builder.configured_by("phase:p2p", "config:document_flows");
3930 builder.configured_by("phase:o2c", "config:document_flows");
3931
3932 builder.add_output_file("output:po", "Purchase Orders", "purchase_orders.csv");
3933 builder.add_output_file("output:gr", "Goods Receipts", "goods_receipts.csv");
3934 builder.add_output_file("output:vi", "Vendor Invoices", "vendor_invoices.csv");
3935 builder.add_output_file("output:so", "Sales Orders", "sales_orders.csv");
3936 builder.add_output_file("output:ci", "Customer Invoices", "customer_invoices.csv");
3937
3938 builder.produced_by("output:po", "phase:p2p");
3939 builder.produced_by("output:gr", "phase:p2p");
3940 builder.produced_by("output:vi", "phase:p2p");
3941 builder.produced_by("output:so", "phase:o2c");
3942 builder.produced_by("output:ci", "phase:o2c");
3943 }
3944
3945 if self.phase_config.inject_anomalies {
3946 builder.add_config_section("config:fraud", "Fraud/Anomaly Config");
3947 builder.add_generator_phase("phase:anomaly", "Anomaly Injection");
3948 builder.configured_by("phase:anomaly", "config:fraud");
3949 builder.add_output_file(
3950 "output:labels",
3951 "Anomaly Labels",
3952 "labels/anomaly_labels.csv",
3953 );
3954 builder.produced_by("output:labels", "phase:anomaly");
3955 }
3956
3957 if self.phase_config.generate_audit {
3958 builder.add_config_section("config:audit", "Audit Config");
3959 builder.add_generator_phase("phase:audit", "Audit Data Generation");
3960 builder.configured_by("phase:audit", "config:audit");
3961 }
3962
3963 if self.phase_config.generate_banking {
3964 builder.add_config_section("config:banking", "Banking Config");
3965 builder.add_generator_phase("phase:banking", "Banking KYC/AML Generation");
3966 builder.configured_by("phase:banking", "config:banking");
3967 }
3968
3969 if self.config.llm.enabled {
3970 builder.add_config_section("config:llm", "LLM Enrichment Config");
3971 builder.add_generator_phase("phase:llm_enrichment", "LLM Enrichment");
3972 builder.configured_by("phase:llm_enrichment", "config:llm");
3973 }
3974
3975 if self.config.diffusion.enabled {
3976 builder.add_config_section("config:diffusion", "Diffusion Enhancement Config");
3977 builder.add_generator_phase("phase:diffusion", "Diffusion Enhancement");
3978 builder.configured_by("phase:diffusion", "config:diffusion");
3979 }
3980
3981 if self.config.causal.enabled {
3982 builder.add_config_section("config:causal", "Causal Generation Config");
3983 builder.add_generator_phase("phase:causal", "Causal Overlay");
3984 builder.configured_by("phase:causal", "config:causal");
3985 }
3986
3987 builder.build()
3988 }
3989}
3990
3991fn format_name(format: datasynth_config::schema::GraphExportFormat) -> &'static str {
3993 match format {
3994 datasynth_config::schema::GraphExportFormat::PytorchGeometric => "pytorch_geometric",
3995 datasynth_config::schema::GraphExportFormat::Neo4j => "neo4j",
3996 datasynth_config::schema::GraphExportFormat::Dgl => "dgl",
3997 datasynth_config::schema::GraphExportFormat::RustGraph => "rustgraph",
3998 datasynth_config::schema::GraphExportFormat::RustGraphHypergraph => "rustgraph_hypergraph",
3999 }
4000}
4001
4002#[cfg(test)]
4003#[allow(clippy::unwrap_used)]
4004mod tests {
4005 use super::*;
4006 use datasynth_config::schema::*;
4007
4008 fn create_test_config() -> GeneratorConfig {
4009 GeneratorConfig {
4010 global: GlobalConfig {
4011 industry: IndustrySector::Manufacturing,
4012 start_date: "2024-01-01".to_string(),
4013 period_months: 1,
4014 seed: Some(42),
4015 parallel: false,
4016 group_currency: "USD".to_string(),
4017 worker_threads: 0,
4018 memory_limit_mb: 0,
4019 },
4020 companies: vec![CompanyConfig {
4021 code: "1000".to_string(),
4022 name: "Test Company".to_string(),
4023 currency: "USD".to_string(),
4024 country: "US".to_string(),
4025 annual_transaction_volume: TransactionVolume::TenK,
4026 volume_weight: 1.0,
4027 fiscal_year_variant: "K4".to_string(),
4028 }],
4029 chart_of_accounts: ChartOfAccountsConfig {
4030 complexity: CoAComplexity::Small,
4031 industry_specific: true,
4032 custom_accounts: None,
4033 min_hierarchy_depth: 2,
4034 max_hierarchy_depth: 4,
4035 },
4036 transactions: TransactionConfig::default(),
4037 output: OutputConfig::default(),
4038 fraud: FraudConfig::default(),
4039 internal_controls: InternalControlsConfig::default(),
4040 business_processes: BusinessProcessConfig::default(),
4041 user_personas: UserPersonaConfig::default(),
4042 templates: TemplateConfig::default(),
4043 approval: ApprovalConfig::default(),
4044 departments: DepartmentConfig::default(),
4045 master_data: MasterDataConfig::default(),
4046 document_flows: DocumentFlowConfig::default(),
4047 intercompany: IntercompanyConfig::default(),
4048 balance: BalanceConfig::default(),
4049 ocpm: OcpmConfig::default(),
4050 audit: AuditGenerationConfig::default(),
4051 banking: datasynth_banking::BankingConfig::default(),
4052 data_quality: DataQualitySchemaConfig::default(),
4053 scenario: ScenarioConfig::default(),
4054 temporal: TemporalDriftConfig::default(),
4055 graph_export: GraphExportConfig::default(),
4056 streaming: StreamingSchemaConfig::default(),
4057 rate_limit: RateLimitSchemaConfig::default(),
4058 temporal_attributes: TemporalAttributeSchemaConfig::default(),
4059 relationships: RelationshipSchemaConfig::default(),
4060 accounting_standards: AccountingStandardsConfig::default(),
4061 audit_standards: AuditStandardsConfig::default(),
4062 distributions: Default::default(),
4063 temporal_patterns: Default::default(),
4064 vendor_network: VendorNetworkSchemaConfig::default(),
4065 customer_segmentation: CustomerSegmentationSchemaConfig::default(),
4066 relationship_strength: RelationshipStrengthSchemaConfig::default(),
4067 cross_process_links: CrossProcessLinksSchemaConfig::default(),
4068 organizational_events: OrganizationalEventsSchemaConfig::default(),
4069 behavioral_drift: BehavioralDriftSchemaConfig::default(),
4070 market_drift: MarketDriftSchemaConfig::default(),
4071 drift_labeling: DriftLabelingSchemaConfig::default(),
4072 anomaly_injection: Default::default(),
4073 industry_specific: Default::default(),
4074 fingerprint_privacy: Default::default(),
4075 quality_gates: Default::default(),
4076 compliance: Default::default(),
4077 webhooks: Default::default(),
4078 llm: Default::default(),
4079 diffusion: Default::default(),
4080 causal: Default::default(),
4081 source_to_pay: Default::default(),
4082 financial_reporting: Default::default(),
4083 hr: Default::default(),
4084 manufacturing: Default::default(),
4085 sales_quotes: Default::default(),
4086 }
4087 }
4088
4089 #[test]
4090 fn test_enhanced_orchestrator_creation() {
4091 let config = create_test_config();
4092 let orchestrator = EnhancedOrchestrator::with_defaults(config);
4093 assert!(orchestrator.is_ok());
4094 }
4095
4096 #[test]
4097 fn test_minimal_generation() {
4098 let config = create_test_config();
4099 let phase_config = PhaseConfig {
4100 generate_master_data: false,
4101 generate_document_flows: false,
4102 generate_journal_entries: true,
4103 inject_anomalies: false,
4104 show_progress: false,
4105 ..Default::default()
4106 };
4107
4108 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
4109 let result = orchestrator.generate();
4110
4111 assert!(result.is_ok());
4112 let result = result.unwrap();
4113 assert!(!result.journal_entries.is_empty());
4114 }
4115
4116 #[test]
4117 fn test_master_data_generation() {
4118 let config = create_test_config();
4119 let phase_config = PhaseConfig {
4120 generate_master_data: true,
4121 generate_document_flows: false,
4122 generate_journal_entries: false,
4123 inject_anomalies: false,
4124 show_progress: false,
4125 vendors_per_company: 5,
4126 customers_per_company: 5,
4127 materials_per_company: 10,
4128 assets_per_company: 5,
4129 employees_per_company: 10,
4130 ..Default::default()
4131 };
4132
4133 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
4134 let result = orchestrator.generate().unwrap();
4135
4136 assert!(!result.master_data.vendors.is_empty());
4137 assert!(!result.master_data.customers.is_empty());
4138 assert!(!result.master_data.materials.is_empty());
4139 }
4140
4141 #[test]
4142 fn test_document_flow_generation() {
4143 let config = create_test_config();
4144 let phase_config = PhaseConfig {
4145 generate_master_data: true,
4146 generate_document_flows: true,
4147 generate_journal_entries: false,
4148 inject_anomalies: false,
4149 inject_data_quality: false,
4150 validate_balances: false,
4151 generate_ocpm_events: false,
4152 show_progress: false,
4153 vendors_per_company: 5,
4154 customers_per_company: 5,
4155 materials_per_company: 10,
4156 assets_per_company: 5,
4157 employees_per_company: 10,
4158 p2p_chains: 5,
4159 o2c_chains: 5,
4160 ..Default::default()
4161 };
4162
4163 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
4164 let result = orchestrator.generate().unwrap();
4165
4166 assert!(!result.document_flows.p2p_chains.is_empty());
4168 assert!(!result.document_flows.o2c_chains.is_empty());
4169
4170 assert!(!result.document_flows.purchase_orders.is_empty());
4172 assert!(!result.document_flows.sales_orders.is_empty());
4173 }
4174
4175 #[test]
4176 fn test_anomaly_injection() {
4177 let config = create_test_config();
4178 let phase_config = PhaseConfig {
4179 generate_master_data: false,
4180 generate_document_flows: false,
4181 generate_journal_entries: true,
4182 inject_anomalies: true,
4183 show_progress: false,
4184 ..Default::default()
4185 };
4186
4187 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
4188 let result = orchestrator.generate().unwrap();
4189
4190 assert!(!result.journal_entries.is_empty());
4192
4193 assert!(result.anomaly_labels.summary.is_some());
4196 }
4197
4198 #[test]
4199 fn test_full_generation_pipeline() {
4200 let config = create_test_config();
4201 let phase_config = PhaseConfig {
4202 generate_master_data: true,
4203 generate_document_flows: true,
4204 generate_journal_entries: true,
4205 inject_anomalies: false,
4206 inject_data_quality: false,
4207 validate_balances: true,
4208 generate_ocpm_events: false,
4209 show_progress: false,
4210 vendors_per_company: 3,
4211 customers_per_company: 3,
4212 materials_per_company: 5,
4213 assets_per_company: 3,
4214 employees_per_company: 5,
4215 p2p_chains: 3,
4216 o2c_chains: 3,
4217 ..Default::default()
4218 };
4219
4220 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
4221 let result = orchestrator.generate().unwrap();
4222
4223 assert!(!result.master_data.vendors.is_empty());
4225 assert!(!result.master_data.customers.is_empty());
4226 assert!(!result.document_flows.p2p_chains.is_empty());
4227 assert!(!result.document_flows.o2c_chains.is_empty());
4228 assert!(!result.journal_entries.is_empty());
4229 assert!(result.statistics.accounts_count > 0);
4230
4231 assert!(!result.subledger.ap_invoices.is_empty());
4233 assert!(!result.subledger.ar_invoices.is_empty());
4234
4235 assert!(result.balance_validation.validated);
4237 assert!(result.balance_validation.entries_processed > 0);
4238 }
4239
4240 #[test]
4241 fn test_subledger_linking() {
4242 let config = create_test_config();
4243 let phase_config = PhaseConfig {
4244 generate_master_data: true,
4245 generate_document_flows: true,
4246 generate_journal_entries: false,
4247 inject_anomalies: false,
4248 inject_data_quality: false,
4249 validate_balances: false,
4250 generate_ocpm_events: false,
4251 show_progress: false,
4252 vendors_per_company: 5,
4253 customers_per_company: 5,
4254 materials_per_company: 10,
4255 assets_per_company: 3,
4256 employees_per_company: 5,
4257 p2p_chains: 5,
4258 o2c_chains: 5,
4259 ..Default::default()
4260 };
4261
4262 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
4263 let result = orchestrator.generate().unwrap();
4264
4265 assert!(!result.document_flows.vendor_invoices.is_empty());
4267 assert!(!result.document_flows.customer_invoices.is_empty());
4268
4269 assert!(!result.subledger.ap_invoices.is_empty());
4271 assert!(!result.subledger.ar_invoices.is_empty());
4272
4273 assert_eq!(
4275 result.subledger.ap_invoices.len(),
4276 result.document_flows.vendor_invoices.len()
4277 );
4278
4279 assert_eq!(
4281 result.subledger.ar_invoices.len(),
4282 result.document_flows.customer_invoices.len()
4283 );
4284
4285 assert_eq!(
4287 result.statistics.ap_invoice_count,
4288 result.subledger.ap_invoices.len()
4289 );
4290 assert_eq!(
4291 result.statistics.ar_invoice_count,
4292 result.subledger.ar_invoices.len()
4293 );
4294 }
4295
4296 #[test]
4297 fn test_balance_validation() {
4298 let config = create_test_config();
4299 let phase_config = PhaseConfig {
4300 generate_master_data: false,
4301 generate_document_flows: false,
4302 generate_journal_entries: true,
4303 inject_anomalies: false,
4304 validate_balances: true,
4305 show_progress: false,
4306 ..Default::default()
4307 };
4308
4309 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
4310 let result = orchestrator.generate().unwrap();
4311
4312 assert!(result.balance_validation.validated);
4314 assert!(result.balance_validation.entries_processed > 0);
4315
4316 assert!(!result.balance_validation.has_unbalanced_entries);
4318
4319 assert_eq!(
4321 result.balance_validation.total_debits,
4322 result.balance_validation.total_credits
4323 );
4324 }
4325
4326 #[test]
4327 fn test_statistics_accuracy() {
4328 let config = create_test_config();
4329 let phase_config = PhaseConfig {
4330 generate_master_data: true,
4331 generate_document_flows: false,
4332 generate_journal_entries: true,
4333 inject_anomalies: false,
4334 show_progress: false,
4335 vendors_per_company: 10,
4336 customers_per_company: 20,
4337 materials_per_company: 15,
4338 assets_per_company: 5,
4339 employees_per_company: 8,
4340 ..Default::default()
4341 };
4342
4343 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
4344 let result = orchestrator.generate().unwrap();
4345
4346 assert_eq!(
4348 result.statistics.vendor_count,
4349 result.master_data.vendors.len()
4350 );
4351 assert_eq!(
4352 result.statistics.customer_count,
4353 result.master_data.customers.len()
4354 );
4355 assert_eq!(
4356 result.statistics.material_count,
4357 result.master_data.materials.len()
4358 );
4359 assert_eq!(
4360 result.statistics.total_entries as usize,
4361 result.journal_entries.len()
4362 );
4363 }
4364
4365 #[test]
4366 fn test_phase_config_defaults() {
4367 let config = PhaseConfig::default();
4368 assert!(config.generate_master_data);
4369 assert!(config.generate_document_flows);
4370 assert!(config.generate_journal_entries);
4371 assert!(!config.inject_anomalies);
4372 assert!(config.validate_balances);
4373 assert!(config.show_progress);
4374 assert!(config.vendors_per_company > 0);
4375 assert!(config.customers_per_company > 0);
4376 }
4377
4378 #[test]
4379 fn test_get_coa_before_generation() {
4380 let config = create_test_config();
4381 let orchestrator = EnhancedOrchestrator::with_defaults(config).unwrap();
4382
4383 assert!(orchestrator.get_coa().is_none());
4385 }
4386
4387 #[test]
4388 fn test_get_coa_after_generation() {
4389 let config = create_test_config();
4390 let phase_config = PhaseConfig {
4391 generate_master_data: false,
4392 generate_document_flows: false,
4393 generate_journal_entries: true,
4394 inject_anomalies: false,
4395 show_progress: false,
4396 ..Default::default()
4397 };
4398
4399 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
4400 let _ = orchestrator.generate().unwrap();
4401
4402 assert!(orchestrator.get_coa().is_some());
4404 }
4405
4406 #[test]
4407 fn test_get_master_data() {
4408 let config = create_test_config();
4409 let phase_config = PhaseConfig {
4410 generate_master_data: true,
4411 generate_document_flows: false,
4412 generate_journal_entries: false,
4413 inject_anomalies: false,
4414 show_progress: false,
4415 vendors_per_company: 5,
4416 customers_per_company: 5,
4417 materials_per_company: 5,
4418 assets_per_company: 5,
4419 employees_per_company: 5,
4420 ..Default::default()
4421 };
4422
4423 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
4424 let _ = orchestrator.generate().unwrap();
4425
4426 let master_data = orchestrator.get_master_data();
4427 assert!(!master_data.vendors.is_empty());
4428 }
4429
4430 #[test]
4431 fn test_with_progress_builder() {
4432 let config = create_test_config();
4433 let orchestrator = EnhancedOrchestrator::with_defaults(config)
4434 .unwrap()
4435 .with_progress(false);
4436
4437 assert!(!orchestrator.phase_config.show_progress);
4439 }
4440
4441 #[test]
4442 fn test_multi_company_generation() {
4443 let mut config = create_test_config();
4444 config.companies.push(CompanyConfig {
4445 code: "2000".to_string(),
4446 name: "Subsidiary".to_string(),
4447 currency: "EUR".to_string(),
4448 country: "DE".to_string(),
4449 annual_transaction_volume: TransactionVolume::TenK,
4450 volume_weight: 0.5,
4451 fiscal_year_variant: "K4".to_string(),
4452 });
4453
4454 let phase_config = PhaseConfig {
4455 generate_master_data: true,
4456 generate_document_flows: false,
4457 generate_journal_entries: true,
4458 inject_anomalies: false,
4459 show_progress: false,
4460 vendors_per_company: 5,
4461 customers_per_company: 5,
4462 materials_per_company: 5,
4463 assets_per_company: 5,
4464 employees_per_company: 5,
4465 ..Default::default()
4466 };
4467
4468 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
4469 let result = orchestrator.generate().unwrap();
4470
4471 assert!(result.statistics.vendor_count >= 10); assert!(result.statistics.customer_count >= 10);
4474 assert!(result.statistics.companies_count == 2);
4475 }
4476
4477 #[test]
4478 fn test_empty_master_data_skips_document_flows() {
4479 let config = create_test_config();
4480 let phase_config = PhaseConfig {
4481 generate_master_data: false, generate_document_flows: true, generate_journal_entries: false,
4484 inject_anomalies: false,
4485 show_progress: false,
4486 ..Default::default()
4487 };
4488
4489 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
4490 let result = orchestrator.generate().unwrap();
4491
4492 assert!(result.document_flows.p2p_chains.is_empty());
4494 assert!(result.document_flows.o2c_chains.is_empty());
4495 }
4496
4497 #[test]
4498 fn test_journal_entry_line_item_count() {
4499 let config = create_test_config();
4500 let phase_config = PhaseConfig {
4501 generate_master_data: false,
4502 generate_document_flows: false,
4503 generate_journal_entries: true,
4504 inject_anomalies: false,
4505 show_progress: false,
4506 ..Default::default()
4507 };
4508
4509 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
4510 let result = orchestrator.generate().unwrap();
4511
4512 let calculated_line_items: u64 = result
4514 .journal_entries
4515 .iter()
4516 .map(|e| e.line_count() as u64)
4517 .sum();
4518 assert_eq!(result.statistics.total_line_items, calculated_line_items);
4519 }
4520
4521 #[test]
4522 fn test_audit_generation() {
4523 let config = create_test_config();
4524 let phase_config = PhaseConfig {
4525 generate_master_data: false,
4526 generate_document_flows: false,
4527 generate_journal_entries: true,
4528 inject_anomalies: false,
4529 show_progress: false,
4530 generate_audit: true,
4531 audit_engagements: 2,
4532 workpapers_per_engagement: 5,
4533 evidence_per_workpaper: 2,
4534 risks_per_engagement: 3,
4535 findings_per_engagement: 2,
4536 judgments_per_engagement: 2,
4537 ..Default::default()
4538 };
4539
4540 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
4541 let result = orchestrator.generate().unwrap();
4542
4543 assert_eq!(result.audit.engagements.len(), 2);
4545 assert!(!result.audit.workpapers.is_empty());
4546 assert!(!result.audit.evidence.is_empty());
4547 assert!(!result.audit.risk_assessments.is_empty());
4548 assert!(!result.audit.findings.is_empty());
4549 assert!(!result.audit.judgments.is_empty());
4550
4551 assert_eq!(
4553 result.statistics.audit_engagement_count,
4554 result.audit.engagements.len()
4555 );
4556 assert_eq!(
4557 result.statistics.audit_workpaper_count,
4558 result.audit.workpapers.len()
4559 );
4560 assert_eq!(
4561 result.statistics.audit_evidence_count,
4562 result.audit.evidence.len()
4563 );
4564 assert_eq!(
4565 result.statistics.audit_risk_count,
4566 result.audit.risk_assessments.len()
4567 );
4568 assert_eq!(
4569 result.statistics.audit_finding_count,
4570 result.audit.findings.len()
4571 );
4572 assert_eq!(
4573 result.statistics.audit_judgment_count,
4574 result.audit.judgments.len()
4575 );
4576 }
4577
4578 #[test]
4579 fn test_new_phases_disabled_by_default() {
4580 let config = create_test_config();
4581 assert!(!config.llm.enabled);
4583 assert!(!config.diffusion.enabled);
4584 assert!(!config.causal.enabled);
4585
4586 let phase_config = PhaseConfig {
4587 generate_master_data: false,
4588 generate_document_flows: false,
4589 generate_journal_entries: true,
4590 inject_anomalies: false,
4591 show_progress: false,
4592 ..Default::default()
4593 };
4594
4595 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
4596 let result = orchestrator.generate().unwrap();
4597
4598 assert_eq!(result.statistics.llm_enrichment_ms, 0);
4600 assert_eq!(result.statistics.llm_vendors_enriched, 0);
4601 assert_eq!(result.statistics.diffusion_enhancement_ms, 0);
4602 assert_eq!(result.statistics.diffusion_samples_generated, 0);
4603 assert_eq!(result.statistics.causal_generation_ms, 0);
4604 assert_eq!(result.statistics.causal_samples_generated, 0);
4605 assert!(result.statistics.causal_validation_passed.is_none());
4606 }
4607
4608 #[test]
4609 fn test_llm_enrichment_enabled() {
4610 let mut config = create_test_config();
4611 config.llm.enabled = true;
4612 config.llm.max_vendor_enrichments = 3;
4613
4614 let phase_config = PhaseConfig {
4615 generate_master_data: true,
4616 generate_document_flows: false,
4617 generate_journal_entries: false,
4618 inject_anomalies: false,
4619 show_progress: false,
4620 vendors_per_company: 5,
4621 customers_per_company: 3,
4622 materials_per_company: 3,
4623 assets_per_company: 3,
4624 employees_per_company: 3,
4625 ..Default::default()
4626 };
4627
4628 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
4629 let result = orchestrator.generate().unwrap();
4630
4631 assert!(result.statistics.llm_vendors_enriched > 0);
4633 assert!(result.statistics.llm_vendors_enriched <= 3);
4634 }
4635
4636 #[test]
4637 fn test_diffusion_enhancement_enabled() {
4638 let mut config = create_test_config();
4639 config.diffusion.enabled = true;
4640 config.diffusion.n_steps = 50;
4641 config.diffusion.sample_size = 20;
4642
4643 let phase_config = PhaseConfig {
4644 generate_master_data: false,
4645 generate_document_flows: false,
4646 generate_journal_entries: true,
4647 inject_anomalies: false,
4648 show_progress: false,
4649 ..Default::default()
4650 };
4651
4652 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
4653 let result = orchestrator.generate().unwrap();
4654
4655 assert_eq!(result.statistics.diffusion_samples_generated, 20);
4657 }
4658
4659 #[test]
4660 fn test_causal_overlay_enabled() {
4661 let mut config = create_test_config();
4662 config.causal.enabled = true;
4663 config.causal.template = "fraud_detection".to_string();
4664 config.causal.sample_size = 100;
4665 config.causal.validate = true;
4666
4667 let phase_config = PhaseConfig {
4668 generate_master_data: false,
4669 generate_document_flows: false,
4670 generate_journal_entries: true,
4671 inject_anomalies: false,
4672 show_progress: false,
4673 ..Default::default()
4674 };
4675
4676 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
4677 let result = orchestrator.generate().unwrap();
4678
4679 assert_eq!(result.statistics.causal_samples_generated, 100);
4681 assert!(result.statistics.causal_validation_passed.is_some());
4683 }
4684
4685 #[test]
4686 fn test_causal_overlay_revenue_cycle_template() {
4687 let mut config = create_test_config();
4688 config.causal.enabled = true;
4689 config.causal.template = "revenue_cycle".to_string();
4690 config.causal.sample_size = 50;
4691 config.causal.validate = false;
4692
4693 let phase_config = PhaseConfig {
4694 generate_master_data: false,
4695 generate_document_flows: false,
4696 generate_journal_entries: true,
4697 inject_anomalies: false,
4698 show_progress: false,
4699 ..Default::default()
4700 };
4701
4702 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
4703 let result = orchestrator.generate().unwrap();
4704
4705 assert_eq!(result.statistics.causal_samples_generated, 50);
4707 assert!(result.statistics.causal_validation_passed.is_none());
4709 }
4710
4711 #[test]
4712 fn test_all_new_phases_enabled_together() {
4713 let mut config = create_test_config();
4714 config.llm.enabled = true;
4715 config.llm.max_vendor_enrichments = 2;
4716 config.diffusion.enabled = true;
4717 config.diffusion.n_steps = 20;
4718 config.diffusion.sample_size = 10;
4719 config.causal.enabled = true;
4720 config.causal.sample_size = 50;
4721 config.causal.validate = true;
4722
4723 let phase_config = PhaseConfig {
4724 generate_master_data: true,
4725 generate_document_flows: false,
4726 generate_journal_entries: true,
4727 inject_anomalies: false,
4728 show_progress: false,
4729 vendors_per_company: 5,
4730 customers_per_company: 3,
4731 materials_per_company: 3,
4732 assets_per_company: 3,
4733 employees_per_company: 3,
4734 ..Default::default()
4735 };
4736
4737 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
4738 let result = orchestrator.generate().unwrap();
4739
4740 assert!(result.statistics.llm_vendors_enriched > 0);
4742 assert_eq!(result.statistics.diffusion_samples_generated, 10);
4743 assert_eq!(result.statistics.causal_samples_generated, 50);
4744 assert!(result.statistics.causal_validation_passed.is_some());
4745 }
4746
4747 #[test]
4748 fn test_statistics_serialization_with_new_fields() {
4749 let stats = EnhancedGenerationStatistics {
4750 total_entries: 100,
4751 total_line_items: 500,
4752 llm_enrichment_ms: 42,
4753 llm_vendors_enriched: 10,
4754 diffusion_enhancement_ms: 100,
4755 diffusion_samples_generated: 50,
4756 causal_generation_ms: 200,
4757 causal_samples_generated: 100,
4758 causal_validation_passed: Some(true),
4759 ..Default::default()
4760 };
4761
4762 let json = serde_json::to_string(&stats).unwrap();
4763 let deserialized: EnhancedGenerationStatistics = serde_json::from_str(&json).unwrap();
4764
4765 assert_eq!(deserialized.llm_enrichment_ms, 42);
4766 assert_eq!(deserialized.llm_vendors_enriched, 10);
4767 assert_eq!(deserialized.diffusion_enhancement_ms, 100);
4768 assert_eq!(deserialized.diffusion_samples_generated, 50);
4769 assert_eq!(deserialized.causal_generation_ms, 200);
4770 assert_eq!(deserialized.causal_samples_generated, 100);
4771 assert_eq!(deserialized.causal_validation_passed, Some(true));
4772 }
4773
4774 #[test]
4775 fn test_statistics_backward_compat_deserialization() {
4776 let old_json = r#"{
4778 "total_entries": 100,
4779 "total_line_items": 500,
4780 "accounts_count": 50,
4781 "companies_count": 1,
4782 "period_months": 12,
4783 "vendor_count": 10,
4784 "customer_count": 20,
4785 "material_count": 15,
4786 "asset_count": 5,
4787 "employee_count": 8,
4788 "p2p_chain_count": 5,
4789 "o2c_chain_count": 5,
4790 "ap_invoice_count": 5,
4791 "ar_invoice_count": 5,
4792 "ocpm_event_count": 0,
4793 "ocpm_object_count": 0,
4794 "ocpm_case_count": 0,
4795 "audit_engagement_count": 0,
4796 "audit_workpaper_count": 0,
4797 "audit_evidence_count": 0,
4798 "audit_risk_count": 0,
4799 "audit_finding_count": 0,
4800 "audit_judgment_count": 0,
4801 "anomalies_injected": 0,
4802 "data_quality_issues": 0,
4803 "banking_customer_count": 0,
4804 "banking_account_count": 0,
4805 "banking_transaction_count": 0,
4806 "banking_suspicious_count": 0,
4807 "graph_export_count": 0,
4808 "graph_node_count": 0,
4809 "graph_edge_count": 0
4810 }"#;
4811
4812 let stats: EnhancedGenerationStatistics = serde_json::from_str(old_json).unwrap();
4813
4814 assert_eq!(stats.llm_enrichment_ms, 0);
4816 assert_eq!(stats.llm_vendors_enriched, 0);
4817 assert_eq!(stats.diffusion_enhancement_ms, 0);
4818 assert_eq!(stats.diffusion_samples_generated, 0);
4819 assert_eq!(stats.causal_generation_ms, 0);
4820 assert_eq!(stats.causal_samples_generated, 0);
4821 assert!(stats.causal_validation_passed.is_none());
4822 }
4823}