1use std::collections::HashMap;
22use std::path::PathBuf;
23use std::sync::Arc;
24
25use chrono::{Datelike, NaiveDate};
26use indicatif::{MultiProgress, ProgressBar, ProgressStyle};
27use serde::{Deserialize, Serialize};
28use tracing::{debug, info, warn};
29
30use datasynth_banking::{
31 models::{BankAccount, BankTransaction, BankingCustomer},
32 BankingOrchestratorBuilder,
33};
34use datasynth_config::schema::GeneratorConfig;
35use datasynth_core::error::{SynthError, SynthResult};
36use datasynth_core::models::audit::{
37 AuditEngagement, AuditEvidence, AuditFinding, ProfessionalJudgment, RiskAssessment, Workpaper,
38};
39use datasynth_core::models::sourcing::{
40 BidEvaluation, CatalogItem, ProcurementContract, RfxEvent, SourcingProject, SpendAnalysis,
41 SupplierBid, SupplierQualification, SupplierScorecard,
42};
43use datasynth_core::models::subledger::ap::APInvoice;
44use datasynth_core::models::subledger::ar::ARInvoice;
45use datasynth_core::models::*;
46use datasynth_core::{DegradationActions, DegradationLevel, ResourceGuard, ResourceGuardBuilder};
47use datasynth_fingerprint::{
48 io::FingerprintReader,
49 models::Fingerprint,
50 synthesis::{ConfigSynthesizer, CopulaGeneratorSpec, SynthesisOptions},
51};
52use datasynth_generators::{
53 AnomalyInjector,
55 AnomalyInjectorConfig,
56 AssetGenerator,
57 AuditEngagementGenerator,
59 BalanceTrackerConfig,
60 BankReconciliationGenerator,
62 BidEvaluationGenerator,
64 BidGenerator,
65 CatalogGenerator,
66 ChartOfAccountsGenerator,
68 ContractGenerator,
69 CustomerGenerator,
70 DataQualityConfig,
71 DataQualityInjector,
73 DataQualityStats,
74 DocumentFlowJeConfig,
76 DocumentFlowJeGenerator,
77 DocumentFlowLinker,
79 EmployeeGenerator,
80 EvidenceGenerator,
81 FinancialStatementGenerator,
83 FindingGenerator,
84 JournalEntryGenerator,
85 JudgmentGenerator,
86 LatePaymentDistribution,
87 MaterialGenerator,
88 O2CDocumentChain,
89 O2CGenerator,
90 O2CGeneratorConfig,
91 O2CPaymentBehavior,
92 P2PDocumentChain,
93 P2PGenerator,
95 P2PGeneratorConfig,
96 P2PPaymentBehavior,
97 PaymentReference,
98 QualificationGenerator,
99 RfxGenerator,
100 RiskAssessmentGenerator,
101 RunningBalanceTracker,
103 ScorecardGenerator,
104 SourcingProjectGenerator,
105 SpendAnalysisGenerator,
106 ValidationError,
107 VendorGenerator,
109 WorkpaperGenerator,
110};
111use datasynth_graph::{
112 PyGExportConfig, PyGExporter, TransactionGraphBuilder, TransactionGraphConfig,
113};
114use datasynth_ocpm::{
115 AuditDocuments, BankDocuments, BankReconDocuments, EventLogMetadata, H2rDocuments,
116 MfgDocuments, O2cDocuments, OcpmEventGenerator, OcpmEventLog, OcpmGeneratorConfig,
117 P2pDocuments, S2cDocuments,
118};
119
120use datasynth_config::schema::{O2CFlowConfig, P2PFlowConfig};
121use datasynth_core::causal::{CausalGraph, CausalValidator, StructuralCausalModel};
122use datasynth_core::diffusion::{DiffusionBackend, DiffusionConfig, StatisticalDiffusionBackend};
123use datasynth_core::llm::MockLlmProvider;
124use datasynth_core::models::documents::PaymentMethod;
125use datasynth_generators::llm_enrichment::VendorLlmEnricher;
126
127fn convert_p2p_config(schema_config: &P2PFlowConfig) -> P2PGeneratorConfig {
133 let payment_behavior = &schema_config.payment_behavior;
134 let late_dist = &payment_behavior.late_payment_days_distribution;
135
136 P2PGeneratorConfig {
137 three_way_match_rate: schema_config.three_way_match_rate,
138 partial_delivery_rate: schema_config.partial_delivery_rate,
139 over_delivery_rate: 0.02, price_variance_rate: schema_config.price_variance_rate,
141 max_price_variance_percent: schema_config.max_price_variance_percent,
142 avg_days_po_to_gr: schema_config.average_po_to_gr_days,
143 avg_days_gr_to_invoice: schema_config.average_gr_to_invoice_days,
144 avg_days_invoice_to_payment: schema_config.average_invoice_to_payment_days,
145 payment_method_distribution: vec![
146 (PaymentMethod::BankTransfer, 0.60),
147 (PaymentMethod::Check, 0.25),
148 (PaymentMethod::Wire, 0.10),
149 (PaymentMethod::CreditCard, 0.05),
150 ],
151 early_payment_discount_rate: 0.30, payment_behavior: P2PPaymentBehavior {
153 late_payment_rate: payment_behavior.late_payment_rate,
154 late_payment_distribution: LatePaymentDistribution {
155 slightly_late_1_to_7: late_dist.slightly_late_1_to_7,
156 late_8_to_14: late_dist.late_8_to_14,
157 very_late_15_to_30: late_dist.very_late_15_to_30,
158 severely_late_31_to_60: late_dist.severely_late_31_to_60,
159 extremely_late_over_60: late_dist.extremely_late_over_60,
160 },
161 partial_payment_rate: payment_behavior.partial_payment_rate,
162 payment_correction_rate: payment_behavior.payment_correction_rate,
163 },
164 }
165}
166
167fn convert_o2c_config(schema_config: &O2CFlowConfig) -> O2CGeneratorConfig {
169 let payment_behavior = &schema_config.payment_behavior;
170
171 O2CGeneratorConfig {
172 credit_check_failure_rate: schema_config.credit_check_failure_rate,
173 partial_shipment_rate: schema_config.partial_shipment_rate,
174 avg_days_so_to_delivery: schema_config.average_so_to_delivery_days,
175 avg_days_delivery_to_invoice: schema_config.average_delivery_to_invoice_days,
176 avg_days_invoice_to_payment: schema_config.average_invoice_to_receipt_days,
177 late_payment_rate: 0.15, bad_debt_rate: schema_config.bad_debt_rate,
179 returns_rate: schema_config.return_rate,
180 cash_discount_take_rate: schema_config.cash_discount.taken_rate,
181 payment_method_distribution: vec![
182 (PaymentMethod::BankTransfer, 0.50),
183 (PaymentMethod::Check, 0.30),
184 (PaymentMethod::Wire, 0.15),
185 (PaymentMethod::CreditCard, 0.05),
186 ],
187 payment_behavior: O2CPaymentBehavior {
188 partial_payment_rate: payment_behavior.partial_payments.rate,
189 short_payment_rate: payment_behavior.short_payments.rate,
190 max_short_percent: payment_behavior.short_payments.max_short_percent,
191 on_account_rate: payment_behavior.on_account_payments.rate,
192 payment_correction_rate: payment_behavior.payment_corrections.rate,
193 avg_days_until_remainder: payment_behavior.partial_payments.avg_days_until_remainder,
194 },
195 }
196}
197
/// Switches and volume settings for the generation phases run by
/// `EnhancedOrchestrator`.
///
/// The stock values are defined in the `Default` implementation.
#[derive(Debug, Clone)]
pub struct PhaseConfig {
    /// Gates `phase_master_data`.
    pub generate_master_data: bool,
    /// Gates `phase_document_flows` and the flow-derived journal entries.
    pub generate_document_flows: bool,
    /// Gates `phase_ocpm_events`.
    pub generate_ocpm_events: bool,
    /// Gates standalone journal entry generation in `phase_journal_entries`.
    pub generate_journal_entries: bool,
    /// Gates `phase_anomaly_injection` (also needs entries and no degradation skip).
    pub inject_anomalies: bool,
    /// NOTE(review): presumably gates the data quality injection phase — confirm.
    pub inject_data_quality: bool,
    /// NOTE(review): presumably gates the balance validation phase — confirm.
    pub validate_balances: bool,
    /// Progress display flag; set by `EnhancedOrchestrator::with_progress`.
    pub show_progress: bool,
    // Master data volumes. NOTE(review): names suggest per-company counts —
    // confirm against the master data generators.
    pub vendors_per_company: usize,
    pub customers_per_company: usize,
    pub materials_per_company: usize,
    pub assets_per_company: usize,
    pub employees_per_company: usize,
    // Document flow volumes. NOTE(review): presumably the number of P2P / O2C
    // chains to generate — confirm.
    pub p2p_chains: usize,
    pub o2c_chains: usize,
    // Audit settings. NOTE(review): presumably consumed by the audit phase —
    // confirm.
    pub generate_audit: bool,
    pub audit_engagements: usize,
    pub workpapers_per_engagement: usize,
    pub evidence_per_workpaper: usize,
    pub risks_per_engagement: usize,
    pub findings_per_engagement: usize,
    pub judgments_per_engagement: usize,
    // Optional phase switches; each is `false` by default.
    pub generate_banking: bool,
    pub generate_graph_export: bool,
    pub generate_sourcing: bool,
    pub generate_bank_reconciliation: bool,
    pub generate_financial_statements: bool,
    pub generate_accounting_standards: bool,
    pub generate_manufacturing: bool,
    pub generate_sales_kpi_budgets: bool,
}
262
/// Stock phase configuration: master data, document flows, journal entries and
/// balance validation are on; every other optional phase is off.
impl Default for PhaseConfig {
    fn default() -> Self {
        Self {
            // Phases enabled out of the box.
            generate_master_data: true,
            generate_document_flows: true,
            generate_ocpm_events: false,
            generate_journal_entries: true,
            inject_anomalies: false,
            inject_data_quality: false,
            validate_balances: true,
            show_progress: true,
            // Master data volumes.
            vendors_per_company: 50,
            customers_per_company: 100,
            materials_per_company: 200,
            assets_per_company: 50,
            employees_per_company: 100,
            // Document flow volumes.
            p2p_chains: 100,
            o2c_chains: 100,
            // Audit is off by default; the sizes below apply once it is enabled.
            generate_audit: false,
            audit_engagements: 5,
            workpapers_per_engagement: 20,
            evidence_per_workpaper: 5,
            risks_per_engagement: 15,
            findings_per_engagement: 8,
            judgments_per_engagement: 10,
            // Remaining optional phases are opt-in.
            generate_banking: false,
            generate_graph_export: false,
            generate_sourcing: false,
            generate_bank_reconciliation: false,
            generate_financial_statements: false,
            generate_accounting_standards: false,
            generate_manufacturing: false,
            generate_sales_kpi_budgets: false,
        }
    }
}
299
/// Master data collections held by the orchestrator.
///
/// `generate` clones this snapshot into the result, and `phase_master_data`
/// reads the collection lengths into the statistics.
#[derive(Debug, Clone, Default)]
pub struct MasterDataSnapshot {
    /// Vendors; `phase_document_flows` is skipped when this is empty.
    pub vendors: Vec<Vendor>,
    /// Customers.
    pub customers: Vec<Customer>,
    /// Materials.
    pub materials: Vec<Material>,
    /// Fixed assets.
    pub assets: Vec<FixedAsset>,
    /// Employees.
    pub employees: Vec<Employee>,
}
314
/// Size figures and output location describing a hypergraph export.
///
/// NOTE(review): presumably produced by the hypergraph export phase — confirm
/// where it is constructed.
#[derive(Debug, Clone)]
pub struct HypergraphExportInfo {
    /// Number of nodes.
    pub node_count: usize,
    /// Number of edges.
    pub edge_count: usize,
    /// Number of hyperedges.
    pub hyperedge_count: usize,
    /// Path associated with the export output.
    pub output_path: PathBuf,
}
327
/// Document flow output: the P2P/O2C chains plus per-document-type lists.
///
/// Filled by `generate_document_flows` (called from `phase_document_flows`)
/// and returned as part of the generation result.
#[derive(Debug, Clone, Default)]
pub struct DocumentFlowSnapshot {
    /// Procure-to-pay chains; the count is recorded as `p2p_chain_count`.
    pub p2p_chains: Vec<P2PDocumentChain>,
    /// Order-to-cash chains; the count is recorded as `o2c_chain_count`.
    pub o2c_chains: Vec<O2CDocumentChain>,
    /// Purchase orders.
    pub purchase_orders: Vec<documents::PurchaseOrder>,
    /// Goods receipts.
    pub goods_receipts: Vec<documents::GoodsReceipt>,
    /// Vendor invoices.
    pub vendor_invoices: Vec<documents::VendorInvoice>,
    /// Sales orders.
    pub sales_orders: Vec<documents::SalesOrder>,
    /// Deliveries.
    pub deliveries: Vec<documents::Delivery>,
    /// Customer invoices.
    pub customer_invoices: Vec<documents::CustomerInvoice>,
    /// Payments.
    pub payments: Vec<documents::Payment>,
}
350
/// Subledger records returned by `link_document_flows_to_subledgers`
/// (called from `phase_document_flows`).
#[derive(Debug, Clone, Default)]
pub struct SubledgerSnapshot {
    /// Accounts-payable invoices; the count is recorded as `ap_invoice_count`.
    pub ap_invoices: Vec<APInvoice>,
    /// Accounts-receivable invoices; the count is recorded as `ar_invoice_count`.
    pub ar_invoices: Vec<ARInvoice>,
}
359
/// Result of OCPM event generation.
///
/// `phase_ocpm_events` returns the default (empty) snapshot when OCPM
/// generation is disabled, and copies the three counts into the statistics
/// otherwise.
#[derive(Debug, Clone, Default)]
pub struct OcpmSnapshot {
    /// The generated event log, if any.
    pub event_log: Option<OcpmEventLog>,
    /// Number of events.
    pub event_count: usize,
    /// Number of objects.
    pub object_count: usize,
    /// Number of cases.
    pub case_count: usize,
}
372
/// Audit data returned by `phase_audit_data` and included in the generation
/// result.
#[derive(Debug, Clone, Default)]
pub struct AuditSnapshot {
    /// Audit engagements.
    pub engagements: Vec<AuditEngagement>,
    /// Workpapers.
    pub workpapers: Vec<Workpaper>,
    /// Audit evidence items.
    pub evidence: Vec<AuditEvidence>,
    /// Risk assessments.
    pub risk_assessments: Vec<RiskAssessment>,
    /// Audit findings.
    pub findings: Vec<AuditFinding>,
    /// Professional judgments.
    pub judgments: Vec<ProfessionalJudgment>,
}
389
/// Banking data returned by `phase_banking_data` and included in the
/// generation result.
#[derive(Debug, Clone, Default)]
pub struct BankingSnapshot {
    /// Banking customers.
    pub customers: Vec<BankingCustomer>,
    /// Bank accounts.
    pub accounts: Vec<BankAccount>,
    /// Bank transactions.
    pub transactions: Vec<BankTransaction>,
    /// NOTE(review): presumably the number of transactions flagged suspicious — confirm.
    pub suspicious_count: usize,
    /// NOTE(review): presumably the number of generated scenarios — confirm.
    pub scenario_count: usize,
}
404
/// Graph export outcome returned by `phase_graph_export`.
#[derive(Debug, Clone, Default)]
pub struct GraphExportSnapshot {
    /// Whether an export took place (`false` in the default snapshot).
    pub exported: bool,
    /// Number of graphs.
    pub graph_count: usize,
    /// Per-export details keyed by a string. NOTE(review): presumably the
    /// export or format name — confirm at the insertion site.
    pub exports: HashMap<String, GraphExportInfo>,
}
415
/// Details of a single graph export, stored in `GraphExportSnapshot::exports`.
#[derive(Debug, Clone)]
pub struct GraphExportInfo {
    /// Export name.
    pub name: String,
    /// Export format identifier.
    pub format: String,
    /// Path associated with the export output.
    pub output_path: PathBuf,
    /// Number of nodes.
    pub node_count: usize,
    /// Number of edges.
    pub edge_count: usize,
}
430
/// Sourcing data returned by `phase_sourcing_data` and included in the
/// generation result.
#[derive(Debug, Clone, Default)]
pub struct SourcingSnapshot {
    /// Spend analyses.
    pub spend_analyses: Vec<SpendAnalysis>,
    /// Sourcing projects.
    pub sourcing_projects: Vec<SourcingProject>,
    /// Supplier qualifications.
    pub qualifications: Vec<SupplierQualification>,
    /// RFx events.
    pub rfx_events: Vec<RfxEvent>,
    /// Supplier bids.
    pub bids: Vec<SupplierBid>,
    /// Bid evaluations.
    pub bid_evaluations: Vec<BidEvaluation>,
    /// Procurement contracts.
    pub contracts: Vec<ProcurementContract>,
    /// Catalog items.
    pub catalog_items: Vec<CatalogItem>,
    /// Supplier scorecards.
    pub scorecards: Vec<SupplierScorecard>,
}
453
/// Financial reporting data returned by `phase_financial_reporting`.
#[derive(Debug, Clone, Default)]
pub struct FinancialReportingSnapshot {
    /// Financial statements.
    pub financial_statements: Vec<FinancialStatement>,
    /// Bank reconciliations.
    pub bank_reconciliations: Vec<BankReconciliation>,
}
462
/// HR data returned by `phase_hr_data`.
///
/// NOTE(review): the `*_count` fields presumably mirror the lengths of the
/// matching collections — confirm where they are populated.
#[derive(Debug, Clone, Default)]
pub struct HrSnapshot {
    /// Payroll runs.
    pub payroll_runs: Vec<PayrollRun>,
    /// Payroll line items.
    pub payroll_line_items: Vec<PayrollLineItem>,
    /// Time entries.
    pub time_entries: Vec<TimeEntry>,
    /// Expense reports.
    pub expense_reports: Vec<ExpenseReport>,
    /// Payroll run count.
    pub payroll_run_count: usize,
    /// Payroll line item count.
    pub payroll_line_item_count: usize,
    /// Time entry count.
    pub time_entry_count: usize,
    /// Expense report count.
    pub expense_report_count: usize,
}
483
/// Counters returned by `phase_accounting_standards`; only counts are kept,
/// no record collections.
#[derive(Debug, Clone, Default)]
pub struct AccountingStandardsSnapshot {
    /// Revenue contract count.
    pub revenue_contract_count: usize,
    /// Impairment test count.
    pub impairment_test_count: usize,
}
492
/// Manufacturing data returned by `phase_manufacturing`.
///
/// NOTE(review): the `*_count` fields presumably mirror the lengths of the
/// matching collections — confirm where they are populated.
#[derive(Debug, Clone, Default)]
pub struct ManufacturingSnapshot {
    /// Production orders.
    pub production_orders: Vec<ProductionOrder>,
    /// Quality inspections.
    pub quality_inspections: Vec<QualityInspection>,
    /// Cycle counts (the inventory-counting records).
    pub cycle_counts: Vec<CycleCount>,
    /// Production order count.
    pub production_order_count: usize,
    /// Quality inspection count.
    pub quality_inspection_count: usize,
    /// Number of cycle count records.
    pub cycle_count_count: usize,
}
509
/// Sales quotes, KPIs and budgets returned by `phase_sales_kpi_budgets`.
#[derive(Debug, Clone, Default)]
pub struct SalesKpiBudgetsSnapshot {
    /// Sales quotes.
    pub sales_quotes: Vec<SalesQuote>,
    /// Management KPIs.
    pub kpis: Vec<ManagementKpi>,
    /// Budgets.
    pub budgets: Vec<Budget>,
    /// Sales quote count.
    pub sales_quote_count: usize,
    /// KPI count.
    pub kpi_count: usize,
    /// Budget line count. NOTE(review): named for lines, not budgets, so it
    /// may differ from `budgets.len()` — confirm.
    pub budget_line_count: usize,
}
526
/// Labels returned by anomaly injection.
///
/// `phase_anomaly_injection` returns the default (empty) value whenever
/// injection is skipped, and records `labels.len()` as `anomalies_injected`
/// otherwise.
#[derive(Debug, Clone, Default)]
pub struct AnomalyLabels {
    /// One label per injected anomaly.
    pub labels: Vec<LabeledAnomaly>,
    /// Optional aggregate summary.
    pub summary: Option<AnomalySummary>,
    /// Counts keyed by a string. NOTE(review): presumably the anomaly type
    /// name — confirm at the insertion site.
    pub by_type: HashMap<String, usize>,
}
537
/// Outcome returned by `phase_balance_validation`.
///
/// All fields take their `Default` value (false / zero / empty) in a default
/// instance.
#[derive(Debug, Clone, Default)]
pub struct BalanceValidationResult {
    /// NOTE(review): presumably `true` only when validation actually ran — confirm.
    pub validated: bool,
    /// Whether the validated entries balanced.
    pub is_balanced: bool,
    /// Number of entries processed.
    pub entries_processed: u64,
    /// Sum of debits.
    pub total_debits: rust_decimal::Decimal,
    /// Sum of credits.
    pub total_credits: rust_decimal::Decimal,
    /// Number of accounts tracked.
    pub accounts_tracked: usize,
    /// Number of companies tracked.
    pub companies_tracked: usize,
    /// Validation errors collected.
    pub validation_errors: Vec<ValidationError>,
    /// Whether any individual entry was unbalanced.
    pub has_unbalanced_entries: bool,
}
560
/// Everything produced by one call to `EnhancedOrchestrator::generate`.
#[derive(Debug)]
pub struct EnhancedGenerationResult {
    /// Chart of accounts (cloned out of the shared `Arc`).
    pub chart_of_accounts: ChartOfAccounts,
    /// Clone of the orchestrator's master data.
    pub master_data: MasterDataSnapshot,
    /// Document flows.
    pub document_flows: DocumentFlowSnapshot,
    /// Subledger records linked from the document flows.
    pub subledger: SubledgerSnapshot,
    /// OCPM output.
    pub ocpm: OcpmSnapshot,
    /// Audit data.
    pub audit: AuditSnapshot,
    /// Banking data.
    pub banking: BankingSnapshot,
    /// Graph export outcome.
    pub graph_export: GraphExportSnapshot,
    /// Sourcing data.
    pub sourcing: SourcingSnapshot,
    /// Financial reporting data.
    pub financial_reporting: FinancialReportingSnapshot,
    /// HR data.
    pub hr: HrSnapshot,
    /// Accounting standards counters.
    pub accounting_standards: AccountingStandardsSnapshot,
    /// Manufacturing data.
    pub manufacturing: ManufacturingSnapshot,
    /// Sales quotes, KPIs and budgets.
    pub sales_kpi_budgets: SalesKpiBudgetsSnapshot,
    /// Journal entries, after anomaly and data quality injection have run.
    pub journal_entries: Vec<JournalEntry>,
    /// Labels from anomaly injection.
    pub anomaly_labels: AnomalyLabels,
    /// Balance validation outcome.
    pub balance_validation: BalanceValidationResult,
    /// Statistics from data quality injection.
    pub data_quality_stats: DataQualityStats,
    /// Aggregate counters for the run.
    pub statistics: EnhancedGenerationStatistics,
    /// Lineage graph; `generate` always sets this to `Some`.
    pub lineage: Option<super::lineage::LineageGraph>,
    /// Gate evaluation result; `generate` leaves this as `None`.
    pub gate_result: Option<datasynth_eval::gates::GateResult>,
}
607
/// Aggregate counters for a generation run, filled in phase by phase.
///
/// Serializable with serde. The fields marked `#[serde(default)]` fall back to
/// their default value when missing from the input being deserialized.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct EnhancedGenerationStatistics {
    /// Total journal entries (set in `phase_journal_entries`).
    pub total_entries: u64,
    /// Total journal entry line items.
    pub total_line_items: u64,
    /// Accounts in the chart of accounts.
    pub accounts_count: usize,
    /// Companies in the configuration.
    pub companies_count: usize,
    /// Configured period length in months.
    pub period_months: u32,
    // Master data counts (set in `phase_master_data`).
    pub vendor_count: usize,
    pub customer_count: usize,
    pub material_count: usize,
    pub asset_count: usize,
    pub employee_count: usize,
    // Document flow counts (set in `phase_document_flows`).
    pub p2p_chain_count: usize,
    pub o2c_chain_count: usize,
    // Subledger counts (set in `phase_document_flows`).
    pub ap_invoice_count: usize,
    pub ar_invoice_count: usize,
    // OCPM counts (set in `phase_ocpm_events`).
    pub ocpm_event_count: usize,
    pub ocpm_object_count: usize,
    pub ocpm_case_count: usize,
    // Audit counts.
    pub audit_engagement_count: usize,
    pub audit_workpaper_count: usize,
    pub audit_evidence_count: usize,
    pub audit_risk_count: usize,
    pub audit_finding_count: usize,
    pub audit_judgment_count: usize,
    /// Anomalies injected (set in `phase_anomaly_injection`).
    pub anomalies_injected: usize,
    /// Data quality issues injected.
    pub data_quality_issues: usize,
    // Banking counts.
    pub banking_customer_count: usize,
    pub banking_account_count: usize,
    pub banking_transaction_count: usize,
    pub banking_suspicious_count: usize,
    // Graph export counts.
    pub graph_export_count: usize,
    pub graph_node_count: usize,
    pub graph_edge_count: usize,
    // LLM enrichment. NOTE(review): `_ms` presumably means elapsed
    // milliseconds — confirm at the write site.
    #[serde(default)]
    pub llm_enrichment_ms: u64,
    #[serde(default)]
    pub llm_vendors_enriched: usize,
    // Diffusion enhancement.
    #[serde(default)]
    pub diffusion_enhancement_ms: u64,
    #[serde(default)]
    pub diffusion_samples_generated: usize,
    // Causal overlay.
    #[serde(default)]
    pub causal_generation_ms: u64,
    #[serde(default)]
    pub causal_samples_generated: usize,
    #[serde(default)]
    pub causal_validation_passed: Option<bool>,
    // Sourcing counts.
    #[serde(default)]
    pub sourcing_project_count: usize,
    #[serde(default)]
    pub rfx_event_count: usize,
    #[serde(default)]
    pub bid_count: usize,
    #[serde(default)]
    pub contract_count: usize,
    #[serde(default)]
    pub catalog_item_count: usize,
    #[serde(default)]
    pub scorecard_count: usize,
    // Financial reporting counts.
    #[serde(default)]
    pub financial_statement_count: usize,
    #[serde(default)]
    pub bank_reconciliation_count: usize,
    // HR counts.
    #[serde(default)]
    pub payroll_run_count: usize,
    #[serde(default)]
    pub time_entry_count: usize,
    #[serde(default)]
    pub expense_report_count: usize,
    // Accounting standards counts.
    #[serde(default)]
    pub revenue_contract_count: usize,
    #[serde(default)]
    pub impairment_test_count: usize,
    // Manufacturing counts.
    #[serde(default)]
    pub production_order_count: usize,
    #[serde(default)]
    pub quality_inspection_count: usize,
    #[serde(default)]
    pub cycle_count_count: usize,
    // Sales, KPI and budget counts.
    #[serde(default)]
    pub sales_quote_count: usize,
    #[serde(default)]
    pub kpi_count: usize,
    #[serde(default)]
    pub budget_line_count: usize,
}
723
/// Drives the multi-phase generation workflow (see `generate`).
pub struct EnhancedOrchestrator {
    /// Generator configuration; validated in `new`.
    config: GeneratorConfig,
    /// Phase switches and volumes.
    phase_config: PhaseConfig,
    /// Shared chart of accounts; `None` on construction.
    coa: Option<Arc<ChartOfAccounts>>,
    /// Master data; empty on construction.
    master_data: MasterDataSnapshot,
    /// Seed taken from `config.global.seed`, or random when that is unset.
    seed: u64,
    /// Progress container; created by `with_progress(true)`.
    multi_progress: Option<MultiProgress>,
    /// Resource guard built by `build_resource_guard`.
    resource_guard: ResourceGuard,
    /// Output directory, when set through `with_output_path`.
    output_path: Option<PathBuf>,
    /// Copula generators; filled only by `from_fingerprint_data`.
    copula_generators: Vec<CopulaGeneratorSpec>,
}
739
740impl EnhancedOrchestrator {
741 pub fn new(config: GeneratorConfig, phase_config: PhaseConfig) -> SynthResult<Self> {
743 datasynth_config::validate_config(&config)?;
744
745 let seed = config.global.seed.unwrap_or_else(rand::random);
746
747 let resource_guard = Self::build_resource_guard(&config, None);
749
750 Ok(Self {
751 config,
752 phase_config,
753 coa: None,
754 master_data: MasterDataSnapshot::default(),
755 seed,
756 multi_progress: None,
757 resource_guard,
758 output_path: None,
759 copula_generators: Vec::new(),
760 })
761 }
762
763 pub fn with_defaults(config: GeneratorConfig) -> SynthResult<Self> {
765 Self::new(config, PhaseConfig::default())
766 }
767
768 pub fn with_progress(mut self, show: bool) -> Self {
770 self.phase_config.show_progress = show;
771 if show {
772 self.multi_progress = Some(MultiProgress::new());
773 }
774 self
775 }
776
777 pub fn with_output_path<P: Into<PathBuf>>(mut self, path: P) -> Self {
779 let path = path.into();
780 self.output_path = Some(path.clone());
781 self.resource_guard = Self::build_resource_guard(&self.config, Some(path));
783 self
784 }
785
786 pub fn has_copulas(&self) -> bool {
791 !self.copula_generators.is_empty()
792 }
793
794 pub fn copulas(&self) -> &[CopulaGeneratorSpec] {
800 &self.copula_generators
801 }
802
803 pub fn copulas_mut(&mut self) -> &mut [CopulaGeneratorSpec] {
807 &mut self.copula_generators
808 }
809
810 pub fn sample_from_copula(&mut self, copula_name: &str) -> Option<Vec<f64>> {
814 self.copula_generators
815 .iter_mut()
816 .find(|c| c.name == copula_name)
817 .map(|c| c.generator.sample())
818 }
819
820 pub fn from_fingerprint(
843 fingerprint_path: &std::path::Path,
844 phase_config: PhaseConfig,
845 scale: f64,
846 ) -> SynthResult<Self> {
847 info!("Loading fingerprint from: {}", fingerprint_path.display());
848
849 let reader = FingerprintReader::new();
851 let fingerprint = reader
852 .read_from_file(fingerprint_path)
853 .map_err(|e| SynthError::config(format!("Failed to read fingerprint: {}", e)))?;
854
855 Self::from_fingerprint_data(fingerprint, phase_config, scale)
856 }
857
858 pub fn from_fingerprint_data(
865 fingerprint: Fingerprint,
866 phase_config: PhaseConfig,
867 scale: f64,
868 ) -> SynthResult<Self> {
869 info!(
870 "Synthesizing config from fingerprint (version: {}, tables: {})",
871 fingerprint.manifest.version,
872 fingerprint.schema.tables.len()
873 );
874
875 let seed: u64 = rand::random();
877
878 let options = SynthesisOptions {
880 scale,
881 seed: Some(seed),
882 preserve_correlations: true,
883 inject_anomalies: true,
884 };
885 let synthesizer = ConfigSynthesizer::with_options(options);
886
887 let synthesis_result = synthesizer
889 .synthesize_full(&fingerprint, seed)
890 .map_err(|e| {
891 SynthError::config(format!(
892 "Failed to synthesize config from fingerprint: {}",
893 e
894 ))
895 })?;
896
897 let mut config = if let Some(ref industry) = fingerprint.manifest.source.industry {
899 Self::base_config_for_industry(industry)
900 } else {
901 Self::base_config_for_industry("manufacturing")
902 };
903
904 config = Self::apply_config_patch(config, &synthesis_result.config_patch);
906
907 info!(
909 "Config synthesized: {} tables, scale={:.2}, copula generators: {}",
910 fingerprint.schema.tables.len(),
911 scale,
912 synthesis_result.copula_generators.len()
913 );
914
915 if !synthesis_result.copula_generators.is_empty() {
916 for spec in &synthesis_result.copula_generators {
917 info!(
918 " Copula '{}' for table '{}': {} columns",
919 spec.name,
920 spec.table,
921 spec.columns.len()
922 );
923 }
924 }
925
926 let mut orchestrator = Self::new(config, phase_config)?;
928
929 orchestrator.copula_generators = synthesis_result.copula_generators;
931
932 Ok(orchestrator)
933 }
934
935 fn base_config_for_industry(industry: &str) -> GeneratorConfig {
937 use datasynth_config::presets::create_preset;
938 use datasynth_config::TransactionVolume;
939 use datasynth_core::models::{CoAComplexity, IndustrySector};
940
941 let sector = match industry.to_lowercase().as_str() {
942 "manufacturing" => IndustrySector::Manufacturing,
943 "retail" => IndustrySector::Retail,
944 "financial" | "financial_services" => IndustrySector::FinancialServices,
945 "healthcare" => IndustrySector::Healthcare,
946 "technology" | "tech" => IndustrySector::Technology,
947 _ => IndustrySector::Manufacturing,
948 };
949
950 create_preset(
952 sector,
953 1, 12, CoAComplexity::Medium,
956 TransactionVolume::TenK,
957 )
958 }
959
960 fn apply_config_patch(
962 mut config: GeneratorConfig,
963 patch: &datasynth_fingerprint::synthesis::ConfigPatch,
964 ) -> GeneratorConfig {
965 use datasynth_fingerprint::synthesis::ConfigValue;
966
967 for (key, value) in patch.values() {
968 match (key.as_str(), value) {
969 ("transactions.count", ConfigValue::Integer(n)) => {
972 info!(
973 "Fingerprint suggests {} transactions (apply via company volumes)",
974 n
975 );
976 }
977 ("global.period_months", ConfigValue::Integer(n)) => {
978 config.global.period_months = *n as u32;
979 }
980 ("global.start_date", ConfigValue::String(s)) => {
981 config.global.start_date = s.clone();
982 }
983 ("global.seed", ConfigValue::Integer(n)) => {
984 config.global.seed = Some(*n as u64);
985 }
986 ("fraud.enabled", ConfigValue::Bool(b)) => {
987 config.fraud.enabled = *b;
988 }
989 ("fraud.fraud_rate", ConfigValue::Float(f)) => {
990 config.fraud.fraud_rate = *f;
991 }
992 ("data_quality.enabled", ConfigValue::Bool(b)) => {
993 config.data_quality.enabled = *b;
994 }
995 ("anomaly_injection.enabled", ConfigValue::Bool(b)) => {
997 config.fraud.enabled = *b;
998 }
999 ("anomaly_injection.overall_rate", ConfigValue::Float(f)) => {
1000 config.fraud.fraud_rate = *f;
1001 }
1002 _ => {
1003 debug!("Ignoring unknown config patch key: {}", key);
1004 }
1005 }
1006 }
1007
1008 config
1009 }
1010
1011 fn build_resource_guard(
1013 config: &GeneratorConfig,
1014 output_path: Option<PathBuf>,
1015 ) -> ResourceGuard {
1016 let mut builder = ResourceGuardBuilder::new();
1017
1018 if config.global.memory_limit_mb > 0 {
1020 builder = builder.memory_limit(config.global.memory_limit_mb);
1021 }
1022
1023 if let Some(path) = output_path {
1025 builder = builder.output_path(path).min_free_disk(100); }
1027
1028 builder = builder.conservative();
1030
1031 builder.build()
1032 }
1033
/// Runs the resource guard's check and returns the degradation level it
/// reports; errors from the guard are returned unchanged.
fn check_resources(&self) -> SynthResult<DegradationLevel> {
    self.resource_guard.check()
}
1041
1042 fn check_resources_with_log(&self, phase: &str) -> SynthResult<DegradationLevel> {
1044 let level = self.resource_guard.check()?;
1045
1046 if level != DegradationLevel::Normal {
1047 warn!(
1048 "Resource degradation at {}: level={}, memory={}MB, disk={}MB",
1049 phase,
1050 level,
1051 self.resource_guard.current_memory_mb(),
1052 self.resource_guard.available_disk_mb()
1053 );
1054 }
1055
1056 Ok(level)
1057 }
1058
/// Returns the resource guard's current degradation actions (for example the
/// `skip_anomaly_injection` flag read by `phase_anomaly_injection`).
fn get_degradation_actions(&self) -> DegradationActions {
    self.resource_guard.get_actions()
}
1063
1064 fn check_memory_limit(&self) -> SynthResult<()> {
1066 self.check_resources()?;
1067 Ok(())
1068 }
1069
/// Runs the full generation workflow and returns everything it produced.
///
/// The phases run strictly in the order written below; later phases consume
/// the outputs of earlier ones (for example OCPM events and the hypergraph
/// export take the document flow, sourcing, HR, manufacturing, banking, audit
/// and financial reporting snapshots).
///
/// # Errors
/// Returns a resource error if the initial resource check reports the
/// `Emergency` level, and propagates the error of any phase called with `?`.
pub fn generate(&mut self) -> SynthResult<EnhancedGenerationResult> {
    info!("Starting enhanced generation workflow");
    info!(
        "Config: industry={:?}, period_months={}, companies={}",
        self.config.global.industry,
        self.config.global.period_months,
        self.config.companies.len()
    );

    // Refuse to start at all when resources are already at emergency level.
    let initial_level = self.check_resources_with_log("initial")?;
    if initial_level == DegradationLevel::Emergency {
        return Err(SynthError::resource(
            "Insufficient resources to start generation",
        ));
    }

    // Statistics start with the config-derived values; phases fill in the rest.
    let mut stats = EnhancedGenerationStatistics {
        companies_count: self.config.companies.len(),
        period_months: self.config.global.period_months,
        ..Default::default()
    };

    // Chart of accounts.
    let coa = self.phase_chart_of_accounts(&mut stats)?;

    // Master data (stored on `self.master_data`).
    self.phase_master_data(&mut stats)?;

    // Document flows and the subledger records linked from them.
    let (document_flows, subledger) = self.phase_document_flows(&mut stats)?;

    // Journal entries: flow-derived plus standalone.
    let mut entries = self.phase_journal_entries(&coa, &document_flows, &mut stats)?;

    // Degradation actions are sampled once here and shared by both injection phases.
    let actions = self.get_degradation_actions();

    // Anomaly injection mutates the entries in place.
    let anomaly_labels = self.phase_anomaly_injection(&mut entries, &actions, &mut stats)?;

    // Balance validation runs after anomaly injection and before data quality injection.
    let balance_validation = self.phase_balance_validation(&entries)?;

    // Data quality injection also mutates the entries in place.
    let data_quality_stats =
        self.phase_data_quality_injection(&mut entries, &actions, &mut stats)?;

    // Audit data.
    let audit = self.phase_audit_data(&entries, &mut stats)?;

    // Banking data.
    let banking = self.phase_banking_data(&mut stats)?;

    // Graph export.
    let graph_export = self.phase_graph_export(&entries, &coa, &mut stats)?;

    // The next three phases are called without `?`, so they cannot abort the
    // run from here.
    self.phase_llm_enrichment(&mut stats);

    self.phase_diffusion_enhancement(&mut stats);

    self.phase_causal_overlay(&mut stats);

    // Sourcing data.
    let sourcing = self.phase_sourcing_data(&mut stats)?;

    // Financial reporting.
    let financial_reporting = self.phase_financial_reporting(&document_flows, &mut stats)?;

    // HR data.
    let hr = self.phase_hr_data(&mut stats)?;

    // Accounting standards.
    let accounting_standards = self.phase_accounting_standards(&mut stats)?;

    // Manufacturing.
    let manufacturing_snap = self.phase_manufacturing(&mut stats)?;

    // OCPM events run this late because they take the snapshots produced above.
    let ocpm = self.phase_ocpm_events(
        &document_flows,
        &sourcing,
        &hr,
        &manufacturing_snap,
        &banking,
        &audit,
        &financial_reporting,
        &mut stats,
    )?;

    // Sales quotes, KPIs and budgets.
    let sales_kpi_budgets = self.phase_sales_kpi_budgets(&coa, &mut stats)?;

    // Hypergraph export; its return value is not kept in the result.
    self.phase_hypergraph_export(
        &coa,
        &entries,
        &document_flows,
        &sourcing,
        &hr,
        &manufacturing_snap,
        &banking,
        &audit,
        &financial_reporting,
        &ocpm,
        &mut stats,
    )?;

    // Final resource summary; peak memory is converted from bytes to MB.
    let resource_stats = self.resource_guard.stats();
    info!(
        "Generation workflow complete. Resource stats: memory_peak={}MB, disk_written={}bytes, degradation_level={}",
        resource_stats.memory.peak_resident_bytes / (1024 * 1024),
        resource_stats.disk.estimated_bytes_written,
        resource_stats.degradation_level
    );

    let lineage = self.build_lineage_graph();

    Ok(EnhancedGenerationResult {
        // The chart of accounts is cloned out of the shared `Arc`.
        chart_of_accounts: (*coa).clone(),
        master_data: self.master_data.clone(),
        document_flows,
        subledger,
        ocpm,
        audit,
        banking,
        graph_export,
        sourcing,
        financial_reporting,
        hr,
        accounting_standards,
        manufacturing: manufacturing_snap,
        sales_kpi_budgets,
        journal_entries: entries,
        anomaly_labels,
        balance_validation,
        data_quality_stats,
        statistics: stats,
        lineage: Some(lineage),
        // No gate result is set here.
        gate_result: None,
    })
}
1218
1219 fn phase_chart_of_accounts(
1225 &mut self,
1226 stats: &mut EnhancedGenerationStatistics,
1227 ) -> SynthResult<Arc<ChartOfAccounts>> {
1228 info!("Phase 1: Generating Chart of Accounts");
1229 let coa = self.generate_coa()?;
1230 stats.accounts_count = coa.account_count();
1231 info!(
1232 "Chart of Accounts generated: {} accounts",
1233 stats.accounts_count
1234 );
1235 self.check_resources_with_log("post-coa")?;
1236 Ok(coa)
1237 }
1238
1239 fn phase_master_data(&mut self, stats: &mut EnhancedGenerationStatistics) -> SynthResult<()> {
1241 if self.phase_config.generate_master_data {
1242 info!("Phase 2: Generating Master Data");
1243 self.generate_master_data()?;
1244 stats.vendor_count = self.master_data.vendors.len();
1245 stats.customer_count = self.master_data.customers.len();
1246 stats.material_count = self.master_data.materials.len();
1247 stats.asset_count = self.master_data.assets.len();
1248 stats.employee_count = self.master_data.employees.len();
1249 info!(
1250 "Master data generated: {} vendors, {} customers, {} materials, {} assets, {} employees",
1251 stats.vendor_count, stats.customer_count, stats.material_count,
1252 stats.asset_count, stats.employee_count
1253 );
1254 self.check_resources_with_log("post-master-data")?;
1255 } else {
1256 debug!("Phase 2: Skipped (master data generation disabled)");
1257 }
1258 Ok(())
1259 }
1260
1261 fn phase_document_flows(
1263 &mut self,
1264 stats: &mut EnhancedGenerationStatistics,
1265 ) -> SynthResult<(DocumentFlowSnapshot, SubledgerSnapshot)> {
1266 let mut document_flows = DocumentFlowSnapshot::default();
1267 let mut subledger = SubledgerSnapshot::default();
1268
1269 if self.phase_config.generate_document_flows && !self.master_data.vendors.is_empty() {
1270 info!("Phase 3: Generating Document Flows");
1271 self.generate_document_flows(&mut document_flows)?;
1272 stats.p2p_chain_count = document_flows.p2p_chains.len();
1273 stats.o2c_chain_count = document_flows.o2c_chains.len();
1274 info!(
1275 "Document flows generated: {} P2P chains, {} O2C chains",
1276 stats.p2p_chain_count, stats.o2c_chain_count
1277 );
1278
1279 debug!("Phase 3b: Linking document flows to subledgers");
1281 subledger = self.link_document_flows_to_subledgers(&document_flows)?;
1282 stats.ap_invoice_count = subledger.ap_invoices.len();
1283 stats.ar_invoice_count = subledger.ar_invoices.len();
1284 debug!(
1285 "Subledgers linked: {} AP invoices, {} AR invoices",
1286 stats.ap_invoice_count, stats.ar_invoice_count
1287 );
1288
1289 self.check_resources_with_log("post-document-flows")?;
1290 } else {
1291 debug!("Phase 3: Skipped (document flow generation disabled or no master data)");
1292 }
1293
1294 Ok((document_flows, subledger))
1295 }
1296
1297 #[allow(clippy::too_many_arguments)]
1299 fn phase_ocpm_events(
1300 &mut self,
1301 document_flows: &DocumentFlowSnapshot,
1302 sourcing: &SourcingSnapshot,
1303 hr: &HrSnapshot,
1304 manufacturing: &ManufacturingSnapshot,
1305 banking: &BankingSnapshot,
1306 audit: &AuditSnapshot,
1307 financial_reporting: &FinancialReportingSnapshot,
1308 stats: &mut EnhancedGenerationStatistics,
1309 ) -> SynthResult<OcpmSnapshot> {
1310 if self.phase_config.generate_ocpm_events {
1311 info!("Phase 3c: Generating OCPM Events");
1312 let ocpm_snapshot = self.generate_ocpm_events(
1313 document_flows,
1314 sourcing,
1315 hr,
1316 manufacturing,
1317 banking,
1318 audit,
1319 financial_reporting,
1320 )?;
1321 stats.ocpm_event_count = ocpm_snapshot.event_count;
1322 stats.ocpm_object_count = ocpm_snapshot.object_count;
1323 stats.ocpm_case_count = ocpm_snapshot.case_count;
1324 info!(
1325 "OCPM events generated: {} events, {} objects, {} cases",
1326 stats.ocpm_event_count, stats.ocpm_object_count, stats.ocpm_case_count
1327 );
1328 self.check_resources_with_log("post-ocpm")?;
1329 Ok(ocpm_snapshot)
1330 } else {
1331 debug!("Phase 3c: Skipped (OCPM generation disabled or no document flows)");
1332 Ok(OcpmSnapshot::default())
1333 }
1334 }
1335
1336 fn phase_journal_entries(
1338 &mut self,
1339 coa: &Arc<ChartOfAccounts>,
1340 document_flows: &DocumentFlowSnapshot,
1341 stats: &mut EnhancedGenerationStatistics,
1342 ) -> SynthResult<Vec<JournalEntry>> {
1343 let mut entries = Vec::new();
1344
1345 if self.phase_config.generate_document_flows && !document_flows.p2p_chains.is_empty() {
1347 debug!("Phase 4a: Generating JEs from document flows");
1348 let flow_entries = self.generate_jes_from_document_flows(document_flows)?;
1349 debug!("Generated {} JEs from document flows", flow_entries.len());
1350 entries.extend(flow_entries);
1351 }
1352
1353 if self.phase_config.generate_journal_entries {
1355 info!("Phase 4: Generating Journal Entries");
1356 let je_entries = self.generate_journal_entries(coa)?;
1357 info!("Generated {} standalone journal entries", je_entries.len());
1358 entries.extend(je_entries);
1359 } else {
1360 debug!("Phase 4: Skipped (journal entry generation disabled)");
1361 }
1362
1363 if !entries.is_empty() {
1364 stats.total_entries = entries.len() as u64;
1365 stats.total_line_items = entries.iter().map(|e| e.line_count() as u64).sum();
1366 info!(
1367 "Total entries: {}, total line items: {}",
1368 stats.total_entries, stats.total_line_items
1369 );
1370 self.check_resources_with_log("post-journal-entries")?;
1371 }
1372
1373 Ok(entries)
1374 }
1375
1376 fn phase_anomaly_injection(
1378 &mut self,
1379 entries: &mut [JournalEntry],
1380 actions: &DegradationActions,
1381 stats: &mut EnhancedGenerationStatistics,
1382 ) -> SynthResult<AnomalyLabels> {
1383 if self.phase_config.inject_anomalies
1384 && !entries.is_empty()
1385 && !actions.skip_anomaly_injection
1386 {
1387 info!("Phase 5: Injecting Anomalies");
1388 let result = self.inject_anomalies(entries)?;
1389 stats.anomalies_injected = result.labels.len();
1390 info!("Injected {} anomalies", stats.anomalies_injected);
1391 self.check_resources_with_log("post-anomaly-injection")?;
1392 Ok(result)
1393 } else if actions.skip_anomaly_injection {
1394 warn!("Phase 5: Skipped due to resource degradation");
1395 Ok(AnomalyLabels::default())
1396 } else {
1397 debug!("Phase 5: Skipped (anomaly injection disabled or no entries)");
1398 Ok(AnomalyLabels::default())
1399 }
1400 }
1401
1402 fn phase_balance_validation(
1404 &mut self,
1405 entries: &[JournalEntry],
1406 ) -> SynthResult<BalanceValidationResult> {
1407 if self.phase_config.validate_balances && !entries.is_empty() {
1408 debug!("Phase 6: Validating Balances");
1409 let balance_validation = self.validate_journal_entries(entries)?;
1410 if balance_validation.is_balanced {
1411 debug!("Balance validation passed");
1412 } else {
1413 warn!(
1414 "Balance validation found {} errors",
1415 balance_validation.validation_errors.len()
1416 );
1417 }
1418 Ok(balance_validation)
1419 } else {
1420 Ok(BalanceValidationResult::default())
1421 }
1422 }
1423
1424 fn phase_data_quality_injection(
1426 &mut self,
1427 entries: &mut [JournalEntry],
1428 actions: &DegradationActions,
1429 stats: &mut EnhancedGenerationStatistics,
1430 ) -> SynthResult<DataQualityStats> {
1431 if self.phase_config.inject_data_quality
1432 && !entries.is_empty()
1433 && !actions.skip_data_quality
1434 {
1435 info!("Phase 7: Injecting Data Quality Variations");
1436 let dq_stats = self.inject_data_quality(entries)?;
1437 stats.data_quality_issues = dq_stats.records_with_issues;
1438 info!("Injected {} data quality issues", stats.data_quality_issues);
1439 self.check_resources_with_log("post-data-quality")?;
1440 Ok(dq_stats)
1441 } else if actions.skip_data_quality {
1442 warn!("Phase 7: Skipped due to resource degradation");
1443 Ok(DataQualityStats::default())
1444 } else {
1445 debug!("Phase 7: Skipped (data quality injection disabled or no entries)");
1446 Ok(DataQualityStats::default())
1447 }
1448 }
1449
1450 fn phase_audit_data(
1452 &mut self,
1453 entries: &[JournalEntry],
1454 stats: &mut EnhancedGenerationStatistics,
1455 ) -> SynthResult<AuditSnapshot> {
1456 if self.phase_config.generate_audit {
1457 info!("Phase 8: Generating Audit Data");
1458 let audit_snapshot = self.generate_audit_data(entries)?;
1459 stats.audit_engagement_count = audit_snapshot.engagements.len();
1460 stats.audit_workpaper_count = audit_snapshot.workpapers.len();
1461 stats.audit_evidence_count = audit_snapshot.evidence.len();
1462 stats.audit_risk_count = audit_snapshot.risk_assessments.len();
1463 stats.audit_finding_count = audit_snapshot.findings.len();
1464 stats.audit_judgment_count = audit_snapshot.judgments.len();
1465 info!(
1466 "Audit data generated: {} engagements, {} workpapers, {} evidence, {} risks, {} findings, {} judgments",
1467 stats.audit_engagement_count, stats.audit_workpaper_count,
1468 stats.audit_evidence_count, stats.audit_risk_count,
1469 stats.audit_finding_count, stats.audit_judgment_count
1470 );
1471 self.check_resources_with_log("post-audit")?;
1472 Ok(audit_snapshot)
1473 } else {
1474 debug!("Phase 8: Skipped (audit generation disabled)");
1475 Ok(AuditSnapshot::default())
1476 }
1477 }
1478
1479 fn phase_banking_data(
1481 &mut self,
1482 stats: &mut EnhancedGenerationStatistics,
1483 ) -> SynthResult<BankingSnapshot> {
1484 if self.phase_config.generate_banking && self.config.banking.enabled {
1485 info!("Phase 9: Generating Banking KYC/AML Data");
1486 let banking_snapshot = self.generate_banking_data()?;
1487 stats.banking_customer_count = banking_snapshot.customers.len();
1488 stats.banking_account_count = banking_snapshot.accounts.len();
1489 stats.banking_transaction_count = banking_snapshot.transactions.len();
1490 stats.banking_suspicious_count = banking_snapshot.suspicious_count;
1491 info!(
1492 "Banking data generated: {} customers, {} accounts, {} transactions ({} suspicious)",
1493 stats.banking_customer_count, stats.banking_account_count,
1494 stats.banking_transaction_count, stats.banking_suspicious_count
1495 );
1496 self.check_resources_with_log("post-banking")?;
1497 Ok(banking_snapshot)
1498 } else {
1499 debug!("Phase 9: Skipped (banking generation disabled)");
1500 Ok(BankingSnapshot::default())
1501 }
1502 }
1503
1504 fn phase_graph_export(
1506 &mut self,
1507 entries: &[JournalEntry],
1508 coa: &Arc<ChartOfAccounts>,
1509 stats: &mut EnhancedGenerationStatistics,
1510 ) -> SynthResult<GraphExportSnapshot> {
1511 if (self.phase_config.generate_graph_export || self.config.graph_export.enabled)
1512 && !entries.is_empty()
1513 {
1514 info!("Phase 10: Exporting Accounting Network Graphs");
1515 match self.export_graphs(entries, coa, stats) {
1516 Ok(snapshot) => {
1517 info!(
1518 "Graph export complete: {} graphs ({} nodes, {} edges)",
1519 snapshot.graph_count, stats.graph_node_count, stats.graph_edge_count
1520 );
1521 Ok(snapshot)
1522 }
1523 Err(e) => {
1524 warn!("Phase 10: Graph export failed: {}", e);
1525 Ok(GraphExportSnapshot::default())
1526 }
1527 }
1528 } else {
1529 debug!("Phase 10: Skipped (graph export disabled or no entries)");
1530 Ok(GraphExportSnapshot::default())
1531 }
1532 }
1533
1534 #[allow(clippy::too_many_arguments)]
1536 fn phase_hypergraph_export(
1537 &self,
1538 coa: &Arc<ChartOfAccounts>,
1539 entries: &[JournalEntry],
1540 document_flows: &DocumentFlowSnapshot,
1541 sourcing: &SourcingSnapshot,
1542 hr: &HrSnapshot,
1543 manufacturing: &ManufacturingSnapshot,
1544 banking: &BankingSnapshot,
1545 audit: &AuditSnapshot,
1546 financial_reporting: &FinancialReportingSnapshot,
1547 ocpm: &OcpmSnapshot,
1548 stats: &mut EnhancedGenerationStatistics,
1549 ) -> SynthResult<()> {
1550 if self.config.graph_export.hypergraph.enabled && !entries.is_empty() {
1551 info!("Phase 19b: Exporting Multi-Layer Hypergraph");
1552 match self.export_hypergraph(
1553 coa,
1554 entries,
1555 document_flows,
1556 sourcing,
1557 hr,
1558 manufacturing,
1559 banking,
1560 audit,
1561 financial_reporting,
1562 ocpm,
1563 stats,
1564 ) {
1565 Ok(info) => {
1566 info!(
1567 "Hypergraph export complete: {} nodes, {} edges, {} hyperedges",
1568 info.node_count, info.edge_count, info.hyperedge_count
1569 );
1570 }
1571 Err(e) => {
1572 warn!("Phase 10b: Hypergraph export failed: {}", e);
1573 }
1574 }
1575 } else {
1576 debug!("Phase 10b: Skipped (hypergraph export disabled or no entries)");
1577 }
1578 Ok(())
1579 }
1580
1581 fn phase_llm_enrichment(&mut self, stats: &mut EnhancedGenerationStatistics) {
1587 if !self.config.llm.enabled {
1588 debug!("Phase 11: Skipped (LLM enrichment disabled)");
1589 return;
1590 }
1591
1592 info!("Phase 11: Starting LLM Enrichment");
1593 let start = std::time::Instant::now();
1594
1595 let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
1596 let provider = Arc::new(MockLlmProvider::new(self.seed));
1597 let enricher = VendorLlmEnricher::new(provider);
1598
1599 let industry = format!("{:?}", self.config.global.industry);
1600 let max_enrichments = self
1601 .config
1602 .llm
1603 .max_vendor_enrichments
1604 .min(self.master_data.vendors.len());
1605
1606 let mut enriched_count = 0usize;
1607 for vendor in self.master_data.vendors.iter_mut().take(max_enrichments) {
1608 match enricher.enrich_vendor_name(&industry, "general", &vendor.country) {
1609 Ok(name) => {
1610 vendor.name = name;
1611 enriched_count += 1;
1612 }
1613 Err(e) => {
1614 warn!(
1615 "LLM vendor enrichment failed for {}: {}",
1616 vendor.vendor_id, e
1617 );
1618 }
1619 }
1620 }
1621
1622 enriched_count
1623 }));
1624
1625 match result {
1626 Ok(enriched_count) => {
1627 stats.llm_vendors_enriched = enriched_count;
1628 let elapsed = start.elapsed();
1629 stats.llm_enrichment_ms = elapsed.as_millis() as u64;
1630 info!(
1631 "Phase 11 complete: {} vendors enriched in {}ms",
1632 enriched_count, stats.llm_enrichment_ms
1633 );
1634 }
1635 Err(_) => {
1636 let elapsed = start.elapsed();
1637 stats.llm_enrichment_ms = elapsed.as_millis() as u64;
1638 warn!("Phase 11: LLM enrichment failed (panic caught), continuing");
1639 }
1640 }
1641 }
1642
1643 fn phase_diffusion_enhancement(&self, stats: &mut EnhancedGenerationStatistics) {
1649 if !self.config.diffusion.enabled {
1650 debug!("Phase 12: Skipped (diffusion enhancement disabled)");
1651 return;
1652 }
1653
1654 info!("Phase 12: Starting Diffusion Enhancement");
1655 let start = std::time::Instant::now();
1656
1657 let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
1658 let means = vec![5000.0, 3.0, 2.0]; let stds = vec![2000.0, 1.5, 1.0];
1661
1662 let diffusion_config = DiffusionConfig {
1663 n_steps: self.config.diffusion.n_steps,
1664 seed: self.seed,
1665 ..Default::default()
1666 };
1667
1668 let backend = StatisticalDiffusionBackend::new(means, stds, diffusion_config);
1669
1670 let n_samples = self.config.diffusion.sample_size;
1671 let n_features = 3; let samples = backend.generate(n_samples, n_features, self.seed);
1673
1674 samples.len()
1675 }));
1676
1677 match result {
1678 Ok(sample_count) => {
1679 stats.diffusion_samples_generated = sample_count;
1680 let elapsed = start.elapsed();
1681 stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
1682 info!(
1683 "Phase 12 complete: {} diffusion samples generated in {}ms",
1684 sample_count, stats.diffusion_enhancement_ms
1685 );
1686 }
1687 Err(_) => {
1688 let elapsed = start.elapsed();
1689 stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
1690 warn!("Phase 12: Diffusion enhancement failed (panic caught), continuing");
1691 }
1692 }
1693 }
1694
1695 fn phase_causal_overlay(&self, stats: &mut EnhancedGenerationStatistics) {
1702 if !self.config.causal.enabled {
1703 debug!("Phase 13: Skipped (causal generation disabled)");
1704 return;
1705 }
1706
1707 info!("Phase 13: Starting Causal Overlay");
1708 let start = std::time::Instant::now();
1709
1710 let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
1711 let graph = match self.config.causal.template.as_str() {
1713 "revenue_cycle" => CausalGraph::revenue_cycle_template(),
1714 _ => CausalGraph::fraud_detection_template(),
1715 };
1716
1717 let scm = StructuralCausalModel::new(graph.clone())
1718 .map_err(|e| SynthError::generation(format!("Failed to build SCM: {}", e)))?;
1719
1720 let n_samples = self.config.causal.sample_size;
1721 let samples = scm
1722 .generate(n_samples, self.seed)
1723 .map_err(|e| SynthError::generation(format!("SCM generation failed: {}", e)))?;
1724
1725 let validation_passed = if self.config.causal.validate {
1727 let report = CausalValidator::validate_causal_structure(&samples, &graph);
1728 if report.valid {
1729 info!(
1730 "Causal validation passed: all {} checks OK",
1731 report.checks.len()
1732 );
1733 } else {
1734 warn!(
1735 "Causal validation: {} violations detected: {:?}",
1736 report.violations.len(),
1737 report.violations
1738 );
1739 }
1740 Some(report.valid)
1741 } else {
1742 None
1743 };
1744
1745 Ok::<(usize, Option<bool>), SynthError>((samples.len(), validation_passed))
1746 }));
1747
1748 match result {
1749 Ok(Ok((sample_count, validation_passed))) => {
1750 stats.causal_samples_generated = sample_count;
1751 stats.causal_validation_passed = validation_passed;
1752 let elapsed = start.elapsed();
1753 stats.causal_generation_ms = elapsed.as_millis() as u64;
1754 info!(
1755 "Phase 13 complete: {} causal samples generated in {}ms (validation: {:?})",
1756 sample_count, stats.causal_generation_ms, validation_passed,
1757 );
1758 }
1759 Ok(Err(e)) => {
1760 let elapsed = start.elapsed();
1761 stats.causal_generation_ms = elapsed.as_millis() as u64;
1762 warn!("Phase 13: Causal generation failed: {}", e);
1763 }
1764 Err(_) => {
1765 let elapsed = start.elapsed();
1766 stats.causal_generation_ms = elapsed.as_millis() as u64;
1767 warn!("Phase 13: Causal generation failed (panic caught), continuing");
1768 }
1769 }
1770 }
1771
1772 fn phase_sourcing_data(
1774 &mut self,
1775 stats: &mut EnhancedGenerationStatistics,
1776 ) -> SynthResult<SourcingSnapshot> {
1777 if !self.phase_config.generate_sourcing && !self.config.source_to_pay.enabled {
1778 debug!("Phase 14: Skipped (sourcing generation disabled)");
1779 return Ok(SourcingSnapshot::default());
1780 }
1781
1782 info!("Phase 14: Generating S2C Sourcing Data");
1783 let seed = self.seed;
1784
1785 let vendor_ids: Vec<String> = self
1787 .master_data
1788 .vendors
1789 .iter()
1790 .map(|v| v.vendor_id.clone())
1791 .collect();
1792 if vendor_ids.is_empty() {
1793 debug!("Phase 14: Skipped (no vendors available)");
1794 return Ok(SourcingSnapshot::default());
1795 }
1796
1797 let categories: Vec<(String, String)> = vec![
1798 ("CAT-RAW".to_string(), "Raw Materials".to_string()),
1799 ("CAT-OFF".to_string(), "Office Supplies".to_string()),
1800 ("CAT-IT".to_string(), "IT Equipment".to_string()),
1801 ("CAT-SVC".to_string(), "Professional Services".to_string()),
1802 ("CAT-LOG".to_string(), "Logistics".to_string()),
1803 ];
1804 let categories_with_spend: Vec<(String, String, rust_decimal::Decimal)> = categories
1805 .iter()
1806 .map(|(id, name)| {
1807 (
1808 id.clone(),
1809 name.clone(),
1810 rust_decimal::Decimal::from(100_000),
1811 )
1812 })
1813 .collect();
1814
1815 let company_code = self
1816 .config
1817 .companies
1818 .first()
1819 .map(|c| c.code.as_str())
1820 .unwrap_or("1000");
1821 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
1822 .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
1823 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
1824 let fiscal_year = start_date.year() as u16;
1825 let owner_ids: Vec<String> = self
1826 .master_data
1827 .employees
1828 .iter()
1829 .take(5)
1830 .map(|e| e.employee_id.clone())
1831 .collect();
1832 let owner_id = owner_ids.first().map(|s| s.as_str()).unwrap_or("BUYER-001");
1833
1834 let mut spend_gen = SpendAnalysisGenerator::new(seed);
1836 let spend_analyses =
1837 spend_gen.generate(company_code, &vendor_ids, &categories, fiscal_year);
1838
1839 let mut project_gen = SourcingProjectGenerator::new(seed + 1);
1841 let sourcing_projects = if owner_ids.is_empty() {
1842 Vec::new()
1843 } else {
1844 project_gen.generate(
1845 company_code,
1846 &categories_with_spend,
1847 &owner_ids,
1848 start_date,
1849 self.config.global.period_months,
1850 )
1851 };
1852 stats.sourcing_project_count = sourcing_projects.len();
1853
1854 let qual_vendor_ids: Vec<String> = vendor_ids.iter().take(20).cloned().collect();
1856 let mut qual_gen = QualificationGenerator::new(seed + 2);
1857 let qualifications = qual_gen.generate(
1858 company_code,
1859 &qual_vendor_ids,
1860 sourcing_projects.first().map(|p| p.project_id.as_str()),
1861 owner_id,
1862 start_date,
1863 );
1864
1865 let mut rfx_gen = RfxGenerator::new(seed + 3);
1867 let rfx_events: Vec<RfxEvent> = sourcing_projects
1868 .iter()
1869 .map(|proj| {
1870 let qualified_vids: Vec<String> = vendor_ids.iter().take(5).cloned().collect();
1871 rfx_gen.generate(
1872 company_code,
1873 &proj.project_id,
1874 &proj.category_id,
1875 &qualified_vids,
1876 owner_id,
1877 start_date,
1878 50000.0,
1879 )
1880 })
1881 .collect();
1882 stats.rfx_event_count = rfx_events.len();
1883
1884 let mut bid_gen = BidGenerator::new(seed + 4);
1886 let mut all_bids = Vec::new();
1887 for rfx in &rfx_events {
1888 let bidder_count = vendor_ids.len().clamp(2, 5);
1889 let responding: Vec<String> = vendor_ids.iter().take(bidder_count).cloned().collect();
1890 let bids = bid_gen.generate(rfx, &responding, start_date);
1891 all_bids.extend(bids);
1892 }
1893 stats.bid_count = all_bids.len();
1894
1895 let mut eval_gen = BidEvaluationGenerator::new(seed + 5);
1897 let bid_evaluations: Vec<BidEvaluation> = rfx_events
1898 .iter()
1899 .map(|rfx| {
1900 let rfx_bids: Vec<SupplierBid> = all_bids
1901 .iter()
1902 .filter(|b| b.rfx_id == rfx.rfx_id)
1903 .cloned()
1904 .collect();
1905 eval_gen.evaluate(rfx, &rfx_bids, owner_id)
1906 })
1907 .collect();
1908
1909 let mut contract_gen = ContractGenerator::new(seed + 6);
1911 let contracts: Vec<ProcurementContract> = bid_evaluations
1912 .iter()
1913 .zip(rfx_events.iter())
1914 .filter_map(|(eval, rfx)| {
1915 eval.ranked_bids.first().and_then(|winner| {
1916 all_bids
1917 .iter()
1918 .find(|b| b.bid_id == winner.bid_id)
1919 .map(|winning_bid| {
1920 contract_gen.generate_from_bid(
1921 winning_bid,
1922 Some(&rfx.sourcing_project_id),
1923 &rfx.category_id,
1924 owner_id,
1925 start_date,
1926 )
1927 })
1928 })
1929 })
1930 .collect();
1931 stats.contract_count = contracts.len();
1932
1933 let mut catalog_gen = CatalogGenerator::new(seed + 7);
1935 let catalog_items = catalog_gen.generate(&contracts);
1936 stats.catalog_item_count = catalog_items.len();
1937
1938 let mut scorecard_gen = ScorecardGenerator::new(seed + 8);
1940 let vendor_contracts: Vec<(String, Vec<&ProcurementContract>)> = contracts
1941 .iter()
1942 .fold(
1943 std::collections::HashMap::<String, Vec<&ProcurementContract>>::new(),
1944 |mut acc, c| {
1945 acc.entry(c.vendor_id.clone()).or_default().push(c);
1946 acc
1947 },
1948 )
1949 .into_iter()
1950 .collect();
1951 let scorecards = scorecard_gen.generate(
1952 company_code,
1953 &vendor_contracts,
1954 start_date,
1955 end_date,
1956 owner_id,
1957 );
1958 stats.scorecard_count = scorecards.len();
1959
1960 info!(
1961 "S2C sourcing generated: {} projects, {} RFx, {} bids, {} contracts, {} catalog items, {} scorecards",
1962 stats.sourcing_project_count, stats.rfx_event_count, stats.bid_count,
1963 stats.contract_count, stats.catalog_item_count, stats.scorecard_count
1964 );
1965 self.check_resources_with_log("post-sourcing")?;
1966
1967 Ok(SourcingSnapshot {
1968 spend_analyses,
1969 sourcing_projects,
1970 qualifications,
1971 rfx_events,
1972 bids: all_bids,
1973 bid_evaluations,
1974 contracts,
1975 catalog_items,
1976 scorecards,
1977 })
1978 }
1979
1980 fn phase_financial_reporting(
1982 &mut self,
1983 document_flows: &DocumentFlowSnapshot,
1984 stats: &mut EnhancedGenerationStatistics,
1985 ) -> SynthResult<FinancialReportingSnapshot> {
1986 let fs_enabled = self.phase_config.generate_financial_statements
1987 || self.config.financial_reporting.enabled;
1988 let br_enabled = self.phase_config.generate_bank_reconciliation;
1989
1990 if !fs_enabled && !br_enabled {
1991 debug!("Phase 15: Skipped (financial reporting disabled)");
1992 return Ok(FinancialReportingSnapshot::default());
1993 }
1994
1995 info!("Phase 15: Generating Financial Reporting Data");
1996
1997 let seed = self.seed;
1998 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
1999 .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
2000
2001 let mut financial_statements = Vec::new();
2002 let mut bank_reconciliations = Vec::new();
2003
2004 if fs_enabled {
2006 let company_code = self
2007 .config
2008 .companies
2009 .first()
2010 .map(|c| c.code.as_str())
2011 .unwrap_or("1000");
2012 let currency = self
2013 .config
2014 .companies
2015 .first()
2016 .map(|c| c.currency.as_str())
2017 .unwrap_or("USD");
2018 let mut fs_gen = FinancialStatementGenerator::new(seed + 20);
2019
2020 for period in 0..self.config.global.period_months {
2022 let period_start = start_date + chrono::Months::new(period);
2023 let period_end =
2024 start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
2025 let fiscal_year = period_end.year() as u16;
2026 let fiscal_period = period_end.month() as u8;
2027
2028 let tb_entries = self.build_trial_balance_from_flows(document_flows, &period_end);
2030
2031 let stmts = fs_gen.generate(
2032 company_code,
2033 currency,
2034 &tb_entries,
2035 period_start,
2036 period_end,
2037 fiscal_year,
2038 fiscal_period,
2039 None,
2040 "SYS-AUTOCLOSE",
2041 );
2042 financial_statements.extend(stmts);
2043 }
2044 stats.financial_statement_count = financial_statements.len();
2045 info!(
2046 "Financial statements generated: {} statements",
2047 stats.financial_statement_count
2048 );
2049 }
2050
2051 if br_enabled && !document_flows.payments.is_empty() {
2053 let mut br_gen = BankReconciliationGenerator::new(seed + 25);
2054
2055 for company in &self.config.companies {
2057 let company_payments: Vec<PaymentReference> = document_flows
2058 .payments
2059 .iter()
2060 .filter(|p| p.header.company_code == company.code)
2061 .map(|p| PaymentReference {
2062 id: p.header.document_id.clone(),
2063 amount: if p.is_vendor { p.amount } else { -p.amount },
2064 date: p.header.document_date,
2065 reference: p
2066 .check_number
2067 .clone()
2068 .or_else(|| p.wire_reference.clone())
2069 .unwrap_or_else(|| p.header.document_id.clone()),
2070 })
2071 .collect();
2072
2073 if company_payments.is_empty() {
2074 continue;
2075 }
2076
2077 let bank_account_id = format!("{}-MAIN", company.code);
2078
2079 for period in 0..self.config.global.period_months {
2081 let period_start = start_date + chrono::Months::new(period);
2082 let period_end =
2083 start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
2084
2085 let period_payments: Vec<PaymentReference> = company_payments
2086 .iter()
2087 .filter(|p| p.date >= period_start && p.date <= period_end)
2088 .cloned()
2089 .collect();
2090
2091 let recon = br_gen.generate(
2092 &company.code,
2093 &bank_account_id,
2094 period_start,
2095 period_end,
2096 &company.currency,
2097 &period_payments,
2098 );
2099 bank_reconciliations.push(recon);
2100 }
2101 }
2102 info!(
2103 "Bank reconciliations generated: {} reconciliations",
2104 bank_reconciliations.len()
2105 );
2106 }
2107
2108 stats.bank_reconciliation_count = bank_reconciliations.len();
2109 self.check_resources_with_log("post-financial-reporting")?;
2110
2111 Ok(FinancialReportingSnapshot {
2112 financial_statements,
2113 bank_reconciliations,
2114 })
2115 }
2116
2117 fn build_trial_balance_from_flows(
2119 &self,
2120 flows: &DocumentFlowSnapshot,
2121 _period_end: &NaiveDate,
2122 ) -> Vec<datasynth_generators::TrialBalanceEntry> {
2123 use rust_decimal::Decimal;
2124
2125 let mut entries = Vec::new();
2126
2127 let ar_total: Decimal = flows
2129 .customer_invoices
2130 .iter()
2131 .map(|ci| ci.total_gross_amount)
2132 .sum();
2133 if !ar_total.is_zero() {
2134 entries.push(datasynth_generators::TrialBalanceEntry {
2135 account_code: "1100".to_string(),
2136 account_name: "Accounts Receivable".to_string(),
2137 category: "Receivables".to_string(),
2138 debit_balance: ar_total,
2139 credit_balance: Decimal::ZERO,
2140 });
2141 }
2142
2143 let ap_total: Decimal = flows
2145 .vendor_invoices
2146 .iter()
2147 .map(|vi| vi.payable_amount)
2148 .sum();
2149 if !ap_total.is_zero() {
2150 entries.push(datasynth_generators::TrialBalanceEntry {
2151 account_code: "2000".to_string(),
2152 account_name: "Accounts Payable".to_string(),
2153 category: "Payables".to_string(),
2154 debit_balance: Decimal::ZERO,
2155 credit_balance: ap_total,
2156 });
2157 }
2158
2159 let revenue: Decimal = flows
2161 .customer_invoices
2162 .iter()
2163 .map(|ci| ci.total_gross_amount)
2164 .sum();
2165 if !revenue.is_zero() {
2166 entries.push(datasynth_generators::TrialBalanceEntry {
2167 account_code: "4000".to_string(),
2168 account_name: "Revenue".to_string(),
2169 category: "Revenue".to_string(),
2170 debit_balance: Decimal::ZERO,
2171 credit_balance: revenue,
2172 });
2173 }
2174
2175 let cogs: Decimal = flows
2177 .purchase_orders
2178 .iter()
2179 .map(|po| po.total_net_amount)
2180 .sum();
2181 if !cogs.is_zero() {
2182 entries.push(datasynth_generators::TrialBalanceEntry {
2183 account_code: "5000".to_string(),
2184 account_name: "Cost of Goods Sold".to_string(),
2185 category: "CostOfSales".to_string(),
2186 debit_balance: cogs,
2187 credit_balance: Decimal::ZERO,
2188 });
2189 }
2190
2191 let payments_out: Decimal = flows.payments.iter().map(|p| p.amount).sum();
2193 if !payments_out.is_zero() {
2194 entries.push(datasynth_generators::TrialBalanceEntry {
2195 account_code: "1000".to_string(),
2196 account_name: "Cash".to_string(),
2197 category: "Cash".to_string(),
2198 debit_balance: payments_out,
2199 credit_balance: Decimal::ZERO,
2200 });
2201 }
2202
2203 entries
2204 }
2205
2206 fn phase_hr_data(
2208 &mut self,
2209 stats: &mut EnhancedGenerationStatistics,
2210 ) -> SynthResult<HrSnapshot> {
2211 if !self.config.hr.enabled {
2212 debug!("Phase 16: Skipped (HR generation disabled)");
2213 return Ok(HrSnapshot::default());
2214 }
2215
2216 info!("Phase 16: Generating HR Data (Payroll, Time Entries, Expenses)");
2217
2218 let seed = self.seed;
2219 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
2220 .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
2221 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
2222 let company_code = self
2223 .config
2224 .companies
2225 .first()
2226 .map(|c| c.code.as_str())
2227 .unwrap_or("1000");
2228 let currency = self
2229 .config
2230 .companies
2231 .first()
2232 .map(|c| c.currency.as_str())
2233 .unwrap_or("USD");
2234
2235 let employee_ids: Vec<String> = self
2236 .master_data
2237 .employees
2238 .iter()
2239 .map(|e| e.employee_id.clone())
2240 .collect();
2241
2242 if employee_ids.is_empty() {
2243 debug!("Phase 16: Skipped (no employees available)");
2244 return Ok(HrSnapshot::default());
2245 }
2246
2247 let mut snapshot = HrSnapshot::default();
2248
2249 if self.config.hr.payroll.enabled {
2251 let mut payroll_gen = datasynth_generators::PayrollGenerator::new(seed + 30);
2252 let employees_with_salary: Vec<(
2253 String,
2254 rust_decimal::Decimal,
2255 Option<String>,
2256 Option<String>,
2257 )> = self
2258 .master_data
2259 .employees
2260 .iter()
2261 .map(|e| {
2262 (
2263 e.employee_id.clone(),
2264 rust_decimal::Decimal::from(5000), e.cost_center.clone(),
2266 e.department_id.clone(),
2267 )
2268 })
2269 .collect();
2270
2271 for month in 0..self.config.global.period_months {
2272 let period_start = start_date + chrono::Months::new(month);
2273 let period_end = start_date + chrono::Months::new(month + 1) - chrono::Days::new(1);
2274 let (run, items) = payroll_gen.generate(
2275 company_code,
2276 &employees_with_salary,
2277 period_start,
2278 period_end,
2279 currency,
2280 );
2281 snapshot.payroll_runs.push(run);
2282 snapshot.payroll_run_count += 1;
2283 snapshot.payroll_line_item_count += items.len();
2284 snapshot.payroll_line_items.extend(items);
2285 }
2286 }
2287
2288 if self.config.hr.time_attendance.enabled {
2290 let mut time_gen = datasynth_generators::TimeEntryGenerator::new(seed + 31);
2291 let entries = time_gen.generate(
2292 &employee_ids,
2293 start_date,
2294 end_date,
2295 &self.config.hr.time_attendance,
2296 );
2297 snapshot.time_entry_count = entries.len();
2298 snapshot.time_entries = entries;
2299 }
2300
2301 if self.config.hr.expenses.enabled {
2303 let mut expense_gen = datasynth_generators::ExpenseReportGenerator::new(seed + 32);
2304 let reports = expense_gen.generate(
2305 &employee_ids,
2306 start_date,
2307 end_date,
2308 &self.config.hr.expenses,
2309 );
2310 snapshot.expense_report_count = reports.len();
2311 snapshot.expense_reports = reports;
2312 }
2313
2314 stats.payroll_run_count = snapshot.payroll_run_count;
2315 stats.time_entry_count = snapshot.time_entry_count;
2316 stats.expense_report_count = snapshot.expense_report_count;
2317
2318 info!(
2319 "HR data generated: {} payroll runs ({} line items), {} time entries, {} expense reports",
2320 snapshot.payroll_run_count, snapshot.payroll_line_item_count,
2321 snapshot.time_entry_count, snapshot.expense_report_count
2322 );
2323 self.check_resources_with_log("post-hr")?;
2324
2325 Ok(snapshot)
2326 }
2327
2328 fn phase_accounting_standards(
2330 &mut self,
2331 stats: &mut EnhancedGenerationStatistics,
2332 ) -> SynthResult<AccountingStandardsSnapshot> {
2333 if !self.phase_config.generate_accounting_standards
2334 || !self.config.accounting_standards.enabled
2335 {
2336 debug!("Phase 17: Skipped (accounting standards generation disabled)");
2337 return Ok(AccountingStandardsSnapshot::default());
2338 }
2339 info!("Phase 17: Generating Accounting Standards Data");
2340
2341 let seed = self.seed;
2342 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
2343 .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
2344 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
2345 let company_code = self
2346 .config
2347 .companies
2348 .first()
2349 .map(|c| c.code.as_str())
2350 .unwrap_or("1000");
2351 let currency = self
2352 .config
2353 .companies
2354 .first()
2355 .map(|c| c.currency.as_str())
2356 .unwrap_or("USD");
2357
2358 let framework = match self.config.accounting_standards.framework {
2360 datasynth_config::schema::AccountingFrameworkConfig::UsGaap => {
2361 datasynth_standards::framework::AccountingFramework::UsGaap
2362 }
2363 datasynth_config::schema::AccountingFrameworkConfig::Ifrs => {
2364 datasynth_standards::framework::AccountingFramework::Ifrs
2365 }
2366 datasynth_config::schema::AccountingFrameworkConfig::DualReporting => {
2367 datasynth_standards::framework::AccountingFramework::DualReporting
2368 }
2369 };
2370
2371 let mut snapshot = AccountingStandardsSnapshot::default();
2372
2373 if self.config.accounting_standards.revenue_recognition.enabled {
2375 let customer_ids: Vec<String> = self
2376 .master_data
2377 .customers
2378 .iter()
2379 .map(|c| c.customer_id.clone())
2380 .collect();
2381
2382 if !customer_ids.is_empty() {
2383 let mut rev_gen = datasynth_generators::RevenueRecognitionGenerator::new(seed + 40);
2384 let contracts = rev_gen.generate(
2385 company_code,
2386 &customer_ids,
2387 start_date,
2388 end_date,
2389 currency,
2390 &self.config.accounting_standards.revenue_recognition,
2391 framework,
2392 );
2393 snapshot.revenue_contract_count = contracts.len();
2394 }
2395 }
2396
2397 if self.config.accounting_standards.impairment.enabled {
2399 let asset_data: Vec<(String, String, rust_decimal::Decimal)> = self
2400 .master_data
2401 .assets
2402 .iter()
2403 .map(|a| {
2404 (
2405 a.asset_id.clone(),
2406 a.description.clone(),
2407 a.acquisition_cost,
2408 )
2409 })
2410 .collect();
2411
2412 if !asset_data.is_empty() {
2413 let mut imp_gen = datasynth_generators::ImpairmentGenerator::new(seed + 41);
2414 let tests = imp_gen.generate(
2415 company_code,
2416 &asset_data,
2417 end_date,
2418 &self.config.accounting_standards.impairment,
2419 framework,
2420 );
2421 snapshot.impairment_test_count = tests.len();
2422 }
2423 }
2424
2425 stats.revenue_contract_count = snapshot.revenue_contract_count;
2426 stats.impairment_test_count = snapshot.impairment_test_count;
2427
2428 info!(
2429 "Accounting standards data generated: {} revenue contracts, {} impairment tests",
2430 snapshot.revenue_contract_count, snapshot.impairment_test_count
2431 );
2432 self.check_resources_with_log("post-accounting-standards")?;
2433
2434 Ok(snapshot)
2435 }
2436
2437 fn phase_manufacturing(
2439 &mut self,
2440 stats: &mut EnhancedGenerationStatistics,
2441 ) -> SynthResult<ManufacturingSnapshot> {
2442 if !self.phase_config.generate_manufacturing || !self.config.manufacturing.enabled {
2443 debug!("Phase 18: Skipped (manufacturing generation disabled)");
2444 return Ok(ManufacturingSnapshot::default());
2445 }
2446 info!("Phase 18: Generating Manufacturing Data");
2447
2448 let seed = self.seed;
2449 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
2450 .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
2451 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
2452 let company_code = self
2453 .config
2454 .companies
2455 .first()
2456 .map(|c| c.code.as_str())
2457 .unwrap_or("1000");
2458
2459 let material_data: Vec<(String, String)> = self
2460 .master_data
2461 .materials
2462 .iter()
2463 .map(|m| (m.material_id.clone(), m.description.clone()))
2464 .collect();
2465
2466 if material_data.is_empty() {
2467 debug!("Phase 18: Skipped (no materials available)");
2468 return Ok(ManufacturingSnapshot::default());
2469 }
2470
2471 let mut snapshot = ManufacturingSnapshot::default();
2472
2473 let mut prod_gen = datasynth_generators::ProductionOrderGenerator::new(seed + 50);
2475 let production_orders = prod_gen.generate(
2476 company_code,
2477 &material_data,
2478 start_date,
2479 end_date,
2480 &self.config.manufacturing.production_orders,
2481 &self.config.manufacturing.costing,
2482 &self.config.manufacturing.routing,
2483 );
2484 snapshot.production_order_count = production_orders.len();
2485
2486 let inspection_data: Vec<(String, String, String)> = production_orders
2488 .iter()
2489 .map(|po| {
2490 (
2491 po.order_id.clone(),
2492 po.material_id.clone(),
2493 po.material_description.clone(),
2494 )
2495 })
2496 .collect();
2497
2498 snapshot.production_orders = production_orders;
2499
2500 if !inspection_data.is_empty() {
2501 let mut qi_gen = datasynth_generators::QualityInspectionGenerator::new(seed + 51);
2502 let inspections = qi_gen.generate(company_code, &inspection_data, end_date);
2503 snapshot.quality_inspection_count = inspections.len();
2504 snapshot.quality_inspections = inspections;
2505 }
2506
2507 let storage_locations: Vec<(String, String)> = material_data
2509 .iter()
2510 .enumerate()
2511 .map(|(i, (mid, _))| (mid.clone(), format!("SL-{:03}", (i % 10) + 1)))
2512 .collect();
2513
2514 let mut cc_gen = datasynth_generators::CycleCountGenerator::new(seed + 52);
2515 let mut cycle_count_total = 0usize;
2516 for month in 0..self.config.global.period_months {
2517 let count_date = start_date + chrono::Months::new(month);
2518 let items_per_count = storage_locations.len().clamp(10, 50);
2519 let cc = cc_gen.generate(
2520 company_code,
2521 &storage_locations,
2522 count_date,
2523 items_per_count,
2524 );
2525 snapshot.cycle_counts.push(cc);
2526 cycle_count_total += 1;
2527 }
2528 snapshot.cycle_count_count = cycle_count_total;
2529
2530 stats.production_order_count = snapshot.production_order_count;
2531 stats.quality_inspection_count = snapshot.quality_inspection_count;
2532 stats.cycle_count_count = snapshot.cycle_count_count;
2533
2534 info!(
2535 "Manufacturing data generated: {} production orders, {} quality inspections, {} cycle counts",
2536 snapshot.production_order_count, snapshot.quality_inspection_count, snapshot.cycle_count_count
2537 );
2538 self.check_resources_with_log("post-manufacturing")?;
2539
2540 Ok(snapshot)
2541 }
2542
2543 fn phase_sales_kpi_budgets(
2545 &mut self,
2546 coa: &Arc<ChartOfAccounts>,
2547 stats: &mut EnhancedGenerationStatistics,
2548 ) -> SynthResult<SalesKpiBudgetsSnapshot> {
2549 if !self.phase_config.generate_sales_kpi_budgets {
2550 debug!("Phase 19: Skipped (sales/KPI/budget generation disabled)");
2551 return Ok(SalesKpiBudgetsSnapshot::default());
2552 }
2553 info!("Phase 19: Generating Sales Quotes, KPIs, and Budgets");
2554
2555 let seed = self.seed;
2556 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
2557 .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
2558 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
2559 let company_code = self
2560 .config
2561 .companies
2562 .first()
2563 .map(|c| c.code.as_str())
2564 .unwrap_or("1000");
2565
2566 let mut snapshot = SalesKpiBudgetsSnapshot::default();
2567
2568 if self.config.sales_quotes.enabled {
2570 let customer_data: Vec<(String, String)> = self
2571 .master_data
2572 .customers
2573 .iter()
2574 .map(|c| (c.customer_id.clone(), c.name.clone()))
2575 .collect();
2576 let material_data: Vec<(String, String)> = self
2577 .master_data
2578 .materials
2579 .iter()
2580 .map(|m| (m.material_id.clone(), m.description.clone()))
2581 .collect();
2582
2583 if !customer_data.is_empty() && !material_data.is_empty() {
2584 let mut quote_gen = datasynth_generators::SalesQuoteGenerator::new(seed + 60);
2585 let quotes = quote_gen.generate(
2586 company_code,
2587 &customer_data,
2588 &material_data,
2589 start_date,
2590 end_date,
2591 &self.config.sales_quotes,
2592 );
2593 snapshot.sales_quote_count = quotes.len();
2594 snapshot.sales_quotes = quotes;
2595 }
2596 }
2597
2598 if self.config.financial_reporting.management_kpis.enabled {
2600 let mut kpi_gen = datasynth_generators::KpiGenerator::new(seed + 61);
2601 let kpis = kpi_gen.generate(
2602 company_code,
2603 start_date,
2604 end_date,
2605 &self.config.financial_reporting.management_kpis,
2606 );
2607 snapshot.kpi_count = kpis.len();
2608 snapshot.kpis = kpis;
2609 }
2610
2611 if self.config.financial_reporting.budgets.enabled {
2613 let account_data: Vec<(String, String)> = coa
2614 .accounts
2615 .iter()
2616 .map(|a| (a.account_number.clone(), a.short_description.clone()))
2617 .collect();
2618
2619 if !account_data.is_empty() {
2620 let fiscal_year = start_date.year() as u32;
2621 let mut budget_gen = datasynth_generators::BudgetGenerator::new(seed + 62);
2622 let budget = budget_gen.generate(
2623 company_code,
2624 fiscal_year,
2625 &account_data,
2626 &self.config.financial_reporting.budgets,
2627 );
2628 snapshot.budget_line_count = budget.line_items.len();
2629 snapshot.budgets.push(budget);
2630 }
2631 }
2632
2633 stats.sales_quote_count = snapshot.sales_quote_count;
2634 stats.kpi_count = snapshot.kpi_count;
2635 stats.budget_line_count = snapshot.budget_line_count;
2636
2637 info!(
2638 "Sales/KPI/Budget data generated: {} quotes, {} KPIs, {} budget lines",
2639 snapshot.sales_quote_count, snapshot.kpi_count, snapshot.budget_line_count
2640 );
2641 self.check_resources_with_log("post-sales-kpi-budgets")?;
2642
2643 Ok(snapshot)
2644 }
2645
2646 fn generate_coa(&mut self) -> SynthResult<Arc<ChartOfAccounts>> {
2648 let pb = self.create_progress_bar(1, "Generating Chart of Accounts");
2649
2650 let mut gen = ChartOfAccountsGenerator::new(
2651 self.config.chart_of_accounts.complexity,
2652 self.config.global.industry,
2653 self.seed,
2654 );
2655
2656 let coa = Arc::new(gen.generate());
2657 self.coa = Some(Arc::clone(&coa));
2658
2659 if let Some(pb) = pb {
2660 pb.finish_with_message("Chart of Accounts complete");
2661 }
2662
2663 Ok(coa)
2664 }
2665
2666 fn generate_master_data(&mut self) -> SynthResult<()> {
2668 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
2669 .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
2670 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
2671
2672 let total = self.config.companies.len() as u64 * 5; let pb = self.create_progress_bar(total, "Generating Master Data");
2674
2675 for (i, company) in self.config.companies.iter().enumerate() {
2676 let company_seed = self.seed.wrapping_add(i as u64 * 1000);
2677
2678 let mut vendor_gen = VendorGenerator::new(company_seed);
2680 let vendor_pool = vendor_gen.generate_vendor_pool(
2681 self.phase_config.vendors_per_company,
2682 &company.code,
2683 start_date,
2684 );
2685 self.master_data.vendors.extend(vendor_pool.vendors);
2686 if let Some(pb) = &pb {
2687 pb.inc(1);
2688 }
2689
2690 let mut customer_gen = CustomerGenerator::new(company_seed + 100);
2692 let customer_pool = customer_gen.generate_customer_pool(
2693 self.phase_config.customers_per_company,
2694 &company.code,
2695 start_date,
2696 );
2697 self.master_data.customers.extend(customer_pool.customers);
2698 if let Some(pb) = &pb {
2699 pb.inc(1);
2700 }
2701
2702 let mut material_gen = MaterialGenerator::new(company_seed + 200);
2704 let material_pool = material_gen.generate_material_pool(
2705 self.phase_config.materials_per_company,
2706 &company.code,
2707 start_date,
2708 );
2709 self.master_data.materials.extend(material_pool.materials);
2710 if let Some(pb) = &pb {
2711 pb.inc(1);
2712 }
2713
2714 let mut asset_gen = AssetGenerator::new(company_seed + 300);
2716 let asset_pool = asset_gen.generate_asset_pool(
2717 self.phase_config.assets_per_company,
2718 &company.code,
2719 (start_date, end_date),
2720 );
2721 self.master_data.assets.extend(asset_pool.assets);
2722 if let Some(pb) = &pb {
2723 pb.inc(1);
2724 }
2725
2726 let mut employee_gen = EmployeeGenerator::new(company_seed + 400);
2728 let employee_pool =
2729 employee_gen.generate_company_pool(&company.code, (start_date, end_date));
2730 self.master_data.employees.extend(employee_pool.employees);
2731 if let Some(pb) = &pb {
2732 pb.inc(1);
2733 }
2734 }
2735
2736 if let Some(pb) = pb {
2737 pb.finish_with_message("Master data generation complete");
2738 }
2739
2740 Ok(())
2741 }
2742
2743 fn generate_document_flows(&mut self, flows: &mut DocumentFlowSnapshot) -> SynthResult<()> {
2745 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
2746 .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
2747
2748 let p2p_count = self
2750 .phase_config
2751 .p2p_chains
2752 .min(self.master_data.vendors.len() * 2);
2753 let pb = self.create_progress_bar(p2p_count as u64, "Generating P2P Document Flows");
2754
2755 let p2p_config = convert_p2p_config(&self.config.document_flows.p2p);
2757 let mut p2p_gen = P2PGenerator::with_config(self.seed + 1000, p2p_config);
2758
2759 for i in 0..p2p_count {
2760 let vendor = &self.master_data.vendors[i % self.master_data.vendors.len()];
2761 let materials: Vec<&Material> = self
2762 .master_data
2763 .materials
2764 .iter()
2765 .skip(i % self.master_data.materials.len().max(1))
2766 .take(2.min(self.master_data.materials.len()))
2767 .collect();
2768
2769 if materials.is_empty() {
2770 continue;
2771 }
2772
2773 let company = &self.config.companies[i % self.config.companies.len()];
2774 let po_date = start_date + chrono::Duration::days((i * 3) as i64 % 365);
2775 let fiscal_period = po_date.month() as u8;
2776 let created_by = self
2777 .master_data
2778 .employees
2779 .first()
2780 .map(|e| e.user_id.as_str())
2781 .unwrap_or("SYSTEM");
2782
2783 let chain = p2p_gen.generate_chain(
2784 &company.code,
2785 vendor,
2786 &materials,
2787 po_date,
2788 start_date.year() as u16,
2789 fiscal_period,
2790 created_by,
2791 );
2792
2793 flows.purchase_orders.push(chain.purchase_order.clone());
2795 flows.goods_receipts.extend(chain.goods_receipts.clone());
2796 if let Some(vi) = &chain.vendor_invoice {
2797 flows.vendor_invoices.push(vi.clone());
2798 }
2799 if let Some(payment) = &chain.payment {
2800 flows.payments.push(payment.clone());
2801 }
2802 flows.p2p_chains.push(chain);
2803
2804 if let Some(pb) = &pb {
2805 pb.inc(1);
2806 }
2807 }
2808
2809 if let Some(pb) = pb {
2810 pb.finish_with_message("P2P document flows complete");
2811 }
2812
2813 let o2c_count = self
2815 .phase_config
2816 .o2c_chains
2817 .min(self.master_data.customers.len() * 2);
2818 let pb = self.create_progress_bar(o2c_count as u64, "Generating O2C Document Flows");
2819
2820 let o2c_config = convert_o2c_config(&self.config.document_flows.o2c);
2822 let mut o2c_gen = O2CGenerator::with_config(self.seed + 2000, o2c_config);
2823
2824 for i in 0..o2c_count {
2825 let customer = &self.master_data.customers[i % self.master_data.customers.len()];
2826 let materials: Vec<&Material> = self
2827 .master_data
2828 .materials
2829 .iter()
2830 .skip(i % self.master_data.materials.len().max(1))
2831 .take(2.min(self.master_data.materials.len()))
2832 .collect();
2833
2834 if materials.is_empty() {
2835 continue;
2836 }
2837
2838 let company = &self.config.companies[i % self.config.companies.len()];
2839 let so_date = start_date + chrono::Duration::days((i * 2) as i64 % 365);
2840 let fiscal_period = so_date.month() as u8;
2841 let created_by = self
2842 .master_data
2843 .employees
2844 .first()
2845 .map(|e| e.user_id.as_str())
2846 .unwrap_or("SYSTEM");
2847
2848 let chain = o2c_gen.generate_chain(
2849 &company.code,
2850 customer,
2851 &materials,
2852 so_date,
2853 start_date.year() as u16,
2854 fiscal_period,
2855 created_by,
2856 );
2857
2858 flows.sales_orders.push(chain.sales_order.clone());
2860 flows.deliveries.extend(chain.deliveries.clone());
2861 if let Some(ci) = &chain.customer_invoice {
2862 flows.customer_invoices.push(ci.clone());
2863 }
2864 if let Some(receipt) = &chain.customer_receipt {
2865 flows.payments.push(receipt.clone());
2866 }
2867 flows.o2c_chains.push(chain);
2868
2869 if let Some(pb) = &pb {
2870 pb.inc(1);
2871 }
2872 }
2873
2874 if let Some(pb) = pb {
2875 pb.finish_with_message("O2C document flows complete");
2876 }
2877
2878 Ok(())
2879 }
2880
2881 fn generate_journal_entries(
2883 &mut self,
2884 coa: &Arc<ChartOfAccounts>,
2885 ) -> SynthResult<Vec<JournalEntry>> {
2886 let total = self.calculate_total_transactions();
2887 let pb = self.create_progress_bar(total, "Generating Journal Entries");
2888
2889 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
2890 .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
2891 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
2892
2893 let company_codes: Vec<String> = self
2894 .config
2895 .companies
2896 .iter()
2897 .map(|c| c.code.clone())
2898 .collect();
2899
2900 let generator = JournalEntryGenerator::new_with_params(
2901 self.config.transactions.clone(),
2902 Arc::clone(coa),
2903 company_codes,
2904 start_date,
2905 end_date,
2906 self.seed,
2907 );
2908
2909 let mut generator = generator
2913 .with_master_data(
2914 &self.master_data.vendors,
2915 &self.master_data.customers,
2916 &self.master_data.materials,
2917 )
2918 .with_persona_errors(true)
2919 .with_fraud_config(self.config.fraud.clone());
2920
2921 if self.config.temporal.enabled {
2923 let drift_config = self.config.temporal.to_core_config();
2924 generator = generator.with_drift_config(drift_config, self.seed + 100);
2925 }
2926
2927 let mut entries = Vec::with_capacity(total as usize);
2928
2929 self.check_memory_limit()?;
2931
2932 const MEMORY_CHECK_INTERVAL: u64 = 1000;
2934
2935 for i in 0..total {
2936 let entry = generator.generate();
2937 entries.push(entry);
2938 if let Some(pb) = &pb {
2939 pb.inc(1);
2940 }
2941
2942 if (i + 1) % MEMORY_CHECK_INTERVAL == 0 {
2944 self.check_memory_limit()?;
2945 }
2946 }
2947
2948 if let Some(pb) = pb {
2949 pb.finish_with_message("Journal entries complete");
2950 }
2951
2952 Ok(entries)
2953 }
2954
2955 fn generate_jes_from_document_flows(
2960 &mut self,
2961 flows: &DocumentFlowSnapshot,
2962 ) -> SynthResult<Vec<JournalEntry>> {
2963 let total_chains = flows.p2p_chains.len() + flows.o2c_chains.len();
2964 let pb = self.create_progress_bar(total_chains as u64, "Generating Document Flow JEs");
2965
2966 let mut generator = DocumentFlowJeGenerator::with_config_and_seed(
2967 DocumentFlowJeConfig::default(),
2968 self.seed,
2969 );
2970 let mut entries = Vec::new();
2971
2972 for chain in &flows.p2p_chains {
2974 let chain_entries = generator.generate_from_p2p_chain(chain);
2975 entries.extend(chain_entries);
2976 if let Some(pb) = &pb {
2977 pb.inc(1);
2978 }
2979 }
2980
2981 for chain in &flows.o2c_chains {
2983 let chain_entries = generator.generate_from_o2c_chain(chain);
2984 entries.extend(chain_entries);
2985 if let Some(pb) = &pb {
2986 pb.inc(1);
2987 }
2988 }
2989
2990 if let Some(pb) = pb {
2991 pb.finish_with_message(format!(
2992 "Generated {} JEs from document flows",
2993 entries.len()
2994 ));
2995 }
2996
2997 Ok(entries)
2998 }
2999
3000 fn link_document_flows_to_subledgers(
3005 &mut self,
3006 flows: &DocumentFlowSnapshot,
3007 ) -> SynthResult<SubledgerSnapshot> {
3008 let total = flows.vendor_invoices.len() + flows.customer_invoices.len();
3009 let pb = self.create_progress_bar(total as u64, "Linking Subledgers");
3010
3011 let mut linker = DocumentFlowLinker::new();
3012
3013 let ap_invoices = linker.batch_create_ap_invoices(&flows.vendor_invoices);
3015 if let Some(pb) = &pb {
3016 pb.inc(flows.vendor_invoices.len() as u64);
3017 }
3018
3019 let ar_invoices = linker.batch_create_ar_invoices(&flows.customer_invoices);
3021 if let Some(pb) = &pb {
3022 pb.inc(flows.customer_invoices.len() as u64);
3023 }
3024
3025 if let Some(pb) = pb {
3026 pb.finish_with_message(format!(
3027 "Linked {} AP and {} AR invoices",
3028 ap_invoices.len(),
3029 ar_invoices.len()
3030 ));
3031 }
3032
3033 Ok(SubledgerSnapshot {
3034 ap_invoices,
3035 ar_invoices,
3036 })
3037 }
3038
3039 #[allow(clippy::too_many_arguments)]
3044 fn generate_ocpm_events(
3045 &mut self,
3046 flows: &DocumentFlowSnapshot,
3047 sourcing: &SourcingSnapshot,
3048 hr: &HrSnapshot,
3049 manufacturing: &ManufacturingSnapshot,
3050 banking: &BankingSnapshot,
3051 audit: &AuditSnapshot,
3052 financial_reporting: &FinancialReportingSnapshot,
3053 ) -> SynthResult<OcpmSnapshot> {
3054 let total_chains = flows.p2p_chains.len()
3055 + flows.o2c_chains.len()
3056 + sourcing.sourcing_projects.len()
3057 + hr.payroll_runs.len()
3058 + manufacturing.production_orders.len()
3059 + banking.customers.len()
3060 + audit.engagements.len()
3061 + financial_reporting.bank_reconciliations.len();
3062 let pb = self.create_progress_bar(total_chains as u64, "Generating OCPM Events");
3063
3064 let metadata = EventLogMetadata::new("SyntheticData OCPM Log");
3066 let mut event_log = OcpmEventLog::with_metadata(metadata).with_standard_types();
3067
3068 let ocpm_config = OcpmGeneratorConfig {
3070 generate_p2p: true,
3071 generate_o2c: true,
3072 generate_s2c: !sourcing.sourcing_projects.is_empty(),
3073 generate_h2r: !hr.payroll_runs.is_empty(),
3074 generate_mfg: !manufacturing.production_orders.is_empty(),
3075 generate_bank_recon: !financial_reporting.bank_reconciliations.is_empty(),
3076 generate_bank: !banking.customers.is_empty(),
3077 generate_audit: !audit.engagements.is_empty(),
3078 happy_path_rate: 0.75,
3079 exception_path_rate: 0.20,
3080 error_path_rate: 0.05,
3081 add_duration_variability: true,
3082 duration_std_dev_factor: 0.3,
3083 };
3084 let mut ocpm_gen = OcpmEventGenerator::with_config(self.seed + 3000, ocpm_config);
3085
3086 let available_users: Vec<String> = self
3088 .master_data
3089 .employees
3090 .iter()
3091 .take(20)
3092 .map(|e| e.user_id.clone())
3093 .collect();
3094
3095 let add_result = |event_log: &mut OcpmEventLog,
3097 result: datasynth_ocpm::CaseGenerationResult| {
3098 for event in result.events {
3099 event_log.add_event(event);
3100 }
3101 for object in result.objects {
3102 event_log.add_object(object);
3103 }
3104 for relationship in result.relationships {
3105 event_log.add_relationship(relationship);
3106 }
3107 event_log.add_case(result.case_trace);
3108 };
3109
3110 for chain in &flows.p2p_chains {
3112 let po = &chain.purchase_order;
3113 let documents = P2pDocuments::new(
3114 &po.header.document_id,
3115 &po.vendor_id,
3116 &po.header.company_code,
3117 po.total_net_amount,
3118 &po.header.currency,
3119 )
3120 .with_goods_receipt(
3121 chain
3122 .goods_receipts
3123 .first()
3124 .map(|gr| gr.header.document_id.as_str())
3125 .unwrap_or(""),
3126 )
3127 .with_invoice(
3128 chain
3129 .vendor_invoice
3130 .as_ref()
3131 .map(|vi| vi.header.document_id.as_str())
3132 .unwrap_or(""),
3133 )
3134 .with_payment(
3135 chain
3136 .payment
3137 .as_ref()
3138 .map(|p| p.header.document_id.as_str())
3139 .unwrap_or(""),
3140 );
3141
3142 let start_time =
3143 chrono::DateTime::from_naive_utc_and_offset(po.header.entry_timestamp, chrono::Utc);
3144 let result = ocpm_gen.generate_p2p_case(&documents, start_time, &available_users);
3145 add_result(&mut event_log, result);
3146
3147 if let Some(pb) = &pb {
3148 pb.inc(1);
3149 }
3150 }
3151
3152 for chain in &flows.o2c_chains {
3154 let so = &chain.sales_order;
3155 let documents = O2cDocuments::new(
3156 &so.header.document_id,
3157 &so.customer_id,
3158 &so.header.company_code,
3159 so.total_net_amount,
3160 &so.header.currency,
3161 )
3162 .with_delivery(
3163 chain
3164 .deliveries
3165 .first()
3166 .map(|d| d.header.document_id.as_str())
3167 .unwrap_or(""),
3168 )
3169 .with_invoice(
3170 chain
3171 .customer_invoice
3172 .as_ref()
3173 .map(|ci| ci.header.document_id.as_str())
3174 .unwrap_or(""),
3175 )
3176 .with_receipt(
3177 chain
3178 .customer_receipt
3179 .as_ref()
3180 .map(|r| r.header.document_id.as_str())
3181 .unwrap_or(""),
3182 );
3183
3184 let start_time =
3185 chrono::DateTime::from_naive_utc_and_offset(so.header.entry_timestamp, chrono::Utc);
3186 let result = ocpm_gen.generate_o2c_case(&documents, start_time, &available_users);
3187 add_result(&mut event_log, result);
3188
3189 if let Some(pb) = &pb {
3190 pb.inc(1);
3191 }
3192 }
3193
3194 for project in &sourcing.sourcing_projects {
3196 let vendor_id = sourcing
3198 .contracts
3199 .iter()
3200 .find(|c| c.sourcing_project_id.as_deref() == Some(&project.project_id))
3201 .map(|c| c.vendor_id.clone())
3202 .or_else(|| sourcing.qualifications.first().map(|q| q.vendor_id.clone()))
3203 .unwrap_or_else(|| "V000".to_string());
3204 let mut docs = S2cDocuments::new(
3205 &project.project_id,
3206 &vendor_id,
3207 &project.company_code,
3208 project.estimated_annual_spend,
3209 );
3210 if let Some(rfx) = sourcing
3212 .rfx_events
3213 .iter()
3214 .find(|r| r.sourcing_project_id == project.project_id)
3215 {
3216 docs = docs.with_rfx(&rfx.rfx_id);
3217 if let Some(bid) = sourcing.bids.iter().find(|b| {
3219 b.rfx_id == rfx.rfx_id
3220 && b.status == datasynth_core::models::sourcing::BidStatus::Accepted
3221 }) {
3222 docs = docs.with_winning_bid(&bid.bid_id);
3223 }
3224 }
3225 if let Some(contract) = sourcing
3227 .contracts
3228 .iter()
3229 .find(|c| c.sourcing_project_id.as_deref() == Some(&project.project_id))
3230 {
3231 docs = docs.with_contract(&contract.contract_id);
3232 }
3233 let start_time = chrono::Utc::now() - chrono::Duration::days(90);
3234 let result = ocpm_gen.generate_s2c_case(&docs, start_time, &available_users);
3235 add_result(&mut event_log, result);
3236
3237 if let Some(pb) = &pb {
3238 pb.inc(1);
3239 }
3240 }
3241
3242 for run in &hr.payroll_runs {
3244 let employee_id = hr
3246 .payroll_line_items
3247 .iter()
3248 .find(|li| li.payroll_id == run.payroll_id)
3249 .map(|li| li.employee_id.as_str())
3250 .unwrap_or("EMP000");
3251 let docs = H2rDocuments::new(
3252 &run.payroll_id,
3253 employee_id,
3254 &run.company_code,
3255 run.total_gross,
3256 )
3257 .with_time_entries(
3258 hr.time_entries
3259 .iter()
3260 .filter(|t| t.date >= run.pay_period_start && t.date <= run.pay_period_end)
3261 .take(5)
3262 .map(|t| t.entry_id.as_str())
3263 .collect(),
3264 );
3265 let start_time = chrono::Utc::now() - chrono::Duration::days(30);
3266 let result = ocpm_gen.generate_h2r_case(&docs, start_time, &available_users);
3267 add_result(&mut event_log, result);
3268
3269 if let Some(pb) = &pb {
3270 pb.inc(1);
3271 }
3272 }
3273
3274 for order in &manufacturing.production_orders {
3276 let mut docs = MfgDocuments::new(
3277 &order.order_id,
3278 &order.material_id,
3279 &order.company_code,
3280 order.planned_quantity,
3281 )
3282 .with_operations(
3283 order
3284 .operations
3285 .iter()
3286 .map(|o| format!("OP-{:04}", o.operation_number))
3287 .collect::<Vec<_>>()
3288 .iter()
3289 .map(|s| s.as_str())
3290 .collect(),
3291 );
3292 if let Some(insp) = manufacturing
3294 .quality_inspections
3295 .iter()
3296 .find(|i| i.reference_id == order.order_id)
3297 {
3298 docs = docs.with_inspection(&insp.inspection_id);
3299 }
3300 if let Some(cc) = manufacturing.cycle_counts.first() {
3302 docs = docs.with_cycle_count(&cc.count_id);
3303 }
3304 let start_time = chrono::Utc::now() - chrono::Duration::days(60);
3305 let result = ocpm_gen.generate_mfg_case(&docs, start_time, &available_users);
3306 add_result(&mut event_log, result);
3307
3308 if let Some(pb) = &pb {
3309 pb.inc(1);
3310 }
3311 }
3312
3313 for customer in &banking.customers {
3315 let customer_id_str = customer.customer_id.to_string();
3316 let mut docs = BankDocuments::new(&customer_id_str, "1000");
3317 if let Some(account) = banking
3319 .accounts
3320 .iter()
3321 .find(|a| a.primary_owner_id == customer.customer_id)
3322 {
3323 let account_id_str = account.account_id.to_string();
3324 docs = docs.with_account(&account_id_str);
3325 let txn_strs: Vec<String> = banking
3327 .transactions
3328 .iter()
3329 .filter(|t| t.account_id == account.account_id)
3330 .take(10)
3331 .map(|t| t.transaction_id.to_string())
3332 .collect();
3333 let txn_ids: Vec<&str> = txn_strs.iter().map(|s| s.as_str()).collect();
3334 let txn_amounts: Vec<rust_decimal::Decimal> = banking
3335 .transactions
3336 .iter()
3337 .filter(|t| t.account_id == account.account_id)
3338 .take(10)
3339 .map(|t| t.amount)
3340 .collect();
3341 if !txn_ids.is_empty() {
3342 docs = docs.with_transactions(txn_ids, txn_amounts);
3343 }
3344 }
3345 let start_time = chrono::Utc::now() - chrono::Duration::days(180);
3346 let result = ocpm_gen.generate_bank_case(&docs, start_time, &available_users);
3347 add_result(&mut event_log, result);
3348
3349 if let Some(pb) = &pb {
3350 pb.inc(1);
3351 }
3352 }
3353
3354 for engagement in &audit.engagements {
3356 let engagement_id_str = engagement.engagement_id.to_string();
3357 let docs = AuditDocuments::new(&engagement_id_str, &engagement.client_entity_id)
3358 .with_workpapers(
3359 audit
3360 .workpapers
3361 .iter()
3362 .filter(|w| w.engagement_id == engagement.engagement_id)
3363 .take(10)
3364 .map(|w| w.workpaper_id.to_string())
3365 .collect::<Vec<_>>()
3366 .iter()
3367 .map(|s| s.as_str())
3368 .collect(),
3369 )
3370 .with_evidence(
3371 audit
3372 .evidence
3373 .iter()
3374 .filter(|e| e.engagement_id == engagement.engagement_id)
3375 .take(10)
3376 .map(|e| e.evidence_id.to_string())
3377 .collect::<Vec<_>>()
3378 .iter()
3379 .map(|s| s.as_str())
3380 .collect(),
3381 )
3382 .with_risks(
3383 audit
3384 .risk_assessments
3385 .iter()
3386 .filter(|r| r.engagement_id == engagement.engagement_id)
3387 .take(5)
3388 .map(|r| r.risk_id.to_string())
3389 .collect::<Vec<_>>()
3390 .iter()
3391 .map(|s| s.as_str())
3392 .collect(),
3393 )
3394 .with_findings(
3395 audit
3396 .findings
3397 .iter()
3398 .filter(|f| f.engagement_id == engagement.engagement_id)
3399 .take(5)
3400 .map(|f| f.finding_id.to_string())
3401 .collect::<Vec<_>>()
3402 .iter()
3403 .map(|s| s.as_str())
3404 .collect(),
3405 )
3406 .with_judgments(
3407 audit
3408 .judgments
3409 .iter()
3410 .filter(|j| j.engagement_id == engagement.engagement_id)
3411 .take(5)
3412 .map(|j| j.judgment_id.to_string())
3413 .collect::<Vec<_>>()
3414 .iter()
3415 .map(|s| s.as_str())
3416 .collect(),
3417 );
3418 let start_time = chrono::Utc::now() - chrono::Duration::days(120);
3419 let result = ocpm_gen.generate_audit_case(&docs, start_time, &available_users);
3420 add_result(&mut event_log, result);
3421
3422 if let Some(pb) = &pb {
3423 pb.inc(1);
3424 }
3425 }
3426
3427 for recon in &financial_reporting.bank_reconciliations {
3429 let docs = BankReconDocuments::new(
3430 &recon.reconciliation_id,
3431 &recon.bank_account_id,
3432 &recon.company_code,
3433 recon.bank_ending_balance,
3434 )
3435 .with_statement_lines(
3436 recon
3437 .statement_lines
3438 .iter()
3439 .take(20)
3440 .map(|l| l.line_id.as_str())
3441 .collect(),
3442 )
3443 .with_reconciling_items(
3444 recon
3445 .reconciling_items
3446 .iter()
3447 .take(10)
3448 .map(|i| i.item_id.as_str())
3449 .collect(),
3450 );
3451 let start_time = chrono::Utc::now() - chrono::Duration::days(30);
3452 let result = ocpm_gen.generate_bank_recon_case(&docs, start_time, &available_users);
3453 add_result(&mut event_log, result);
3454
3455 if let Some(pb) = &pb {
3456 pb.inc(1);
3457 }
3458 }
3459
3460 event_log.compute_variants();
3462
3463 let summary = event_log.summary();
3464
3465 if let Some(pb) = pb {
3466 pb.finish_with_message(format!(
3467 "Generated {} OCPM events, {} objects",
3468 summary.event_count, summary.object_count
3469 ));
3470 }
3471
3472 Ok(OcpmSnapshot {
3473 event_count: summary.event_count,
3474 object_count: summary.object_count,
3475 case_count: summary.case_count,
3476 event_log: Some(event_log),
3477 })
3478 }
3479
3480 fn inject_anomalies(&mut self, entries: &mut [JournalEntry]) -> SynthResult<AnomalyLabels> {
3482 let pb = self.create_progress_bar(entries.len() as u64, "Injecting Anomalies");
3483
3484 let anomaly_config = AnomalyInjectorConfig {
3485 rates: AnomalyRateConfig {
3486 total_rate: 0.02,
3487 ..Default::default()
3488 },
3489 seed: self.seed + 5000,
3490 ..Default::default()
3491 };
3492
3493 let mut injector = AnomalyInjector::new(anomaly_config);
3494 let result = injector.process_entries(entries);
3495
3496 if let Some(pb) = &pb {
3497 pb.inc(entries.len() as u64);
3498 pb.finish_with_message("Anomaly injection complete");
3499 }
3500
3501 let mut by_type = HashMap::new();
3502 for label in &result.labels {
3503 *by_type
3504 .entry(format!("{:?}", label.anomaly_type))
3505 .or_insert(0) += 1;
3506 }
3507
3508 Ok(AnomalyLabels {
3509 labels: result.labels,
3510 summary: Some(result.summary),
3511 by_type,
3512 })
3513 }
3514
3515 fn validate_journal_entries(
3524 &mut self,
3525 entries: &[JournalEntry],
3526 ) -> SynthResult<BalanceValidationResult> {
3527 let clean_entries: Vec<&JournalEntry> = entries
3529 .iter()
3530 .filter(|e| {
3531 e.header
3532 .header_text
3533 .as_ref()
3534 .map(|t| !t.contains("[HUMAN_ERROR:"))
3535 .unwrap_or(true)
3536 })
3537 .collect();
3538
3539 let pb = self.create_progress_bar(clean_entries.len() as u64, "Validating Balances");
3540
3541 let config = BalanceTrackerConfig {
3543 validate_on_each_entry: false, track_history: false, fail_on_validation_error: false, ..Default::default()
3547 };
3548
3549 let mut tracker = RunningBalanceTracker::new(config);
3550
3551 let clean_refs: Vec<JournalEntry> = clean_entries.into_iter().cloned().collect();
3553 let errors = tracker.apply_entries(&clean_refs);
3554
3555 if let Some(pb) = &pb {
3556 pb.inc(entries.len() as u64);
3557 }
3558
3559 let has_unbalanced = tracker
3562 .get_validation_errors()
3563 .iter()
3564 .any(|e| e.error_type == datasynth_generators::ValidationErrorType::UnbalancedEntry);
3565
3566 let mut all_errors = errors;
3569 all_errors.extend(tracker.get_validation_errors().iter().cloned());
3570 let company_codes: Vec<String> = self
3571 .config
3572 .companies
3573 .iter()
3574 .map(|c| c.code.clone())
3575 .collect();
3576
3577 let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3578 .map(|d| d + chrono::Months::new(self.config.global.period_months))
3579 .unwrap_or_else(|_| chrono::Local::now().date_naive());
3580
3581 for company_code in &company_codes {
3582 if let Err(e) = tracker.validate_balance_sheet(company_code, end_date, None) {
3583 all_errors.push(e);
3584 }
3585 }
3586
3587 let stats = tracker.get_statistics();
3589
3590 let is_balanced = all_errors.is_empty();
3592
3593 if let Some(pb) = pb {
3594 let msg = if is_balanced {
3595 "Balance validation passed"
3596 } else {
3597 "Balance validation completed with errors"
3598 };
3599 pb.finish_with_message(msg);
3600 }
3601
3602 Ok(BalanceValidationResult {
3603 validated: true,
3604 is_balanced,
3605 entries_processed: stats.entries_processed,
3606 total_debits: stats.total_debits,
3607 total_credits: stats.total_credits,
3608 accounts_tracked: stats.accounts_tracked,
3609 companies_tracked: stats.companies_tracked,
3610 validation_errors: all_errors,
3611 has_unbalanced_entries: has_unbalanced,
3612 })
3613 }
3614
3615 fn inject_data_quality(
3620 &mut self,
3621 entries: &mut [JournalEntry],
3622 ) -> SynthResult<DataQualityStats> {
3623 let pb = self.create_progress_bar(entries.len() as u64, "Injecting Data Quality Issues");
3624
3625 let config = DataQualityConfig::minimal();
3627 let mut injector = DataQualityInjector::new(config);
3628
3629 let context = HashMap::new();
3631
3632 for entry in entries.iter_mut() {
3633 if let Some(text) = &entry.header.header_text {
3635 let processed = injector.process_text_field(
3636 "header_text",
3637 text,
3638 &entry.header.document_id.to_string(),
3639 &context,
3640 );
3641 match processed {
3642 Some(new_text) if new_text != *text => {
3643 entry.header.header_text = Some(new_text);
3644 }
3645 None => {
3646 entry.header.header_text = None; }
3648 _ => {}
3649 }
3650 }
3651
3652 if let Some(ref_text) = &entry.header.reference {
3654 let processed = injector.process_text_field(
3655 "reference",
3656 ref_text,
3657 &entry.header.document_id.to_string(),
3658 &context,
3659 );
3660 match processed {
3661 Some(new_text) if new_text != *ref_text => {
3662 entry.header.reference = Some(new_text);
3663 }
3664 None => {
3665 entry.header.reference = None;
3666 }
3667 _ => {}
3668 }
3669 }
3670
3671 let user_persona = entry.header.user_persona.clone();
3673 if let Some(processed) = injector.process_text_field(
3674 "user_persona",
3675 &user_persona,
3676 &entry.header.document_id.to_string(),
3677 &context,
3678 ) {
3679 if processed != user_persona {
3680 entry.header.user_persona = processed;
3681 }
3682 }
3683
3684 for line in &mut entry.lines {
3686 if let Some(ref text) = line.line_text {
3688 let processed = injector.process_text_field(
3689 "line_text",
3690 text,
3691 &entry.header.document_id.to_string(),
3692 &context,
3693 );
3694 match processed {
3695 Some(new_text) if new_text != *text => {
3696 line.line_text = Some(new_text);
3697 }
3698 None => {
3699 line.line_text = None;
3700 }
3701 _ => {}
3702 }
3703 }
3704
3705 if let Some(cc) = &line.cost_center {
3707 let processed = injector.process_text_field(
3708 "cost_center",
3709 cc,
3710 &entry.header.document_id.to_string(),
3711 &context,
3712 );
3713 match processed {
3714 Some(new_cc) if new_cc != *cc => {
3715 line.cost_center = Some(new_cc);
3716 }
3717 None => {
3718 line.cost_center = None;
3719 }
3720 _ => {}
3721 }
3722 }
3723 }
3724
3725 if let Some(pb) = &pb {
3726 pb.inc(1);
3727 }
3728 }
3729
3730 if let Some(pb) = pb {
3731 pb.finish_with_message("Data quality injection complete");
3732 }
3733
3734 Ok(injector.stats().clone())
3735 }
3736
3737 fn generate_audit_data(&mut self, entries: &[JournalEntry]) -> SynthResult<AuditSnapshot> {
3748 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3749 .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
3750 let fiscal_year = start_date.year() as u16;
3751 let period_end = start_date + chrono::Months::new(self.config.global.period_months);
3752
3753 let total_revenue: rust_decimal::Decimal = entries
3755 .iter()
3756 .flat_map(|e| e.lines.iter())
3757 .filter(|l| l.credit_amount > rust_decimal::Decimal::ZERO)
3758 .map(|l| l.credit_amount)
3759 .sum();
3760
3761 let total_items = (self.phase_config.audit_engagements * 50) as u64; let pb = self.create_progress_bar(total_items, "Generating Audit Data");
3763
3764 let mut snapshot = AuditSnapshot::default();
3765
3766 let mut engagement_gen = AuditEngagementGenerator::new(self.seed + 7000);
3768 let mut workpaper_gen = WorkpaperGenerator::new(self.seed + 7100);
3769 let mut evidence_gen = EvidenceGenerator::new(self.seed + 7200);
3770 let mut risk_gen = RiskAssessmentGenerator::new(self.seed + 7300);
3771 let mut finding_gen = FindingGenerator::new(self.seed + 7400);
3772 let mut judgment_gen = JudgmentGenerator::new(self.seed + 7500);
3773
3774 let accounts: Vec<String> = self
3776 .coa
3777 .as_ref()
3778 .map(|coa| {
3779 coa.get_postable_accounts()
3780 .iter()
3781 .map(|acc| acc.account_code().to_string())
3782 .collect()
3783 })
3784 .unwrap_or_default();
3785
3786 for (i, company) in self.config.companies.iter().enumerate() {
3788 let company_revenue = total_revenue
3790 * rust_decimal::Decimal::try_from(company.volume_weight).unwrap_or_default();
3791
3792 let engagements_for_company =
3794 self.phase_config.audit_engagements / self.config.companies.len().max(1);
3795 let extra = if i < self.phase_config.audit_engagements % self.config.companies.len() {
3796 1
3797 } else {
3798 0
3799 };
3800
3801 for _eng_idx in 0..(engagements_for_company + extra) {
3802 let engagement = engagement_gen.generate_engagement(
3804 &company.code,
3805 &company.name,
3806 fiscal_year,
3807 period_end,
3808 company_revenue,
3809 None, );
3811
3812 if let Some(pb) = &pb {
3813 pb.inc(1);
3814 }
3815
3816 let team_members: Vec<String> = engagement.team_member_ids.clone();
3818
3819 let workpapers =
3821 workpaper_gen.generate_complete_workpaper_set(&engagement, &team_members);
3822
3823 for wp in &workpapers {
3824 if let Some(pb) = &pb {
3825 pb.inc(1);
3826 }
3827
3828 let evidence = evidence_gen.generate_evidence_for_workpaper(
3830 wp,
3831 &team_members,
3832 wp.preparer_date,
3833 );
3834
3835 for _ in &evidence {
3836 if let Some(pb) = &pb {
3837 pb.inc(1);
3838 }
3839 }
3840
3841 snapshot.evidence.extend(evidence);
3842 }
3843
3844 let risks =
3846 risk_gen.generate_risks_for_engagement(&engagement, &team_members, &accounts);
3847
3848 for _ in &risks {
3849 if let Some(pb) = &pb {
3850 pb.inc(1);
3851 }
3852 }
3853 snapshot.risk_assessments.extend(risks);
3854
3855 let findings = finding_gen.generate_findings_for_engagement(
3857 &engagement,
3858 &workpapers,
3859 &team_members,
3860 );
3861
3862 for _ in &findings {
3863 if let Some(pb) = &pb {
3864 pb.inc(1);
3865 }
3866 }
3867 snapshot.findings.extend(findings);
3868
3869 let judgments =
3871 judgment_gen.generate_judgments_for_engagement(&engagement, &team_members);
3872
3873 for _ in &judgments {
3874 if let Some(pb) = &pb {
3875 pb.inc(1);
3876 }
3877 }
3878 snapshot.judgments.extend(judgments);
3879
3880 snapshot.workpapers.extend(workpapers);
3882 snapshot.engagements.push(engagement);
3883 }
3884 }
3885
3886 if let Some(pb) = pb {
3887 pb.finish_with_message(format!(
3888 "Audit data: {} engagements, {} workpapers, {} evidence",
3889 snapshot.engagements.len(),
3890 snapshot.workpapers.len(),
3891 snapshot.evidence.len()
3892 ));
3893 }
3894
3895 Ok(snapshot)
3896 }
3897
3898 fn export_graphs(
3905 &mut self,
3906 entries: &[JournalEntry],
3907 _coa: &Arc<ChartOfAccounts>,
3908 stats: &mut EnhancedGenerationStatistics,
3909 ) -> SynthResult<GraphExportSnapshot> {
3910 let pb = self.create_progress_bar(100, "Exporting Graphs");
3911
3912 let mut snapshot = GraphExportSnapshot::default();
3913
3914 let output_dir = self
3916 .output_path
3917 .clone()
3918 .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
3919 let graph_dir = output_dir.join(&self.config.graph_export.output_subdirectory);
3920
3921 for graph_type in &self.config.graph_export.graph_types {
3923 if let Some(pb) = &pb {
3924 pb.inc(10);
3925 }
3926
3927 let graph_config = TransactionGraphConfig {
3929 include_vendors: false,
3930 include_customers: false,
3931 create_debit_credit_edges: true,
3932 include_document_nodes: graph_type.include_document_nodes,
3933 min_edge_weight: graph_type.min_edge_weight,
3934 aggregate_parallel_edges: graph_type.aggregate_edges,
3935 };
3936
3937 let mut builder = TransactionGraphBuilder::new(graph_config);
3938 builder.add_journal_entries(entries);
3939 let graph = builder.build();
3940
3941 stats.graph_node_count += graph.node_count();
3943 stats.graph_edge_count += graph.edge_count();
3944
3945 if let Some(pb) = &pb {
3946 pb.inc(40);
3947 }
3948
3949 for format in &self.config.graph_export.formats {
3951 let format_dir = graph_dir.join(&graph_type.name).join(format_name(*format));
3952
3953 if let Err(e) = std::fs::create_dir_all(&format_dir) {
3955 warn!("Failed to create graph output directory: {}", e);
3956 continue;
3957 }
3958
3959 match format {
3960 datasynth_config::schema::GraphExportFormat::PytorchGeometric => {
3961 let pyg_config = PyGExportConfig {
3962 common: datasynth_graph::CommonExportConfig {
3963 export_node_features: true,
3964 export_edge_features: true,
3965 export_node_labels: true,
3966 export_edge_labels: true,
3967 export_masks: true,
3968 train_ratio: self.config.graph_export.train_ratio,
3969 val_ratio: self.config.graph_export.validation_ratio,
3970 seed: self.config.graph_export.split_seed.unwrap_or(self.seed),
3971 },
3972 one_hot_categoricals: false,
3973 };
3974
3975 let exporter = PyGExporter::new(pyg_config);
3976 match exporter.export(&graph, &format_dir) {
3977 Ok(metadata) => {
3978 snapshot.exports.insert(
3979 format!("{}_{}", graph_type.name, "pytorch_geometric"),
3980 GraphExportInfo {
3981 name: graph_type.name.clone(),
3982 format: "pytorch_geometric".to_string(),
3983 output_path: format_dir.clone(),
3984 node_count: metadata.num_nodes,
3985 edge_count: metadata.num_edges,
3986 },
3987 );
3988 snapshot.graph_count += 1;
3989 }
3990 Err(e) => {
3991 warn!("Failed to export PyTorch Geometric graph: {}", e);
3992 }
3993 }
3994 }
3995 datasynth_config::schema::GraphExportFormat::Neo4j => {
3996 debug!("Neo4j export not yet implemented for accounting networks");
3998 }
3999 datasynth_config::schema::GraphExportFormat::Dgl => {
4000 debug!("DGL export not yet implemented for accounting networks");
4002 }
4003 datasynth_config::schema::GraphExportFormat::RustGraph => {
4004 use datasynth_graph::{
4005 RustGraphExportConfig, RustGraphExporter, RustGraphOutputFormat,
4006 };
4007
4008 let rustgraph_config = RustGraphExportConfig {
4009 include_features: true,
4010 include_temporal: true,
4011 include_labels: true,
4012 source_name: "datasynth".to_string(),
4013 batch_id: None,
4014 output_format: RustGraphOutputFormat::JsonLines,
4015 export_node_properties: true,
4016 export_edge_properties: true,
4017 pretty_print: false,
4018 };
4019
4020 let exporter = RustGraphExporter::new(rustgraph_config);
4021 match exporter.export(&graph, &format_dir) {
4022 Ok(metadata) => {
4023 snapshot.exports.insert(
4024 format!("{}_{}", graph_type.name, "rustgraph"),
4025 GraphExportInfo {
4026 name: graph_type.name.clone(),
4027 format: "rustgraph".to_string(),
4028 output_path: format_dir.clone(),
4029 node_count: metadata.num_nodes,
4030 edge_count: metadata.num_edges,
4031 },
4032 );
4033 snapshot.graph_count += 1;
4034 }
4035 Err(e) => {
4036 warn!("Failed to export RustGraph: {}", e);
4037 }
4038 }
4039 }
4040 datasynth_config::schema::GraphExportFormat::RustGraphHypergraph => {
4041 debug!("RustGraphHypergraph format is handled in Phase 10b (hypergraph export)");
4043 }
4044 }
4045 }
4046
4047 if let Some(pb) = &pb {
4048 pb.inc(40);
4049 }
4050 }
4051
4052 stats.graph_export_count = snapshot.graph_count;
4053 snapshot.exported = snapshot.graph_count > 0;
4054
4055 if let Some(pb) = pb {
4056 pb.finish_with_message(format!(
4057 "Graphs exported: {} graphs ({} nodes, {} edges)",
4058 snapshot.graph_count, stats.graph_node_count, stats.graph_edge_count
4059 ));
4060 }
4061
4062 Ok(snapshot)
4063 }
4064
4065 #[allow(clippy::too_many_arguments)]
4072 fn export_hypergraph(
4073 &self,
4074 coa: &Arc<ChartOfAccounts>,
4075 entries: &[JournalEntry],
4076 document_flows: &DocumentFlowSnapshot,
4077 sourcing: &SourcingSnapshot,
4078 hr: &HrSnapshot,
4079 manufacturing: &ManufacturingSnapshot,
4080 banking: &BankingSnapshot,
4081 audit: &AuditSnapshot,
4082 financial_reporting: &FinancialReportingSnapshot,
4083 ocpm: &OcpmSnapshot,
4084 stats: &mut EnhancedGenerationStatistics,
4085 ) -> SynthResult<HypergraphExportInfo> {
4086 use datasynth_graph::builders::hypergraph::{HypergraphBuilder, HypergraphConfig};
4087 use datasynth_graph::exporters::hypergraph::{HypergraphExportConfig, HypergraphExporter};
4088 use datasynth_graph::exporters::unified::{RustGraphUnifiedExporter, UnifiedExportConfig};
4089 use datasynth_graph::models::hypergraph::AggregationStrategy;
4090
4091 let hg_settings = &self.config.graph_export.hypergraph;
4092
4093 let aggregation_strategy = match hg_settings.aggregation_strategy.as_str() {
4095 "truncate" => AggregationStrategy::Truncate,
4096 "pool_by_counterparty" => AggregationStrategy::PoolByCounterparty,
4097 "pool_by_time_period" => AggregationStrategy::PoolByTimePeriod,
4098 "importance_sample" => AggregationStrategy::ImportanceSample,
4099 _ => AggregationStrategy::PoolByCounterparty,
4100 };
4101
4102 let builder_config = HypergraphConfig {
4103 max_nodes: hg_settings.max_nodes,
4104 aggregation_strategy,
4105 include_coso: hg_settings.governance_layer.include_coso,
4106 include_controls: hg_settings.governance_layer.include_controls,
4107 include_sox: hg_settings.governance_layer.include_sox,
4108 include_vendors: hg_settings.governance_layer.include_vendors,
4109 include_customers: hg_settings.governance_layer.include_customers,
4110 include_employees: hg_settings.governance_layer.include_employees,
4111 include_p2p: hg_settings.process_layer.include_p2p,
4112 include_o2c: hg_settings.process_layer.include_o2c,
4113 include_s2c: hg_settings.process_layer.include_s2c,
4114 include_h2r: hg_settings.process_layer.include_h2r,
4115 include_mfg: hg_settings.process_layer.include_mfg,
4116 include_bank: hg_settings.process_layer.include_bank,
4117 include_audit: hg_settings.process_layer.include_audit,
4118 include_r2r: hg_settings.process_layer.include_r2r,
4119 events_as_hyperedges: hg_settings.process_layer.events_as_hyperedges,
4120 docs_per_counterparty_threshold: hg_settings
4121 .process_layer
4122 .docs_per_counterparty_threshold,
4123 include_accounts: hg_settings.accounting_layer.include_accounts,
4124 je_as_hyperedges: hg_settings.accounting_layer.je_as_hyperedges,
4125 include_cross_layer_edges: hg_settings.cross_layer.enabled,
4126 };
4127
4128 let mut builder = HypergraphBuilder::new(builder_config);
4129
4130 builder.add_coso_framework();
4132
4133 if hg_settings.governance_layer.include_controls && self.config.internal_controls.enabled {
4136 let controls = InternalControl::standard_controls();
4137 builder.add_controls(&controls);
4138 }
4139
4140 builder.add_vendors(&self.master_data.vendors);
4142 builder.add_customers(&self.master_data.customers);
4143 builder.add_employees(&self.master_data.employees);
4144
4145 builder.add_p2p_documents(
4147 &document_flows.purchase_orders,
4148 &document_flows.goods_receipts,
4149 &document_flows.vendor_invoices,
4150 &document_flows.payments,
4151 );
4152 builder.add_o2c_documents(
4153 &document_flows.sales_orders,
4154 &document_flows.deliveries,
4155 &document_flows.customer_invoices,
4156 );
4157 builder.add_s2c_documents(
4158 &sourcing.sourcing_projects,
4159 &sourcing.qualifications,
4160 &sourcing.rfx_events,
4161 &sourcing.bids,
4162 &sourcing.bid_evaluations,
4163 &sourcing.contracts,
4164 );
4165 builder.add_h2r_documents(&hr.payroll_runs, &hr.time_entries, &hr.expense_reports);
4166 builder.add_mfg_documents(
4167 &manufacturing.production_orders,
4168 &manufacturing.quality_inspections,
4169 &manufacturing.cycle_counts,
4170 );
4171 builder.add_bank_documents(&banking.customers, &banking.accounts, &banking.transactions);
4172 builder.add_audit_documents(
4173 &audit.engagements,
4174 &audit.workpapers,
4175 &audit.findings,
4176 &audit.evidence,
4177 &audit.risk_assessments,
4178 &audit.judgments,
4179 );
4180 builder.add_bank_recon_documents(&financial_reporting.bank_reconciliations);
4181
4182 if let Some(ref event_log) = ocpm.event_log {
4184 builder.add_ocpm_events(event_log);
4185 }
4186
4187 builder.add_accounts(coa);
4189 builder.add_journal_entries_as_hyperedges(entries);
4190
4191 let hypergraph = builder.build();
4193
4194 let output_dir = self
4196 .output_path
4197 .clone()
4198 .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
4199 let hg_dir = output_dir
4200 .join(&self.config.graph_export.output_subdirectory)
4201 .join(&hg_settings.output_subdirectory);
4202
4203 let (num_nodes, num_edges, num_hyperedges) = match hg_settings.output_format.as_str() {
4205 "unified" => {
4206 let exporter = RustGraphUnifiedExporter::new(UnifiedExportConfig::default());
4207 let metadata = exporter.export(&hypergraph, &hg_dir).map_err(|e| {
4208 SynthError::generation(format!("Unified hypergraph export failed: {}", e))
4209 })?;
4210 (
4211 metadata.num_nodes,
4212 metadata.num_edges,
4213 metadata.num_hyperedges,
4214 )
4215 }
4216 _ => {
4217 let exporter = HypergraphExporter::new(HypergraphExportConfig::default());
4219 let metadata = exporter.export(&hypergraph, &hg_dir).map_err(|e| {
4220 SynthError::generation(format!("Hypergraph export failed: {}", e))
4221 })?;
4222 (
4223 metadata.num_nodes,
4224 metadata.num_edges,
4225 metadata.num_hyperedges,
4226 )
4227 }
4228 };
4229
4230 #[cfg(feature = "streaming")]
4232 if let Some(ref target_url) = hg_settings.stream_target {
4233 use crate::stream_client::{StreamClient, StreamConfig};
4234 use std::io::Write as _;
4235
4236 let api_key = std::env::var("RUSTGRAPH_API_KEY").ok();
4237 let stream_config = StreamConfig {
4238 target_url: target_url.clone(),
4239 batch_size: hg_settings.stream_batch_size,
4240 api_key,
4241 ..StreamConfig::default()
4242 };
4243
4244 match StreamClient::new(stream_config) {
4245 Ok(mut client) => {
4246 let exporter = RustGraphUnifiedExporter::new(UnifiedExportConfig::default());
4247 match exporter.export_to_writer(&hypergraph, &mut client) {
4248 Ok(_) => {
4249 if let Err(e) = client.flush() {
4250 warn!("Failed to flush stream client: {}", e);
4251 } else {
4252 info!("Streamed {} records to {}", client.total_sent(), target_url);
4253 }
4254 }
4255 Err(e) => {
4256 warn!("Streaming export failed: {}", e);
4257 }
4258 }
4259 }
4260 Err(e) => {
4261 warn!("Failed to create stream client: {}", e);
4262 }
4263 }
4264 }
4265
4266 stats.graph_node_count += num_nodes;
4268 stats.graph_edge_count += num_edges;
4269 stats.graph_export_count += 1;
4270
4271 Ok(HypergraphExportInfo {
4272 node_count: num_nodes,
4273 edge_count: num_edges,
4274 hyperedge_count: num_hyperedges,
4275 output_path: hg_dir,
4276 })
4277 }
4278
4279 fn generate_banking_data(&mut self) -> SynthResult<BankingSnapshot> {
4284 let pb = self.create_progress_bar(100, "Generating Banking Data");
4285
4286 let orchestrator = BankingOrchestratorBuilder::new()
4288 .config(self.config.banking.clone())
4289 .seed(self.seed + 9000)
4290 .build();
4291
4292 if let Some(pb) = &pb {
4293 pb.inc(10);
4294 }
4295
4296 let result = orchestrator.generate();
4298
4299 if let Some(pb) = &pb {
4300 pb.inc(90);
4301 pb.finish_with_message(format!(
4302 "Banking: {} customers, {} transactions",
4303 result.customers.len(),
4304 result.transactions.len()
4305 ));
4306 }
4307
4308 Ok(BankingSnapshot {
4309 customers: result.customers,
4310 accounts: result.accounts,
4311 transactions: result.transactions,
4312 suspicious_count: result.stats.suspicious_count,
4313 scenario_count: result.scenarios.len(),
4314 })
4315 }
4316
4317 fn calculate_total_transactions(&self) -> u64 {
4319 let months = self.config.global.period_months as f64;
4320 self.config
4321 .companies
4322 .iter()
4323 .map(|c| {
4324 let annual = c.annual_transaction_volume.count() as f64;
4325 let weighted = annual * c.volume_weight;
4326 (weighted * months / 12.0) as u64
4327 })
4328 .sum()
4329 }
4330
4331 fn create_progress_bar(&self, total: u64, message: &str) -> Option<ProgressBar> {
4333 if !self.phase_config.show_progress {
4334 return None;
4335 }
4336
4337 let pb = if let Some(mp) = &self.multi_progress {
4338 mp.add(ProgressBar::new(total))
4339 } else {
4340 ProgressBar::new(total)
4341 };
4342
4343 pb.set_style(
4344 ProgressStyle::default_bar()
4345 .template(&format!(
4346 "{{spinner:.green}} {} [{{elapsed_precise}}] [{{bar:40.cyan/blue}}] {{pos}}/{{len}} ({{per_sec}})",
4347 message
4348 ))
4349 .expect("Progress bar template should be valid - uses only standard indicatif placeholders")
4350 .progress_chars("#>-"),
4351 );
4352
4353 Some(pb)
4354 }
4355
4356 pub fn get_coa(&self) -> Option<Arc<ChartOfAccounts>> {
4358 self.coa.clone()
4359 }
4360
4361 pub fn get_master_data(&self) -> &MasterDataSnapshot {
4363 &self.master_data
4364 }
4365
4366 fn build_lineage_graph(&self) -> super::lineage::LineageGraph {
4368 use super::lineage::LineageGraphBuilder;
4369
4370 let mut builder = LineageGraphBuilder::new();
4371
4372 builder.add_config_section("config:global", "Global Config");
4374 builder.add_config_section("config:chart_of_accounts", "Chart of Accounts Config");
4375 builder.add_config_section("config:transactions", "Transaction Config");
4376
4377 builder.add_generator_phase("phase:coa", "Chart of Accounts Generation");
4379 builder.add_generator_phase("phase:je", "Journal Entry Generation");
4380
4381 builder.configured_by("phase:coa", "config:chart_of_accounts");
4383 builder.configured_by("phase:je", "config:transactions");
4384
4385 builder.add_output_file("output:je", "Journal Entries", "sample_entries.json");
4387 builder.produced_by("output:je", "phase:je");
4388
4389 if self.phase_config.generate_master_data {
4391 builder.add_config_section("config:master_data", "Master Data Config");
4392 builder.add_generator_phase("phase:master_data", "Master Data Generation");
4393 builder.configured_by("phase:master_data", "config:master_data");
4394 builder.input_to("phase:master_data", "phase:je");
4395 }
4396
4397 if self.phase_config.generate_document_flows {
4398 builder.add_config_section("config:document_flows", "Document Flow Config");
4399 builder.add_generator_phase("phase:p2p", "P2P Document Flow");
4400 builder.add_generator_phase("phase:o2c", "O2C Document Flow");
4401 builder.configured_by("phase:p2p", "config:document_flows");
4402 builder.configured_by("phase:o2c", "config:document_flows");
4403
4404 builder.add_output_file("output:po", "Purchase Orders", "purchase_orders.csv");
4405 builder.add_output_file("output:gr", "Goods Receipts", "goods_receipts.csv");
4406 builder.add_output_file("output:vi", "Vendor Invoices", "vendor_invoices.csv");
4407 builder.add_output_file("output:so", "Sales Orders", "sales_orders.csv");
4408 builder.add_output_file("output:ci", "Customer Invoices", "customer_invoices.csv");
4409
4410 builder.produced_by("output:po", "phase:p2p");
4411 builder.produced_by("output:gr", "phase:p2p");
4412 builder.produced_by("output:vi", "phase:p2p");
4413 builder.produced_by("output:so", "phase:o2c");
4414 builder.produced_by("output:ci", "phase:o2c");
4415 }
4416
4417 if self.phase_config.inject_anomalies {
4418 builder.add_config_section("config:fraud", "Fraud/Anomaly Config");
4419 builder.add_generator_phase("phase:anomaly", "Anomaly Injection");
4420 builder.configured_by("phase:anomaly", "config:fraud");
4421 builder.add_output_file(
4422 "output:labels",
4423 "Anomaly Labels",
4424 "labels/anomaly_labels.csv",
4425 );
4426 builder.produced_by("output:labels", "phase:anomaly");
4427 }
4428
4429 if self.phase_config.generate_audit {
4430 builder.add_config_section("config:audit", "Audit Config");
4431 builder.add_generator_phase("phase:audit", "Audit Data Generation");
4432 builder.configured_by("phase:audit", "config:audit");
4433 }
4434
4435 if self.phase_config.generate_banking {
4436 builder.add_config_section("config:banking", "Banking Config");
4437 builder.add_generator_phase("phase:banking", "Banking KYC/AML Generation");
4438 builder.configured_by("phase:banking", "config:banking");
4439 }
4440
4441 if self.config.llm.enabled {
4442 builder.add_config_section("config:llm", "LLM Enrichment Config");
4443 builder.add_generator_phase("phase:llm_enrichment", "LLM Enrichment");
4444 builder.configured_by("phase:llm_enrichment", "config:llm");
4445 }
4446
4447 if self.config.diffusion.enabled {
4448 builder.add_config_section("config:diffusion", "Diffusion Enhancement Config");
4449 builder.add_generator_phase("phase:diffusion", "Diffusion Enhancement");
4450 builder.configured_by("phase:diffusion", "config:diffusion");
4451 }
4452
4453 if self.config.causal.enabled {
4454 builder.add_config_section("config:causal", "Causal Generation Config");
4455 builder.add_generator_phase("phase:causal", "Causal Overlay");
4456 builder.configured_by("phase:causal", "config:causal");
4457 }
4458
4459 builder.build()
4460 }
4461}
4462
4463fn format_name(format: datasynth_config::schema::GraphExportFormat) -> &'static str {
4465 match format {
4466 datasynth_config::schema::GraphExportFormat::PytorchGeometric => "pytorch_geometric",
4467 datasynth_config::schema::GraphExportFormat::Neo4j => "neo4j",
4468 datasynth_config::schema::GraphExportFormat::Dgl => "dgl",
4469 datasynth_config::schema::GraphExportFormat::RustGraph => "rustgraph",
4470 datasynth_config::schema::GraphExportFormat::RustGraphHypergraph => "rustgraph_hypergraph",
4471 }
4472}
4473
4474#[cfg(test)]
4475#[allow(clippy::unwrap_used)]
4476mod tests {
4477 use super::*;
4478 use datasynth_config::schema::*;
4479
4480 fn create_test_config() -> GeneratorConfig {
4481 GeneratorConfig {
4482 global: GlobalConfig {
4483 industry: IndustrySector::Manufacturing,
4484 start_date: "2024-01-01".to_string(),
4485 period_months: 1,
4486 seed: Some(42),
4487 parallel: false,
4488 group_currency: "USD".to_string(),
4489 worker_threads: 0,
4490 memory_limit_mb: 0,
4491 },
4492 companies: vec![CompanyConfig {
4493 code: "1000".to_string(),
4494 name: "Test Company".to_string(),
4495 currency: "USD".to_string(),
4496 country: "US".to_string(),
4497 annual_transaction_volume: TransactionVolume::TenK,
4498 volume_weight: 1.0,
4499 fiscal_year_variant: "K4".to_string(),
4500 }],
4501 chart_of_accounts: ChartOfAccountsConfig {
4502 complexity: CoAComplexity::Small,
4503 industry_specific: true,
4504 custom_accounts: None,
4505 min_hierarchy_depth: 2,
4506 max_hierarchy_depth: 4,
4507 },
4508 transactions: TransactionConfig::default(),
4509 output: OutputConfig::default(),
4510 fraud: FraudConfig::default(),
4511 internal_controls: InternalControlsConfig::default(),
4512 business_processes: BusinessProcessConfig::default(),
4513 user_personas: UserPersonaConfig::default(),
4514 templates: TemplateConfig::default(),
4515 approval: ApprovalConfig::default(),
4516 departments: DepartmentConfig::default(),
4517 master_data: MasterDataConfig::default(),
4518 document_flows: DocumentFlowConfig::default(),
4519 intercompany: IntercompanyConfig::default(),
4520 balance: BalanceConfig::default(),
4521 ocpm: OcpmConfig::default(),
4522 audit: AuditGenerationConfig::default(),
4523 banking: datasynth_banking::BankingConfig::default(),
4524 data_quality: DataQualitySchemaConfig::default(),
4525 scenario: ScenarioConfig::default(),
4526 temporal: TemporalDriftConfig::default(),
4527 graph_export: GraphExportConfig::default(),
4528 streaming: StreamingSchemaConfig::default(),
4529 rate_limit: RateLimitSchemaConfig::default(),
4530 temporal_attributes: TemporalAttributeSchemaConfig::default(),
4531 relationships: RelationshipSchemaConfig::default(),
4532 accounting_standards: AccountingStandardsConfig::default(),
4533 audit_standards: AuditStandardsConfig::default(),
4534 distributions: Default::default(),
4535 temporal_patterns: Default::default(),
4536 vendor_network: VendorNetworkSchemaConfig::default(),
4537 customer_segmentation: CustomerSegmentationSchemaConfig::default(),
4538 relationship_strength: RelationshipStrengthSchemaConfig::default(),
4539 cross_process_links: CrossProcessLinksSchemaConfig::default(),
4540 organizational_events: OrganizationalEventsSchemaConfig::default(),
4541 behavioral_drift: BehavioralDriftSchemaConfig::default(),
4542 market_drift: MarketDriftSchemaConfig::default(),
4543 drift_labeling: DriftLabelingSchemaConfig::default(),
4544 anomaly_injection: Default::default(),
4545 industry_specific: Default::default(),
4546 fingerprint_privacy: Default::default(),
4547 quality_gates: Default::default(),
4548 compliance: Default::default(),
4549 webhooks: Default::default(),
4550 llm: Default::default(),
4551 diffusion: Default::default(),
4552 causal: Default::default(),
4553 source_to_pay: Default::default(),
4554 financial_reporting: Default::default(),
4555 hr: Default::default(),
4556 manufacturing: Default::default(),
4557 sales_quotes: Default::default(),
4558 }
4559 }
4560
4561 #[test]
4562 fn test_enhanced_orchestrator_creation() {
4563 let config = create_test_config();
4564 let orchestrator = EnhancedOrchestrator::with_defaults(config);
4565 assert!(orchestrator.is_ok());
4566 }
4567
4568 #[test]
4569 fn test_minimal_generation() {
4570 let config = create_test_config();
4571 let phase_config = PhaseConfig {
4572 generate_master_data: false,
4573 generate_document_flows: false,
4574 generate_journal_entries: true,
4575 inject_anomalies: false,
4576 show_progress: false,
4577 ..Default::default()
4578 };
4579
4580 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
4581 let result = orchestrator.generate();
4582
4583 assert!(result.is_ok());
4584 let result = result.unwrap();
4585 assert!(!result.journal_entries.is_empty());
4586 }
4587
4588 #[test]
4589 fn test_master_data_generation() {
4590 let config = create_test_config();
4591 let phase_config = PhaseConfig {
4592 generate_master_data: true,
4593 generate_document_flows: false,
4594 generate_journal_entries: false,
4595 inject_anomalies: false,
4596 show_progress: false,
4597 vendors_per_company: 5,
4598 customers_per_company: 5,
4599 materials_per_company: 10,
4600 assets_per_company: 5,
4601 employees_per_company: 10,
4602 ..Default::default()
4603 };
4604
4605 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
4606 let result = orchestrator.generate().unwrap();
4607
4608 assert!(!result.master_data.vendors.is_empty());
4609 assert!(!result.master_data.customers.is_empty());
4610 assert!(!result.master_data.materials.is_empty());
4611 }
4612
/// Enables master data and document flows (five P2P and five O2C chains requested) and
/// checks that both chain lists and the flattened purchase/sales orders are non-empty.
#[test]
fn test_document_flow_generation() {
    let base_config = create_test_config();
    let phases = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: true,
        generate_journal_entries: false,
        inject_anomalies: false,
        inject_data_quality: false,
        validate_balances: false,
        generate_ocpm_events: false,
        show_progress: false,
        vendors_per_company: 5,
        customers_per_company: 5,
        materials_per_company: 10,
        assets_per_company: 5,
        employees_per_company: 10,
        p2p_chains: 5,
        o2c_chains: 5,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(base_config, phases).unwrap();
    let output = engine.generate().unwrap();
    let flows = &output.document_flows;

    // Document chains exist for both processes.
    assert!(!flows.p2p_chains.is_empty());
    assert!(!flows.o2c_chains.is_empty());

    // The per-document collections are filled as well.
    assert!(!flows.purchase_orders.is_empty());
    assert!(!flows.sales_orders.is_empty());
}
4646
/// Generates journal entries with anomaly injection turned on and checks that entries
/// exist and that an anomaly-label summary is present in the result.
#[test]
fn test_anomaly_injection() {
    let base_config = create_test_config();
    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: true,
        show_progress: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(base_config, phases).unwrap();
    let output = engine.generate().unwrap();

    // Journal entries are the input to injection, so they must be there.
    assert!(!output.journal_entries.is_empty());

    // The injection phase must have reported a summary.
    let labels = &output.anomaly_labels;
    assert!(labels.summary.is_some());
}
4669
/// Runs master data, document flows, journal entries and balance validation together
/// (anomalies, data-quality injection and OCPM events off) and checks that each stage
/// left output in the result.
#[test]
fn test_full_generation_pipeline() {
    let base_config = create_test_config();
    let phases = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: true,
        generate_journal_entries: true,
        inject_anomalies: false,
        inject_data_quality: false,
        validate_balances: true,
        generate_ocpm_events: false,
        show_progress: false,
        vendors_per_company: 3,
        customers_per_company: 3,
        materials_per_company: 5,
        assets_per_company: 3,
        employees_per_company: 5,
        p2p_chains: 3,
        o2c_chains: 3,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(base_config, phases).unwrap();
    let output = engine.generate().unwrap();

    // Core outputs of every enabled phase.
    let master = &output.master_data;
    let flows = &output.document_flows;
    assert!(!master.vendors.is_empty());
    assert!(!master.customers.is_empty());
    assert!(!flows.p2p_chains.is_empty());
    assert!(!flows.o2c_chains.is_empty());
    assert!(!output.journal_entries.is_empty());
    assert!(output.statistics.accounts_count > 0);

    // Subledger invoices on both the payable and receivable side.
    let subledger = &output.subledger;
    assert!(!subledger.ap_invoices.is_empty());
    assert!(!subledger.ar_invoices.is_empty());

    // Balance validation ran and saw at least one entry.
    let validation = &output.balance_validation;
    assert!(validation.validated);
    assert!(validation.entries_processed > 0);
}
4711
/// Checks that subledger AP/AR invoices line up one-to-one (by count) with the vendor
/// and customer invoices from the document flows, and that the statistics report the
/// same counts.
#[test]
fn test_subledger_linking() {
    let base_config = create_test_config();
    let phases = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: true,
        generate_journal_entries: false,
        inject_anomalies: false,
        inject_data_quality: false,
        validate_balances: false,
        generate_ocpm_events: false,
        show_progress: false,
        vendors_per_company: 5,
        customers_per_company: 5,
        materials_per_company: 10,
        assets_per_company: 3,
        employees_per_company: 5,
        p2p_chains: 5,
        o2c_chains: 5,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(base_config, phases).unwrap();
    let output = engine.generate().unwrap();

    let vendor_invoices = &output.document_flows.vendor_invoices;
    let customer_invoices = &output.document_flows.customer_invoices;
    let ap_invoices = &output.subledger.ap_invoices;
    let ar_invoices = &output.subledger.ar_invoices;

    // Source documents exist.
    assert!(!vendor_invoices.is_empty());
    assert!(!customer_invoices.is_empty());

    // Subledger records exist.
    assert!(!ap_invoices.is_empty());
    assert!(!ar_invoices.is_empty());

    // One AP invoice per vendor invoice.
    assert_eq!(ap_invoices.len(), vendor_invoices.len());

    // One AR invoice per customer invoice.
    assert_eq!(ar_invoices.len(), customer_invoices.len());

    // Reported statistics agree with the actual collections.
    let stats = &output.statistics;
    assert_eq!(stats.ap_invoice_count, ap_invoices.len());
    assert_eq!(stats.ar_invoice_count, ar_invoices.len());
}
4767
/// Generates journal entries with balance validation on and checks that validation
/// ran, found no unbalanced entries, and that total debits equal total credits.
#[test]
fn test_balance_validation() {
    let base_config = create_test_config();
    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        validate_balances: true,
        show_progress: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(base_config, phases).unwrap();
    let output = engine.generate().unwrap();
    let validation = &output.balance_validation;

    // Validation was performed over a non-empty set of entries.
    assert!(validation.validated);
    assert!(validation.entries_processed > 0);

    // No entry was flagged as unbalanced.
    assert!(!validation.has_unbalanced_entries);

    // Aggregate debits and credits match.
    assert_eq!(validation.total_debits, validation.total_credits);
}
4797
/// Checks that the vendor, customer, material and journal-entry counts reported in the
/// statistics equal the lengths of the corresponding generated collections.
#[test]
fn test_statistics_accuracy() {
    let base_config = create_test_config();
    let phases = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        vendors_per_company: 10,
        customers_per_company: 20,
        materials_per_company: 15,
        assets_per_company: 5,
        employees_per_company: 8,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(base_config, phases).unwrap();
    let output = engine.generate().unwrap();

    let stats = &output.statistics;
    let master = &output.master_data;

    // Each counter must mirror the size of the data it describes.
    assert_eq!(stats.vendor_count, master.vendors.len());
    assert_eq!(stats.customer_count, master.customers.len());
    assert_eq!(stats.material_count, master.materials.len());
    assert_eq!(stats.total_entries as usize, output.journal_entries.len());
}
4836
/// Pins the defaults of `PhaseConfig`: the three generation phases, balance validation
/// and progress display are on, anomaly injection is off, and the per-company vendor
/// and customer counts are positive.
#[test]
fn test_phase_config_defaults() {
    let defaults = PhaseConfig::default();

    // Switches that default to enabled / disabled.
    assert!(defaults.generate_master_data);
    assert!(defaults.generate_document_flows);
    assert!(defaults.generate_journal_entries);
    assert!(!defaults.inject_anomalies);
    assert!(defaults.validate_balances);
    assert!(defaults.show_progress);

    // Volume knobs must be non-zero out of the box.
    assert!(defaults.vendors_per_company > 0);
    assert!(defaults.customers_per_company > 0);
}
4849
/// A freshly constructed orchestrator that has not run `generate()` exposes no chart
/// of accounts.
#[test]
fn test_get_coa_before_generation() {
    let engine = EnhancedOrchestrator::with_defaults(create_test_config()).unwrap();

    // Nothing has been generated yet, so there is nothing to return.
    assert!(engine.get_coa().is_none());
}
4858
/// After a generation run with journal entries enabled, `get_coa()` returns a chart of
/// accounts.
#[test]
fn test_get_coa_after_generation() {
    let base_config = create_test_config();
    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(base_config, phases).unwrap();
    // Only the orchestrator's state matters here; the returned result is discarded.
    let _ = engine.generate().unwrap();

    assert!(engine.get_coa().is_some());
}
4877
/// After a master-data-only run, `get_master_data()` on the orchestrator exposes a
/// non-empty vendor list.
#[test]
fn test_get_master_data() {
    let base_config = create_test_config();
    let phases = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: false,
        inject_anomalies: false,
        show_progress: false,
        vendors_per_company: 5,
        customers_per_company: 5,
        materials_per_company: 5,
        assets_per_company: 5,
        employees_per_company: 5,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(base_config, phases).unwrap();
    // The result is discarded; the accessor on the orchestrator is what is under test.
    let _ = engine.generate().unwrap();

    let snapshot = engine.get_master_data();
    assert!(!snapshot.vendors.is_empty());
}
4901
/// `with_progress(false)` on a default-constructed orchestrator leaves
/// `phase_config.show_progress` set to `false`.
#[test]
fn test_with_progress_builder() {
    let base_config = create_test_config();
    let built = EnhancedOrchestrator::with_defaults(base_config).unwrap();
    let engine = built.with_progress(false);

    assert!(!engine.phase_config.show_progress);
}
4912
/// Adds a second company to the test configuration and checks that generation covers
/// both: at least 10 vendors and 10 customers in total (5 requested per company), and
/// a reported company count of exactly 2.
#[test]
fn test_multi_company_generation() {
    let mut config = create_test_config();
    config.companies.push(CompanyConfig {
        code: "2000".to_string(),
        name: "Subsidiary".to_string(),
        currency: "EUR".to_string(),
        country: "DE".to_string(),
        annual_transaction_volume: TransactionVolume::TenK,
        volume_weight: 0.5,
        fiscal_year_variant: "K4".to_string(),
    });

    let phase_config = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        vendors_per_company: 5,
        customers_per_company: 5,
        materials_per_company: 5,
        assets_per_company: 5,
        employees_per_company: 5,
        ..Default::default()
    };

    let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
    let result = orchestrator.generate().unwrap();

    // 5 per company across two companies: expect at least 10 of each.
    assert!(result.statistics.vendor_count >= 10);
    assert!(result.statistics.customer_count >= 10);
    // `assert_eq!` reports the actual count on failure, unlike `assert!(a == b)`.
    assert_eq!(result.statistics.companies_count, 2);
}
4948
/// With master data disabled but document flows requested, the run still succeeds and
/// produces no P2P or O2C chains.
#[test]
fn test_empty_master_data_skips_document_flows() {
    let base_config = create_test_config();
    let phases = PhaseConfig {
        // No master data is generated...
        generate_master_data: false,
        // ...even though document flows are asked for.
        generate_document_flows: true,
        generate_journal_entries: false,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(base_config, phases).unwrap();
    let output = engine.generate().unwrap();

    // Both chain lists stay empty.
    let flows = &output.document_flows;
    assert!(flows.p2p_chains.is_empty());
    assert!(flows.o2c_chains.is_empty());
}
4968
/// Recomputes the total number of journal-entry lines from the generated entries and
/// checks that it equals `statistics.total_line_items`.
#[test]
fn test_journal_entry_line_item_count() {
    let base_config = create_test_config();
    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(base_config, phases).unwrap();
    let output = engine.generate().unwrap();

    // Sum the per-entry line counts by hand.
    let mut expected_line_items: u64 = 0;
    for entry in output.journal_entries.iter() {
        expected_line_items += entry.line_count() as u64;
    }

    assert_eq!(output.statistics.total_line_items, expected_line_items);
}
4992
/// Enables audit generation for two engagements and checks that every audit artefact
/// collection is populated and that the audit counters in the statistics match the
/// collection sizes.
#[test]
fn test_audit_generation() {
    let base_config = create_test_config();
    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        generate_audit: true,
        audit_engagements: 2,
        workpapers_per_engagement: 5,
        evidence_per_workpaper: 2,
        risks_per_engagement: 3,
        findings_per_engagement: 2,
        judgments_per_engagement: 2,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(base_config, phases).unwrap();
    let output = engine.generate().unwrap();

    let audit = &output.audit;
    let stats = &output.statistics;

    // Exactly the requested number of engagements, and something in every other bucket.
    assert_eq!(audit.engagements.len(), 2);
    assert!(!audit.workpapers.is_empty());
    assert!(!audit.evidence.is_empty());
    assert!(!audit.risk_assessments.is_empty());
    assert!(!audit.findings.is_empty());
    assert!(!audit.judgments.is_empty());

    // Counters agree with the data.
    assert_eq!(stats.audit_engagement_count, audit.engagements.len());
    assert_eq!(stats.audit_workpaper_count, audit.workpapers.len());
    assert_eq!(stats.audit_evidence_count, audit.evidence.len());
    assert_eq!(stats.audit_risk_count, audit.risk_assessments.len());
    assert_eq!(stats.audit_finding_count, audit.findings.len());
    assert_eq!(stats.audit_judgment_count, audit.judgments.len());
}
5049
/// The test configuration has LLM, diffusion and causal phases disabled; a run with it
/// must leave all of their statistics at zero and the causal validation flag unset.
#[test]
fn test_new_phases_disabled_by_default() {
    let base_config = create_test_config();

    // Precondition: none of the three phases is switched on.
    assert!(!base_config.llm.enabled);
    assert!(!base_config.diffusion.enabled);
    assert!(!base_config.causal.enabled);

    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(base_config, phases).unwrap();
    let output = engine.generate().unwrap();
    let stats = &output.statistics;

    // Timings and sample counters stay at their zero values.
    assert_eq!(stats.llm_enrichment_ms, 0);
    assert_eq!(stats.llm_vendors_enriched, 0);
    assert_eq!(stats.diffusion_enhancement_ms, 0);
    assert_eq!(stats.diffusion_samples_generated, 0);
    assert_eq!(stats.causal_generation_ms, 0);
    assert_eq!(stats.causal_samples_generated, 0);
    assert!(stats.causal_validation_passed.is_none());
}
5079
/// Enables LLM enrichment with `max_vendor_enrichments = 3` and checks that between
/// one and three vendors are reported as enriched.
#[test]
fn test_llm_enrichment_enabled() {
    let mut base_config = create_test_config();
    base_config.llm.enabled = true;
    base_config.llm.max_vendor_enrichments = 3;

    let phases = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: false,
        inject_anomalies: false,
        show_progress: false,
        vendors_per_company: 5,
        customers_per_company: 3,
        materials_per_company: 3,
        assets_per_company: 3,
        employees_per_company: 3,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(base_config, phases).unwrap();
    let output = engine.generate().unwrap();

    // At least one vendor enriched, and no more than the configured limit of 3.
    let stats = &output.statistics;
    assert!(stats.llm_vendors_enriched > 0);
    assert!(stats.llm_vendors_enriched <= 3);
}
5107
/// Enables the diffusion phase with a sample size of 20 and checks that exactly 20
/// samples are reported as generated.
#[test]
fn test_diffusion_enhancement_enabled() {
    let mut base_config = create_test_config();
    base_config.diffusion.enabled = true;
    base_config.diffusion.n_steps = 50;
    base_config.diffusion.sample_size = 20;

    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(base_config, phases).unwrap();
    let output = engine.generate().unwrap();

    // The reported sample count equals the configured `sample_size`.
    let stats = &output.statistics;
    assert_eq!(stats.diffusion_samples_generated, 20);
}
5130
/// Enables the causal phase with the `fraud_detection` template, 100 samples and
/// validation on; expects 100 samples reported and a validation outcome present.
#[test]
fn test_causal_overlay_enabled() {
    let mut base_config = create_test_config();
    base_config.causal.enabled = true;
    base_config.causal.template = "fraud_detection".to_string();
    base_config.causal.sample_size = 100;
    base_config.causal.validate = true;

    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(base_config, phases).unwrap();
    let output = engine.generate().unwrap();
    let stats = &output.statistics;

    // Sample count equals the configured `sample_size`.
    assert_eq!(stats.causal_samples_generated, 100);
    // With `validate = true`, an outcome (pass or fail) must be recorded.
    assert!(stats.causal_validation_passed.is_some());
}
5156
/// Enables the causal phase with the `revenue_cycle` template, 50 samples and
/// validation off; expects 50 samples reported and no validation outcome.
#[test]
fn test_causal_overlay_revenue_cycle_template() {
    let mut base_config = create_test_config();
    base_config.causal.enabled = true;
    base_config.causal.template = "revenue_cycle".to_string();
    base_config.causal.sample_size = 50;
    base_config.causal.validate = false;

    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(base_config, phases).unwrap();
    let output = engine.generate().unwrap();
    let stats = &output.statistics;

    // Sample count equals the configured `sample_size`.
    assert_eq!(stats.causal_samples_generated, 50);
    // With `validate = false`, no outcome is recorded.
    assert!(stats.causal_validation_passed.is_none());
}
5182
/// Switches on LLM enrichment, diffusion and causal generation in one run and checks
/// that each phase reports its expected statistics.
#[test]
fn test_all_new_phases_enabled_together() {
    let mut base_config = create_test_config();
    // LLM enrichment.
    base_config.llm.enabled = true;
    base_config.llm.max_vendor_enrichments = 2;
    // Diffusion enhancement.
    base_config.diffusion.enabled = true;
    base_config.diffusion.n_steps = 20;
    base_config.diffusion.sample_size = 10;
    // Causal generation (template left at whatever the test config provides).
    base_config.causal.enabled = true;
    base_config.causal.sample_size = 50;
    base_config.causal.validate = true;

    let phases = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        vendors_per_company: 5,
        customers_per_company: 3,
        materials_per_company: 3,
        assets_per_company: 3,
        employees_per_company: 3,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(base_config, phases).unwrap();
    let output = engine.generate().unwrap();
    let stats = &output.statistics;

    // Each phase left its mark in the statistics.
    assert!(stats.llm_vendors_enriched > 0);
    assert_eq!(stats.diffusion_samples_generated, 10);
    assert_eq!(stats.causal_samples_generated, 50);
    assert!(stats.causal_validation_passed.is_some());
}
5218
/// Round-trips `EnhancedGenerationStatistics` through JSON and checks that the LLM,
/// diffusion and causal fields survive unchanged.
#[test]
fn test_statistics_serialization_with_new_fields() {
    let original = EnhancedGenerationStatistics {
        total_entries: 100,
        total_line_items: 500,
        llm_enrichment_ms: 42,
        llm_vendors_enriched: 10,
        diffusion_enhancement_ms: 100,
        diffusion_samples_generated: 50,
        causal_generation_ms: 200,
        causal_samples_generated: 100,
        causal_validation_passed: Some(true),
        ..Default::default()
    };

    // Serialize, then parse the text straight back.
    let encoded = serde_json::to_string(&original).unwrap();
    let decoded: EnhancedGenerationStatistics = serde_json::from_str(&encoded).unwrap();

    assert_eq!(decoded.llm_enrichment_ms, 42);
    assert_eq!(decoded.llm_vendors_enriched, 10);
    assert_eq!(decoded.diffusion_enhancement_ms, 100);
    assert_eq!(decoded.diffusion_samples_generated, 50);
    assert_eq!(decoded.causal_generation_ms, 200);
    assert_eq!(decoded.causal_samples_generated, 100);
    assert_eq!(decoded.causal_validation_passed, Some(true));
}
5245
/// Deserializes a statistics JSON document that contains none of the LLM, diffusion or
/// causal fields and checks that parsing succeeds with those fields at zero / `None`.
#[test]
fn test_statistics_backward_compat_deserialization() {
    // Payload without any of the llm_*, diffusion_* or causal_* keys.
    let legacy_payload = r#"{
        "total_entries": 100,
        "total_line_items": 500,
        "accounts_count": 50,
        "companies_count": 1,
        "period_months": 12,
        "vendor_count": 10,
        "customer_count": 20,
        "material_count": 15,
        "asset_count": 5,
        "employee_count": 8,
        "p2p_chain_count": 5,
        "o2c_chain_count": 5,
        "ap_invoice_count": 5,
        "ar_invoice_count": 5,
        "ocpm_event_count": 0,
        "ocpm_object_count": 0,
        "ocpm_case_count": 0,
        "audit_engagement_count": 0,
        "audit_workpaper_count": 0,
        "audit_evidence_count": 0,
        "audit_risk_count": 0,
        "audit_finding_count": 0,
        "audit_judgment_count": 0,
        "anomalies_injected": 0,
        "data_quality_issues": 0,
        "banking_customer_count": 0,
        "banking_account_count": 0,
        "banking_transaction_count": 0,
        "banking_suspicious_count": 0,
        "graph_export_count": 0,
        "graph_node_count": 0,
        "graph_edge_count": 0
    }"#;

    let parsed: EnhancedGenerationStatistics = serde_json::from_str(legacy_payload).unwrap();

    // Absent fields fall back to zero / `None`.
    assert_eq!(parsed.llm_enrichment_ms, 0);
    assert_eq!(parsed.llm_vendors_enriched, 0);
    assert_eq!(parsed.diffusion_enhancement_ms, 0);
    assert_eq!(parsed.diffusion_samples_generated, 0);
    assert_eq!(parsed.causal_generation_ms, 0);
    assert_eq!(parsed.causal_samples_generated, 0);
    assert!(parsed.causal_validation_passed.is_none());
}
5295}