1use std::collections::HashMap;
22use std::path::PathBuf;
23use std::sync::Arc;
24
25use chrono::{Datelike, NaiveDate};
26use indicatif::{MultiProgress, ProgressBar, ProgressStyle};
27use rand::SeedableRng;
28use serde::{Deserialize, Serialize};
29use tracing::{debug, info, warn};
30
31use datasynth_banking::{
32 models::{BankAccount, BankTransaction, BankingCustomer, CustomerName},
33 BankingOrchestratorBuilder,
34};
35use datasynth_config::schema::GeneratorConfig;
36use datasynth_core::error::{SynthError, SynthResult};
37use datasynth_core::models::audit::{
38 AuditEngagement, AuditEvidence, AuditFinding, ProfessionalJudgment, RiskAssessment, Workpaper,
39};
40use datasynth_core::models::sourcing::{
41 BidEvaluation, CatalogItem, ProcurementContract, RfxEvent, SourcingProject, SpendAnalysis,
42 SupplierBid, SupplierQualification, SupplierScorecard,
43};
44use datasynth_core::models::subledger::ap::APInvoice;
45use datasynth_core::models::subledger::ar::ARInvoice;
46use datasynth_core::models::*;
47use datasynth_core::traits::Generator;
48use datasynth_core::{DegradationActions, DegradationLevel, ResourceGuard, ResourceGuardBuilder};
49use datasynth_fingerprint::{
50 io::FingerprintReader,
51 models::Fingerprint,
52 synthesis::{ConfigSynthesizer, CopulaGeneratorSpec, SynthesisOptions},
53};
54use datasynth_generators::{
55 AnomalyInjector,
57 AnomalyInjectorConfig,
58 AssetGenerator,
59 AuditEngagementGenerator,
61 BalanceTrackerConfig,
62 BankReconciliationGenerator,
64 BidEvaluationGenerator,
66 BidGenerator,
67 CatalogGenerator,
68 ChartOfAccountsGenerator,
70 ContractGenerator,
71 ControlGenerator,
73 ControlGeneratorConfig,
74 CustomerGenerator,
75 DataQualityConfig,
76 DataQualityInjector,
78 DataQualityStats,
79 DocumentFlowJeConfig,
81 DocumentFlowJeGenerator,
82 DocumentFlowLinker,
84 EmployeeGenerator,
85 EsgAnomalyLabel,
87 EvidenceGenerator,
88 FinancialStatementGenerator,
90 FindingGenerator,
91 JournalEntryGenerator,
92 JudgmentGenerator,
93 LatePaymentDistribution,
94 MaterialGenerator,
95 O2CDocumentChain,
96 O2CGenerator,
97 O2CGeneratorConfig,
98 O2CPaymentBehavior,
99 P2PDocumentChain,
100 P2PGenerator,
102 P2PGeneratorConfig,
103 P2PPaymentBehavior,
104 PaymentReference,
105 QualificationGenerator,
106 RfxGenerator,
107 RiskAssessmentGenerator,
108 RunningBalanceTracker,
110 ScorecardGenerator,
111 SourcingProjectGenerator,
112 SpendAnalysisGenerator,
113 ValidationError,
114 VendorGenerator,
116 WorkpaperGenerator,
117};
118use datasynth_graph::{
119 ApprovalGraphBuilder, ApprovalGraphConfig, BankingGraphBuilder, BankingGraphConfig,
120 PyGExportConfig, PyGExporter, TransactionGraphBuilder, TransactionGraphConfig,
121};
122use datasynth_ocpm::{
123 AuditDocuments, BankDocuments, BankReconDocuments, EventLogMetadata, H2rDocuments,
124 MfgDocuments, O2cDocuments, OcpmEventGenerator, OcpmEventLog, OcpmGeneratorConfig,
125 OcpmUuidFactory, P2pDocuments, S2cDocuments,
126};
127
128use datasynth_config::schema::{O2CFlowConfig, P2PFlowConfig};
129use datasynth_core::causal::{CausalGraph, CausalValidator, StructuralCausalModel};
130use datasynth_core::diffusion::{DiffusionBackend, DiffusionConfig, StatisticalDiffusionBackend};
131use datasynth_core::llm::MockLlmProvider;
132use datasynth_core::models::balance::{GeneratedOpeningBalance, IndustryType, OpeningBalanceSpec};
133use datasynth_core::models::documents::PaymentMethod;
134use datasynth_core::models::IndustrySector;
135use datasynth_generators::coa_generator::CoAFramework;
136use datasynth_generators::llm_enrichment::VendorLlmEnricher;
137use rayon::prelude::*;
138
139fn convert_p2p_config(schema_config: &P2PFlowConfig) -> P2PGeneratorConfig {
145 let payment_behavior = &schema_config.payment_behavior;
146 let late_dist = &payment_behavior.late_payment_days_distribution;
147
148 P2PGeneratorConfig {
149 three_way_match_rate: schema_config.three_way_match_rate,
150 partial_delivery_rate: schema_config.partial_delivery_rate,
151 over_delivery_rate: schema_config.over_delivery_rate.unwrap_or(0.02),
152 price_variance_rate: schema_config.price_variance_rate,
153 max_price_variance_percent: schema_config.max_price_variance_percent,
154 avg_days_po_to_gr: schema_config.average_po_to_gr_days,
155 avg_days_gr_to_invoice: schema_config.average_gr_to_invoice_days,
156 avg_days_invoice_to_payment: schema_config.average_invoice_to_payment_days,
157 payment_method_distribution: vec![
158 (PaymentMethod::BankTransfer, 0.60),
159 (PaymentMethod::Check, 0.25),
160 (PaymentMethod::Wire, 0.10),
161 (PaymentMethod::CreditCard, 0.05),
162 ],
163 early_payment_discount_rate: schema_config.early_payment_discount_rate.unwrap_or(0.30),
164 payment_behavior: P2PPaymentBehavior {
165 late_payment_rate: payment_behavior.late_payment_rate,
166 late_payment_distribution: LatePaymentDistribution {
167 slightly_late_1_to_7: late_dist.slightly_late_1_to_7,
168 late_8_to_14: late_dist.late_8_to_14,
169 very_late_15_to_30: late_dist.very_late_15_to_30,
170 severely_late_31_to_60: late_dist.severely_late_31_to_60,
171 extremely_late_over_60: late_dist.extremely_late_over_60,
172 },
173 partial_payment_rate: payment_behavior.partial_payment_rate,
174 payment_correction_rate: payment_behavior.payment_correction_rate,
175 avg_days_until_remainder: payment_behavior.avg_days_until_remainder,
176 },
177 }
178}
179
180fn convert_o2c_config(schema_config: &O2CFlowConfig) -> O2CGeneratorConfig {
182 let payment_behavior = &schema_config.payment_behavior;
183
184 O2CGeneratorConfig {
185 credit_check_failure_rate: schema_config.credit_check_failure_rate,
186 partial_shipment_rate: schema_config.partial_shipment_rate,
187 avg_days_so_to_delivery: schema_config.average_so_to_delivery_days,
188 avg_days_delivery_to_invoice: schema_config.average_delivery_to_invoice_days,
189 avg_days_invoice_to_payment: schema_config.average_invoice_to_receipt_days,
190 late_payment_rate: schema_config.late_payment_rate.unwrap_or(0.15),
191 bad_debt_rate: schema_config.bad_debt_rate,
192 returns_rate: schema_config.return_rate,
193 cash_discount_take_rate: schema_config.cash_discount.taken_rate,
194 payment_method_distribution: vec![
195 (PaymentMethod::BankTransfer, 0.50),
196 (PaymentMethod::Check, 0.30),
197 (PaymentMethod::Wire, 0.15),
198 (PaymentMethod::CreditCard, 0.05),
199 ],
200 payment_behavior: O2CPaymentBehavior {
201 partial_payment_rate: payment_behavior.partial_payments.rate,
202 short_payment_rate: payment_behavior.short_payments.rate,
203 max_short_percent: payment_behavior.short_payments.max_short_percent,
204 on_account_rate: payment_behavior.on_account_payments.rate,
205 payment_correction_rate: payment_behavior.payment_corrections.rate,
206 avg_days_until_remainder: payment_behavior.partial_payments.avg_days_until_remainder,
207 },
208 }
209}
210
/// Switches and volume settings for the orchestrator's generation phases.
///
/// NOTE(review): the code that consumes most of these fields lies outside this section;
/// the per-field descriptions are inferred from field names and should be confirmed
/// against the phase implementations.
#[derive(Debug, Clone)]
pub struct PhaseConfig {
    /// Enable master-data generation.
    pub generate_master_data: bool,
    /// Enable document-flow generation.
    pub generate_document_flows: bool,
    /// Enable OCPM event generation.
    pub generate_ocpm_events: bool,
    /// Enable journal-entry generation.
    pub generate_journal_entries: bool,
    /// Enable anomaly injection.
    pub inject_anomalies: bool,
    /// Enable data-quality injection.
    pub inject_data_quality: bool,
    /// Enable balance validation.
    pub validate_balances: bool,
    /// Whether progress display is on; set by `EnhancedOrchestrator::with_progress`.
    pub show_progress: bool,
    /// Vendor count per company.
    pub vendors_per_company: usize,
    /// Customer count per company.
    pub customers_per_company: usize,
    /// Material count per company.
    pub materials_per_company: usize,
    /// Asset count per company.
    pub assets_per_company: usize,
    /// Employee count per company.
    pub employees_per_company: usize,
    /// Number of P2P chains.
    pub p2p_chains: usize,
    /// Number of O2C chains.
    pub o2c_chains: usize,
    /// Enable audit data generation.
    pub generate_audit: bool,
    /// Number of audit engagements.
    pub audit_engagements: usize,
    /// Workpapers per engagement.
    pub workpapers_per_engagement: usize,
    /// Evidence items per workpaper.
    pub evidence_per_workpaper: usize,
    /// Risk assessments per engagement.
    pub risks_per_engagement: usize,
    /// Findings per engagement.
    pub findings_per_engagement: usize,
    /// Professional judgments per engagement.
    pub judgments_per_engagement: usize,
    /// Enable banking data generation.
    pub generate_banking: bool,
    /// Enable graph export.
    pub generate_graph_export: bool,
    /// Enable sourcing data generation.
    pub generate_sourcing: bool,
    /// Enable bank reconciliation generation.
    pub generate_bank_reconciliation: bool,
    /// Enable financial statement generation.
    pub generate_financial_statements: bool,
    /// Enable accounting-standards data generation.
    pub generate_accounting_standards: bool,
    /// Enable manufacturing data generation.
    pub generate_manufacturing: bool,
    /// Enable sales quote / KPI / budget generation.
    pub generate_sales_kpi_budgets: bool,
    /// Enable tax data generation.
    pub generate_tax: bool,
    /// Enable ESG data generation.
    pub generate_esg: bool,
    /// Enable intercompany data generation.
    pub generate_intercompany: bool,
}
281
/// Default phase settings: master data, document flows, journal entries, balance
/// validation and progress display are on; every other phase toggle is off.
impl Default for PhaseConfig {
    fn default() -> Self {
        Self {
            // Phases that are on by default.
            generate_master_data: true,
            generate_document_flows: true,
            generate_ocpm_events: false,
            generate_journal_entries: true,
            inject_anomalies: false,
            inject_data_quality: false,
            validate_balances: true,
            show_progress: true,
            // Default volumes.
            vendors_per_company: 50,
            customers_per_company: 100,
            materials_per_company: 200,
            assets_per_company: 50,
            employees_per_company: 100,
            p2p_chains: 100,
            o2c_chains: 100,
            // Audit is off by default, but its volume defaults are still populated.
            generate_audit: false,
            audit_engagements: 5,
            workpapers_per_engagement: 20,
            evidence_per_workpaper: 5,
            risks_per_engagement: 15,
            findings_per_engagement: 8,
            judgments_per_engagement: 10,
            // All remaining phase toggles default to off.
            generate_banking: false,
            generate_graph_export: false,
            generate_sourcing: false,
            generate_bank_reconciliation: false,
            generate_financial_statements: false,
            generate_accounting_standards: false,
            generate_manufacturing: false,
            generate_sales_kpi_budgets: false,
            generate_tax: false,
            generate_esg: false,
            generate_intercompany: false,
        }
    }
}
321
/// Master data collections, held by the orchestrator (`master_data`) and returned in
/// `EnhancedGenerationResult`.
#[derive(Debug, Clone, Default)]
pub struct MasterDataSnapshot {
    /// Vendor records.
    pub vendors: Vec<Vendor>,
    /// Customer records.
    pub customers: Vec<Customer>,
    /// Material records.
    pub materials: Vec<Material>,
    /// Fixed asset records.
    pub assets: Vec<FixedAsset>,
    /// Employee records.
    pub employees: Vec<Employee>,
}
336
/// Summary of a hypergraph export: element counts plus the output location.
///
/// NOTE(review): the producer of this struct is not in this section.
#[derive(Debug, Clone)]
pub struct HypergraphExportInfo {
    /// Number of nodes.
    pub node_count: usize,
    /// Number of edges.
    pub edge_count: usize,
    /// Number of hyperedges.
    pub hyperedge_count: usize,
    /// Path associated with the export output.
    pub output_path: PathBuf,
}
349
/// Document-flow output: the P2P/O2C chains plus per-document-type collections.
///
/// NOTE(review): whether the flat collections are derived from the chains or populated
/// independently is decided by code outside this section.
#[derive(Debug, Clone, Default)]
pub struct DocumentFlowSnapshot {
    /// P2P document chains.
    pub p2p_chains: Vec<P2PDocumentChain>,
    /// O2C document chains.
    pub o2c_chains: Vec<O2CDocumentChain>,
    /// Purchase orders.
    pub purchase_orders: Vec<documents::PurchaseOrder>,
    /// Goods receipts.
    pub goods_receipts: Vec<documents::GoodsReceipt>,
    /// Vendor invoices.
    pub vendor_invoices: Vec<documents::VendorInvoice>,
    /// Sales orders.
    pub sales_orders: Vec<documents::SalesOrder>,
    /// Deliveries.
    pub deliveries: Vec<documents::Delivery>,
    /// Customer invoices.
    pub customer_invoices: Vec<documents::CustomerInvoice>,
    /// Payments.
    pub payments: Vec<documents::Payment>,
}
372
/// Subledger output: AP/AR invoices, fixed-asset records and inventory data.
#[derive(Debug, Clone, Default)]
pub struct SubledgerSnapshot {
    /// Accounts-payable invoices.
    pub ap_invoices: Vec<APInvoice>,
    /// Accounts-receivable invoices.
    pub ar_invoices: Vec<ARInvoice>,
    /// Fixed-asset subledger records.
    pub fa_records: Vec<datasynth_core::models::subledger::fa::FixedAssetRecord>,
    /// Inventory positions.
    pub inventory_positions: Vec<datasynth_core::models::subledger::inventory::InventoryPosition>,
    /// Inventory movements.
    pub inventory_movements: Vec<datasynth_core::models::subledger::inventory::InventoryMovement>,
}
387
/// OCPM output: an optional event log plus summary counts.
///
/// NOTE(review): the counts are stored separately from `event_log`; nothing in this
/// section ties them to the log's contents — confirm where they are populated.
#[derive(Debug, Clone, Default)]
pub struct OcpmSnapshot {
    /// The event log, if one was produced (`None` by default).
    pub event_log: Option<OcpmEventLog>,
    /// Number of events.
    pub event_count: usize,
    /// Number of objects.
    pub object_count: usize,
    /// Number of cases.
    pub case_count: usize,
}
400
/// Audit output collections.
#[derive(Debug, Clone, Default)]
pub struct AuditSnapshot {
    /// Audit engagements.
    pub engagements: Vec<AuditEngagement>,
    /// Workpapers.
    pub workpapers: Vec<Workpaper>,
    /// Audit evidence items.
    pub evidence: Vec<AuditEvidence>,
    /// Risk assessments.
    pub risk_assessments: Vec<RiskAssessment>,
    /// Audit findings.
    pub findings: Vec<AuditFinding>,
    /// Professional judgments.
    pub judgments: Vec<ProfessionalJudgment>,
}
417
/// Banking output: customers, accounts, transactions, their label sets, and summary counts.
#[derive(Debug, Clone, Default)]
pub struct BankingSnapshot {
    /// Banking customers.
    pub customers: Vec<BankingCustomer>,
    /// Bank accounts.
    pub accounts: Vec<BankAccount>,
    /// Bank transactions.
    pub transactions: Vec<BankTransaction>,
    /// Transaction-level labels.
    pub transaction_labels: Vec<datasynth_banking::labels::TransactionLabel>,
    /// Customer-level labels.
    pub customer_labels: Vec<datasynth_banking::labels::CustomerLabel>,
    /// Account-level labels.
    pub account_labels: Vec<datasynth_banking::labels::AccountLabel>,
    /// Relationship-level labels.
    pub relationship_labels: Vec<datasynth_banking::labels::RelationshipLabel>,
    /// Exported narratives.
    pub narratives: Vec<datasynth_banking::labels::ExportedNarrative>,
    /// Suspicious-item count. NOTE(review): presumably suspicious transactions — confirm.
    pub suspicious_count: usize,
    /// Scenario count.
    pub scenario_count: usize,
}
442
/// Serializable summary of graph exports.
#[derive(Debug, Clone, Default, Serialize)]
pub struct GraphExportSnapshot {
    /// Whether an export took place (`false` by default).
    pub exported: bool,
    /// Number of graphs.
    pub graph_count: usize,
    /// Per-export details, keyed by string.
    /// NOTE(review): the key is presumably the export name — confirm against the producer.
    pub exports: HashMap<String, GraphExportInfo>,
}
453
/// Serializable details of a single graph export; stored as the values of
/// `GraphExportSnapshot::exports`.
#[derive(Debug, Clone, Serialize)]
pub struct GraphExportInfo {
    /// Export name.
    pub name: String,
    /// Export format identifier.
    pub format: String,
    /// Path associated with the export output.
    pub output_path: PathBuf,
    /// Number of nodes.
    pub node_count: usize,
    /// Number of edges.
    pub edge_count: usize,
}
468
/// Sourcing output collections.
#[derive(Debug, Clone, Default)]
pub struct SourcingSnapshot {
    /// Spend analyses.
    pub spend_analyses: Vec<SpendAnalysis>,
    /// Sourcing projects.
    pub sourcing_projects: Vec<SourcingProject>,
    /// Supplier qualifications.
    pub qualifications: Vec<SupplierQualification>,
    /// RFx events.
    pub rfx_events: Vec<RfxEvent>,
    /// Supplier bids.
    pub bids: Vec<SupplierBid>,
    /// Bid evaluations.
    pub bid_evaluations: Vec<BidEvaluation>,
    /// Procurement contracts.
    pub contracts: Vec<ProcurementContract>,
    /// Catalog items.
    pub catalog_items: Vec<CatalogItem>,
    /// Supplier scorecards.
    pub scorecards: Vec<SupplierScorecard>,
}
491
/// Trial balance for a single fiscal period (serializable and deserializable).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PeriodTrialBalance {
    /// Fiscal year.
    pub fiscal_year: u16,
    /// Fiscal period number within the year.
    pub fiscal_period: u8,
    /// Period start date.
    /// NOTE(review): whether start/end are inclusive is not established in this section.
    pub period_start: NaiveDate,
    /// Period end date.
    pub period_end: NaiveDate,
    /// Trial balance entries for the period.
    pub entries: Vec<datasynth_generators::TrialBalanceEntry>,
}
506
/// Financial reporting output: statements, bank reconciliations and period trial balances.
#[derive(Debug, Clone, Default)]
pub struct FinancialReportingSnapshot {
    /// Financial statements.
    pub financial_statements: Vec<FinancialStatement>,
    /// Bank reconciliations.
    pub bank_reconciliations: Vec<BankReconciliation>,
    /// Per-period trial balances.
    pub trial_balances: Vec<PeriodTrialBalance>,
}
517
/// HR output collections with accompanying counts.
///
/// NOTE(review): the `*_count` fields presumably mirror the lengths of the matching
/// collections; nothing in this section enforces that.
#[derive(Debug, Clone, Default)]
pub struct HrSnapshot {
    /// Payroll runs.
    pub payroll_runs: Vec<PayrollRun>,
    /// Payroll line items.
    pub payroll_line_items: Vec<PayrollLineItem>,
    /// Time entries.
    pub time_entries: Vec<TimeEntry>,
    /// Expense reports.
    pub expense_reports: Vec<ExpenseReport>,
    /// Payroll run count.
    pub payroll_run_count: usize,
    /// Payroll line item count.
    pub payroll_line_item_count: usize,
    /// Time entry count.
    pub time_entry_count: usize,
    /// Expense report count.
    pub expense_report_count: usize,
}
538
/// Accounting-standards output: customer (revenue) contracts and impairment tests.
///
/// NOTE(review): the `*_count` fields presumably mirror the collection lengths — confirm.
#[derive(Debug, Clone, Default)]
pub struct AccountingStandardsSnapshot {
    /// Customer contracts from the revenue module.
    pub contracts: Vec<datasynth_standards::accounting::revenue::CustomerContract>,
    /// Impairment tests.
    pub impairment_tests: Vec<datasynth_standards::accounting::impairment::ImpairmentTest>,
    /// Revenue contract count.
    pub revenue_contract_count: usize,
    /// Impairment test count.
    pub impairment_test_count: usize,
}
551
/// Manufacturing output collections with accompanying counts.
///
/// NOTE(review): the `*_count` fields presumably mirror the collection lengths — confirm.
#[derive(Debug, Clone, Default)]
pub struct ManufacturingSnapshot {
    /// Production orders.
    pub production_orders: Vec<ProductionOrder>,
    /// Quality inspections.
    pub quality_inspections: Vec<QualityInspection>,
    /// Cycle counts.
    pub cycle_counts: Vec<CycleCount>,
    /// Production order count.
    pub production_order_count: usize,
    /// Quality inspection count.
    pub quality_inspection_count: usize,
    /// Number of cycle counts.
    pub cycle_count_count: usize,
}
568
/// Sales quote, management KPI and budget output with accompanying counts.
#[derive(Debug, Clone, Default)]
pub struct SalesKpiBudgetsSnapshot {
    /// Sales quotes.
    pub sales_quotes: Vec<SalesQuote>,
    /// Management KPIs.
    pub kpis: Vec<ManagementKpi>,
    /// Budgets.
    pub budgets: Vec<Budget>,
    /// Sales quote count.
    pub sales_quote_count: usize,
    /// KPI count.
    pub kpi_count: usize,
    /// Budget line count.
    /// NOTE(review): named for budget *lines*, so it may differ from `budgets.len()` — confirm.
    pub budget_line_count: usize,
}
585
/// Anomaly labelling output: the labels, an optional summary, and per-type tallies.
#[derive(Debug, Clone, Default)]
pub struct AnomalyLabels {
    /// Labeled anomalies.
    pub labels: Vec<LabeledAnomaly>,
    /// Summary, if one was produced (`None` by default).
    pub summary: Option<AnomalySummary>,
    /// Tally per string key.
    /// NOTE(review): the key is presumably an anomaly type name — confirm against the producer.
    pub by_type: HashMap<String, usize>,
}
596
/// Outcome of balance validation.
///
/// NOTE(review): the validation logic is outside this section; the field descriptions
/// follow the field names. Because `Default` is derived, a default value has
/// `validated == false` and `is_balanced == false`.
#[derive(Debug, Clone, Default)]
pub struct BalanceValidationResult {
    /// Whether validation was performed.
    pub validated: bool,
    /// Whether the result was balanced.
    pub is_balanced: bool,
    /// Number of entries processed.
    pub entries_processed: u64,
    /// Total debits.
    pub total_debits: rust_decimal::Decimal,
    /// Total credits.
    pub total_credits: rust_decimal::Decimal,
    /// Number of accounts tracked.
    pub accounts_tracked: usize,
    /// Number of companies tracked.
    pub companies_tracked: usize,
    /// Validation errors collected.
    pub validation_errors: Vec<ValidationError>,
    /// Whether any unbalanced entries were seen.
    pub has_unbalanced_entries: bool,
}
619
/// Tax output collections with accompanying counts.
#[derive(Debug, Clone, Default)]
pub struct TaxSnapshot {
    /// Tax jurisdictions.
    pub jurisdictions: Vec<TaxJurisdiction>,
    /// Tax codes.
    pub codes: Vec<TaxCode>,
    /// Tax lines.
    pub tax_lines: Vec<TaxLine>,
    /// Tax returns.
    pub tax_returns: Vec<TaxReturn>,
    /// Tax provisions.
    pub tax_provisions: Vec<TaxProvision>,
    /// Withholding tax records.
    pub withholding_records: Vec<WithholdingTaxRecord>,
    /// Tax anomaly labels.
    pub tax_anomaly_labels: Vec<datasynth_generators::TaxAnomalyLabel>,
    /// Jurisdiction count. NOTE(review): presumably `jurisdictions.len()` — confirm.
    pub jurisdiction_count: usize,
    /// Tax code count. NOTE(review): presumably `codes.len()` — confirm.
    pub code_count: usize,
}
642
/// Intercompany output (serializable and deserializable): matched pairs, the journal
/// entries on each side, elimination entries, and summary figures.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct IntercompanySnapshot {
    /// Matched intercompany pairs.
    pub matched_pairs: Vec<datasynth_core::models::intercompany::ICMatchedPair>,
    /// Journal entries on the seller side.
    pub seller_journal_entries: Vec<JournalEntry>,
    /// Journal entries on the buyer side.
    pub buyer_journal_entries: Vec<JournalEntry>,
    /// Elimination entries.
    pub elimination_entries: Vec<datasynth_core::models::intercompany::EliminationEntry>,
    /// Matched pair count.
    pub matched_pair_count: usize,
    /// Elimination entry count.
    pub elimination_entry_count: usize,
    /// Match rate.
    /// NOTE(review): scale (0..1 vs. percent) is not established in this section — confirm.
    pub match_rate: f64,
}
661
/// ESG output collections with accompanying counts.
#[derive(Debug, Clone, Default)]
pub struct EsgSnapshot {
    /// Emission records.
    pub emissions: Vec<EmissionRecord>,
    /// Energy consumption records.
    pub energy: Vec<EnergyConsumption>,
    /// Water usage records.
    pub water: Vec<WaterUsage>,
    /// Waste records.
    pub waste: Vec<WasteRecord>,
    /// Workforce diversity metrics.
    pub diversity: Vec<WorkforceDiversityMetric>,
    /// Pay equity metrics.
    pub pay_equity: Vec<PayEquityMetric>,
    /// Safety incidents.
    pub safety_incidents: Vec<SafetyIncident>,
    /// Safety metrics.
    pub safety_metrics: Vec<SafetyMetric>,
    /// Governance metrics.
    pub governance: Vec<GovernanceMetric>,
    /// Supplier ESG assessments.
    pub supplier_assessments: Vec<SupplierEsgAssessment>,
    /// Materiality assessments.
    pub materiality: Vec<MaterialityAssessment>,
    /// ESG disclosures.
    pub disclosures: Vec<EsgDisclosure>,
    /// Climate scenarios.
    pub climate_scenarios: Vec<ClimateScenario>,
    /// ESG anomaly labels.
    pub anomaly_labels: Vec<EsgAnomalyLabel>,
    /// Emission count. NOTE(review): presumably `emissions.len()` — confirm.
    pub emission_count: usize,
    /// Disclosure count. NOTE(review): presumably `disclosures.len()` — confirm.
    pub disclosure_count: usize,
}
698
/// Treasury output collections.
#[derive(Debug, Clone, Default)]
pub struct TreasurySnapshot {
    /// Cash positions.
    pub cash_positions: Vec<CashPosition>,
    /// Cash forecasts.
    pub cash_forecasts: Vec<CashForecast>,
    /// Cash pools.
    pub cash_pools: Vec<CashPool>,
    /// Cash pool sweeps.
    pub cash_pool_sweeps: Vec<CashPoolSweep>,
    /// Hedging instruments.
    pub hedging_instruments: Vec<HedgingInstrument>,
    /// Hedge relationships.
    pub hedge_relationships: Vec<HedgeRelationship>,
    /// Debt instruments.
    pub debt_instruments: Vec<DebtInstrument>,
    /// Treasury anomaly labels.
    pub treasury_anomaly_labels: Vec<datasynth_generators::treasury::TreasuryAnomalyLabel>,
}
719
/// Project accounting output collections.
#[derive(Debug, Clone, Default)]
pub struct ProjectAccountingSnapshot {
    /// Projects.
    pub projects: Vec<Project>,
    /// Project cost lines.
    pub cost_lines: Vec<ProjectCostLine>,
    /// Project revenue records.
    pub revenue_records: Vec<ProjectRevenue>,
    /// Earned value metrics.
    pub earned_value_metrics: Vec<EarnedValueMetric>,
    /// Change orders.
    pub change_orders: Vec<ChangeOrder>,
    /// Project milestones.
    pub milestones: Vec<ProjectMilestone>,
}
736
/// Complete output of `EnhancedOrchestrator::generate`: one snapshot per domain plus
/// journal entries, labels, validation results and run statistics.
///
/// Only `Debug` is derived, so a result is neither cloneable nor serializable as a whole.
#[derive(Debug)]
pub struct EnhancedGenerationResult {
    /// Chart of accounts.
    pub chart_of_accounts: ChartOfAccounts,
    /// Master data.
    pub master_data: MasterDataSnapshot,
    /// Document flows.
    pub document_flows: DocumentFlowSnapshot,
    /// Subledger data.
    pub subledger: SubledgerSnapshot,
    /// OCPM data.
    pub ocpm: OcpmSnapshot,
    /// Audit data.
    pub audit: AuditSnapshot,
    /// Banking data.
    pub banking: BankingSnapshot,
    /// Graph export summary.
    pub graph_export: GraphExportSnapshot,
    /// Sourcing data.
    pub sourcing: SourcingSnapshot,
    /// Financial reporting data.
    pub financial_reporting: FinancialReportingSnapshot,
    /// HR data.
    pub hr: HrSnapshot,
    /// Accounting-standards data.
    pub accounting_standards: AccountingStandardsSnapshot,
    /// Manufacturing data.
    pub manufacturing: ManufacturingSnapshot,
    /// Sales quote / KPI / budget data.
    pub sales_kpi_budgets: SalesKpiBudgetsSnapshot,
    /// Tax data.
    pub tax: TaxSnapshot,
    /// ESG data.
    pub esg: EsgSnapshot,
    /// Treasury data.
    pub treasury: TreasurySnapshot,
    /// Project accounting data.
    pub project_accounting: ProjectAccountingSnapshot,
    /// Intercompany data.
    pub intercompany: IntercompanySnapshot,
    /// Journal entries.
    pub journal_entries: Vec<JournalEntry>,
    /// Anomaly labels.
    pub anomaly_labels: AnomalyLabels,
    /// Balance validation outcome.
    pub balance_validation: BalanceValidationResult,
    /// Data-quality statistics.
    pub data_quality_stats: DataQualityStats,
    /// Run statistics.
    pub statistics: EnhancedGenerationStatistics,
    /// Lineage graph, if one was produced.
    pub lineage: Option<super::lineage::LineageGraph>,
    /// Gate result, if gates were evaluated.
    pub gate_result: Option<datasynth_eval::gates::GateResult>,
    /// Internal controls.
    pub internal_controls: Vec<InternalControl>,
    /// Generated opening balances.
    pub opening_balances: Vec<GeneratedOpeningBalance>,
    /// Subledger reconciliation results.
    pub subledger_reconciliation: Vec<datasynth_generators::ReconciliationResult>,
}
799
/// Counters and timings collected during a generation run.
///
/// Serializable and deserializable. Fields marked `#[serde(default)]` may be absent from
/// the input when deserializing and then take their `Default` value (0 / `None`).
///
/// NOTE(review): apart from `companies_count` and `period_months`, the code that fills
/// these fields is outside this section; the grouping comments follow the field names.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct EnhancedGenerationStatistics {
    // --- Journal entries and run scope ---
    pub total_entries: u64,
    pub total_line_items: u64,
    pub accounts_count: usize,
    /// Set by `generate` from `config.companies.len()`.
    pub companies_count: usize,
    /// Set by `generate` from `config.global.period_months`.
    pub period_months: u32,
    // --- Master data ---
    pub vendor_count: usize,
    pub customer_count: usize,
    pub material_count: usize,
    pub asset_count: usize,
    pub employee_count: usize,
    // --- Document flows ---
    pub p2p_chain_count: usize,
    pub o2c_chain_count: usize,
    // --- AP/AR subledger ---
    pub ap_invoice_count: usize,
    pub ar_invoice_count: usize,
    // --- OCPM ---
    pub ocpm_event_count: usize,
    pub ocpm_object_count: usize,
    pub ocpm_case_count: usize,
    // --- Audit ---
    pub audit_engagement_count: usize,
    pub audit_workpaper_count: usize,
    pub audit_evidence_count: usize,
    pub audit_risk_count: usize,
    pub audit_finding_count: usize,
    pub audit_judgment_count: usize,
    // --- Anomaly and data-quality injection ---
    pub anomalies_injected: usize,
    pub data_quality_issues: usize,
    // --- Banking ---
    pub banking_customer_count: usize,
    pub banking_account_count: usize,
    pub banking_transaction_count: usize,
    pub banking_suspicious_count: usize,
    // --- Graph export ---
    pub graph_export_count: usize,
    pub graph_node_count: usize,
    pub graph_edge_count: usize,
    // --- LLM enrichment (time in milliseconds, per the `_ms` suffix) ---
    #[serde(default)]
    pub llm_enrichment_ms: u64,
    #[serde(default)]
    pub llm_vendors_enriched: usize,
    // --- Diffusion enhancement ---
    #[serde(default)]
    pub diffusion_enhancement_ms: u64,
    #[serde(default)]
    pub diffusion_samples_generated: usize,
    // --- Causal generation ---
    #[serde(default)]
    pub causal_generation_ms: u64,
    #[serde(default)]
    pub causal_samples_generated: usize,
    /// `None` by default. NOTE(review): presumably `None` means validation did not run — confirm.
    #[serde(default)]
    pub causal_validation_passed: Option<bool>,
    // --- Sourcing ---
    #[serde(default)]
    pub sourcing_project_count: usize,
    #[serde(default)]
    pub rfx_event_count: usize,
    #[serde(default)]
    pub bid_count: usize,
    #[serde(default)]
    pub contract_count: usize,
    #[serde(default)]
    pub catalog_item_count: usize,
    #[serde(default)]
    pub scorecard_count: usize,
    // --- Financial reporting ---
    #[serde(default)]
    pub financial_statement_count: usize,
    #[serde(default)]
    pub bank_reconciliation_count: usize,
    // --- HR ---
    #[serde(default)]
    pub payroll_run_count: usize,
    #[serde(default)]
    pub time_entry_count: usize,
    #[serde(default)]
    pub expense_report_count: usize,
    // --- Accounting standards ---
    #[serde(default)]
    pub revenue_contract_count: usize,
    #[serde(default)]
    pub impairment_test_count: usize,
    // --- Manufacturing ---
    #[serde(default)]
    pub production_order_count: usize,
    #[serde(default)]
    pub quality_inspection_count: usize,
    #[serde(default)]
    pub cycle_count_count: usize,
    // --- Sales quotes, KPIs, budgets ---
    #[serde(default)]
    pub sales_quote_count: usize,
    #[serde(default)]
    pub kpi_count: usize,
    #[serde(default)]
    pub budget_line_count: usize,
    // --- Tax ---
    #[serde(default)]
    pub tax_jurisdiction_count: usize,
    #[serde(default)]
    pub tax_code_count: usize,
    // --- ESG ---
    #[serde(default)]
    pub esg_emission_count: usize,
    #[serde(default)]
    pub esg_disclosure_count: usize,
    // --- Intercompany ---
    #[serde(default)]
    pub ic_matched_pair_count: usize,
    #[serde(default)]
    pub ic_elimination_count: usize,
    #[serde(default)]
    pub ic_transaction_count: usize,
    // --- Fixed-asset and inventory subledgers ---
    #[serde(default)]
    pub fa_subledger_count: usize,
    #[serde(default)]
    pub inventory_subledger_count: usize,
    // --- Treasury ---
    #[serde(default)]
    pub treasury_debt_instrument_count: usize,
    #[serde(default)]
    pub treasury_hedging_instrument_count: usize,
    // --- Project accounting ---
    #[serde(default)]
    pub project_count: usize,
    #[serde(default)]
    pub project_change_order_count: usize,
    // --- Additional counters ---
    #[serde(default)]
    pub tax_provision_count: usize,
    #[serde(default)]
    pub opening_balance_count: usize,
    #[serde(default)]
    pub subledger_reconciliation_count: usize,
    #[serde(default)]
    pub tax_line_count: usize,
    #[serde(default)]
    pub project_cost_line_count: usize,
    #[serde(default)]
    pub cash_position_count: usize,
}
969
/// Drives the enhanced generation workflow (see `generate`).
///
/// Construct with `new`, `with_defaults`, `from_fingerprint` or `from_fingerprint_data`,
/// then optionally chain the `with_*` builder methods.
pub struct EnhancedOrchestrator {
    /// Generator configuration; validated in `new`.
    config: GeneratorConfig,
    /// Per-phase switches and volumes.
    phase_config: PhaseConfig,
    /// Chart of accounts; `None` when the orchestrator is constructed.
    coa: Option<Arc<ChartOfAccounts>>,
    /// Master data; starts out as the default (empty) snapshot.
    master_data: MasterDataSnapshot,
    /// Seed: `config.global.seed` if set, otherwise a random value drawn in `new`.
    seed: u64,
    /// Progress display handle; created by `with_progress(true)`.
    multi_progress: Option<MultiProgress>,
    /// Resource guard built by `build_resource_guard`; rebuilt by `with_output_path`.
    resource_guard: ResourceGuard,
    /// Output path, if one was supplied through `with_output_path`.
    output_path: Option<PathBuf>,
    /// Copula generator specs; empty unless built from a fingerprint.
    copula_generators: Vec<CopulaGeneratorSpec>,
    /// Country-pack registry loaded in `new`.
    country_pack_registry: datasynth_core::CountryPackRegistry,
    /// Optional sink that receives items via `emit_phase_items`; set by `with_phase_sink`.
    phase_sink: Option<Box<dyn crate::stream_pipeline::PhaseSink>>,
}
989
990impl EnhancedOrchestrator {
991 pub fn new(config: GeneratorConfig, phase_config: PhaseConfig) -> SynthResult<Self> {
993 datasynth_config::validate_config(&config)?;
994
995 let seed = config.global.seed.unwrap_or_else(rand::random);
996
997 let resource_guard = Self::build_resource_guard(&config, None);
999
1000 let country_pack_registry = match &config.country_packs {
1002 Some(cp) => {
1003 datasynth_core::CountryPackRegistry::new(cp.external_dir.as_deref(), &cp.overrides)
1004 .map_err(|e| SynthError::config(e.to_string()))?
1005 }
1006 None => datasynth_core::CountryPackRegistry::builtin_only()
1007 .map_err(|e| SynthError::config(e.to_string()))?,
1008 };
1009
1010 Ok(Self {
1011 config,
1012 phase_config,
1013 coa: None,
1014 master_data: MasterDataSnapshot::default(),
1015 seed,
1016 multi_progress: None,
1017 resource_guard,
1018 output_path: None,
1019 copula_generators: Vec::new(),
1020 country_pack_registry,
1021 phase_sink: None,
1022 })
1023 }
1024
1025 pub fn with_defaults(config: GeneratorConfig) -> SynthResult<Self> {
1027 Self::new(config, PhaseConfig::default())
1028 }
1029
1030 pub fn with_phase_sink(mut self, sink: Box<dyn crate::stream_pipeline::PhaseSink>) -> Self {
1032 self.phase_sink = Some(sink);
1033 self
1034 }
1035
1036 fn emit_phase_items<T: serde::Serialize>(&self, phase: &str, type_name: &str, items: &[T]) {
1038 if let Some(ref sink) = self.phase_sink {
1039 for item in items {
1040 if let Ok(value) = serde_json::to_value(item) {
1041 let _ = sink.emit(phase, type_name, &value);
1042 }
1043 }
1044 let _ = sink.phase_complete(phase);
1045 }
1046 }
1047
1048 pub fn with_progress(mut self, show: bool) -> Self {
1050 self.phase_config.show_progress = show;
1051 if show {
1052 self.multi_progress = Some(MultiProgress::new());
1053 }
1054 self
1055 }
1056
1057 pub fn with_output_path<P: Into<PathBuf>>(mut self, path: P) -> Self {
1059 let path = path.into();
1060 self.output_path = Some(path.clone());
1061 self.resource_guard = Self::build_resource_guard(&self.config, Some(path));
1063 self
1064 }
1065
1066 pub fn country_pack_registry(&self) -> &datasynth_core::CountryPackRegistry {
1068 &self.country_pack_registry
1069 }
1070
1071 pub fn country_pack_for(&self, country: &str) -> &datasynth_core::CountryPack {
1073 self.country_pack_registry.get_by_str(country)
1074 }
1075
1076 fn primary_country_code(&self) -> &str {
1079 self.config
1080 .companies
1081 .first()
1082 .map(|c| c.country.as_str())
1083 .unwrap_or("US")
1084 }
1085
1086 fn primary_pack(&self) -> &datasynth_core::CountryPack {
1088 self.country_pack_for(self.primary_country_code())
1089 }
1090
1091 fn resolve_coa_framework(&self) -> CoAFramework {
1093 if self.config.accounting_standards.enabled {
1094 match self.config.accounting_standards.framework {
1095 Some(datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap) => {
1096 return CoAFramework::FrenchPcg;
1097 }
1098 Some(datasynth_config::schema::AccountingFrameworkConfig::GermanGaap) => {
1099 return CoAFramework::GermanSkr04;
1100 }
1101 _ => {}
1102 }
1103 }
1104 let pack = self.primary_pack();
1106 match pack.accounting.framework.as_str() {
1107 "french_gaap" => CoAFramework::FrenchPcg,
1108 "german_gaap" | "hgb" => CoAFramework::GermanSkr04,
1109 _ => CoAFramework::UsGaap,
1110 }
1111 }
1112
1113 pub fn has_copulas(&self) -> bool {
1118 !self.copula_generators.is_empty()
1119 }
1120
1121 pub fn copulas(&self) -> &[CopulaGeneratorSpec] {
1127 &self.copula_generators
1128 }
1129
1130 pub fn copulas_mut(&mut self) -> &mut [CopulaGeneratorSpec] {
1134 &mut self.copula_generators
1135 }
1136
1137 pub fn sample_from_copula(&mut self, copula_name: &str) -> Option<Vec<f64>> {
1141 self.copula_generators
1142 .iter_mut()
1143 .find(|c| c.name == copula_name)
1144 .map(|c| c.generator.sample())
1145 }
1146
1147 pub fn from_fingerprint(
1170 fingerprint_path: &std::path::Path,
1171 phase_config: PhaseConfig,
1172 scale: f64,
1173 ) -> SynthResult<Self> {
1174 info!("Loading fingerprint from: {}", fingerprint_path.display());
1175
1176 let reader = FingerprintReader::new();
1178 let fingerprint = reader
1179 .read_from_file(fingerprint_path)
1180 .map_err(|e| SynthError::config(format!("Failed to read fingerprint: {}", e)))?;
1181
1182 Self::from_fingerprint_data(fingerprint, phase_config, scale)
1183 }
1184
1185 pub fn from_fingerprint_data(
1192 fingerprint: Fingerprint,
1193 phase_config: PhaseConfig,
1194 scale: f64,
1195 ) -> SynthResult<Self> {
1196 info!(
1197 "Synthesizing config from fingerprint (version: {}, tables: {})",
1198 fingerprint.manifest.version,
1199 fingerprint.schema.tables.len()
1200 );
1201
1202 let seed: u64 = rand::random();
1204
1205 let options = SynthesisOptions {
1207 scale,
1208 seed: Some(seed),
1209 preserve_correlations: true,
1210 inject_anomalies: true,
1211 };
1212 let synthesizer = ConfigSynthesizer::with_options(options);
1213
1214 let synthesis_result = synthesizer
1216 .synthesize_full(&fingerprint, seed)
1217 .map_err(|e| {
1218 SynthError::config(format!(
1219 "Failed to synthesize config from fingerprint: {}",
1220 e
1221 ))
1222 })?;
1223
1224 let mut config = if let Some(ref industry) = fingerprint.manifest.source.industry {
1226 Self::base_config_for_industry(industry)
1227 } else {
1228 Self::base_config_for_industry("manufacturing")
1229 };
1230
1231 config = Self::apply_config_patch(config, &synthesis_result.config_patch);
1233
1234 info!(
1236 "Config synthesized: {} tables, scale={:.2}, copula generators: {}",
1237 fingerprint.schema.tables.len(),
1238 scale,
1239 synthesis_result.copula_generators.len()
1240 );
1241
1242 if !synthesis_result.copula_generators.is_empty() {
1243 for spec in &synthesis_result.copula_generators {
1244 info!(
1245 " Copula '{}' for table '{}': {} columns",
1246 spec.name,
1247 spec.table,
1248 spec.columns.len()
1249 );
1250 }
1251 }
1252
1253 let mut orchestrator = Self::new(config, phase_config)?;
1255
1256 orchestrator.copula_generators = synthesis_result.copula_generators;
1258
1259 Ok(orchestrator)
1260 }
1261
1262 fn base_config_for_industry(industry: &str) -> GeneratorConfig {
1264 use datasynth_config::presets::create_preset;
1265 use datasynth_config::TransactionVolume;
1266 use datasynth_core::models::{CoAComplexity, IndustrySector};
1267
1268 let sector = match industry.to_lowercase().as_str() {
1269 "manufacturing" => IndustrySector::Manufacturing,
1270 "retail" => IndustrySector::Retail,
1271 "financial" | "financial_services" => IndustrySector::FinancialServices,
1272 "healthcare" => IndustrySector::Healthcare,
1273 "technology" | "tech" => IndustrySector::Technology,
1274 _ => IndustrySector::Manufacturing,
1275 };
1276
1277 create_preset(
1279 sector,
1280 1, 12, CoAComplexity::Medium,
1283 TransactionVolume::TenK,
1284 )
1285 }
1286
1287 fn apply_config_patch(
1289 mut config: GeneratorConfig,
1290 patch: &datasynth_fingerprint::synthesis::ConfigPatch,
1291 ) -> GeneratorConfig {
1292 use datasynth_fingerprint::synthesis::ConfigValue;
1293
1294 for (key, value) in patch.values() {
1295 match (key.as_str(), value) {
1296 ("transactions.count", ConfigValue::Integer(n)) => {
1299 info!(
1300 "Fingerprint suggests {} transactions (apply via company volumes)",
1301 n
1302 );
1303 }
1304 ("global.period_months", ConfigValue::Integer(n)) => {
1305 config.global.period_months = *n as u32;
1306 }
1307 ("global.start_date", ConfigValue::String(s)) => {
1308 config.global.start_date = s.clone();
1309 }
1310 ("global.seed", ConfigValue::Integer(n)) => {
1311 config.global.seed = Some(*n as u64);
1312 }
1313 ("fraud.enabled", ConfigValue::Bool(b)) => {
1314 config.fraud.enabled = *b;
1315 }
1316 ("fraud.fraud_rate", ConfigValue::Float(f)) => {
1317 config.fraud.fraud_rate = *f;
1318 }
1319 ("data_quality.enabled", ConfigValue::Bool(b)) => {
1320 config.data_quality.enabled = *b;
1321 }
1322 ("anomaly_injection.enabled", ConfigValue::Bool(b)) => {
1324 config.fraud.enabled = *b;
1325 }
1326 ("anomaly_injection.overall_rate", ConfigValue::Float(f)) => {
1327 config.fraud.fraud_rate = *f;
1328 }
1329 _ => {
1330 debug!("Ignoring unknown config patch key: {}", key);
1331 }
1332 }
1333 }
1334
1335 config
1336 }
1337
1338 fn build_resource_guard(
1340 config: &GeneratorConfig,
1341 output_path: Option<PathBuf>,
1342 ) -> ResourceGuard {
1343 let mut builder = ResourceGuardBuilder::new();
1344
1345 if config.global.memory_limit_mb > 0 {
1347 builder = builder.memory_limit(config.global.memory_limit_mb);
1348 }
1349
1350 if let Some(path) = output_path {
1352 builder = builder.output_path(path).min_free_disk(100); }
1354
1355 builder = builder.conservative();
1357
1358 builder.build()
1359 }
1360
    /// Runs the resource guard's check and returns its result unchanged.
    ///
    /// # Errors
    /// Propagates whatever error `resource_guard.check()` returns.
    fn check_resources(&self) -> SynthResult<DegradationLevel> {
        self.resource_guard.check()
    }
1368
1369 fn check_resources_with_log(&self, phase: &str) -> SynthResult<DegradationLevel> {
1371 let level = self.resource_guard.check()?;
1372
1373 if level != DegradationLevel::Normal {
1374 warn!(
1375 "Resource degradation at {}: level={}, memory={}MB, disk={}MB",
1376 phase,
1377 level,
1378 self.resource_guard.current_memory_mb(),
1379 self.resource_guard.available_disk_mb()
1380 );
1381 }
1382
1383 Ok(level)
1384 }
1385
    /// Returns the [`DegradationActions`] currently reported by the resource guard.
    fn get_degradation_actions(&self) -> DegradationActions {
        self.resource_guard.get_actions()
    }
1390
1391 fn check_memory_limit(&self) -> SynthResult<()> {
1393 self.check_resources()?;
1394 Ok(())
1395 }
1396
    /// Runs the complete enhanced generation workflow and returns every generated
    /// artifact bundled in an [`EnhancedGenerationResult`].
    ///
    /// The phases run strictly in the order written below; journal entries produced
    /// by several phases (document flows, fixed assets, intercompany, payroll,
    /// manufacturing) are accumulated into a single `entries` vector before
    /// controls, anomaly injection, validation and data-quality injection are
    /// applied to it.
    ///
    /// # Errors
    /// Returns a resource error when the initial resource check reports
    /// [`DegradationLevel::Emergency`], and propagates the error of any phase that
    /// is invoked with `?`.
    pub fn generate(&mut self) -> SynthResult<EnhancedGenerationResult> {
        info!("Starting enhanced generation workflow");
        info!(
            "Config: industry={:?}, period_months={}, companies={}",
            self.config.global.industry,
            self.config.global.period_months,
            self.config.companies.len()
        );

        // Refuse to start at all if resources are already at emergency level.
        let initial_level = self.check_resources_with_log("initial")?;
        if initial_level == DegradationLevel::Emergency {
            return Err(SynthError::resource(
                "Insufficient resources to start generation",
            ));
        }

        let mut stats = EnhancedGenerationStatistics {
            companies_count: self.config.companies.len(),
            period_months: self.config.global.period_months,
            ..Default::default()
        };

        // Chart of accounts.
        let coa = self.phase_chart_of_accounts(&mut stats)?;

        // Master data (vendors, customers, materials, ...), stored on `self.master_data`.
        self.phase_master_data(&mut stats)?;

        // Emit master data items to the phase sink helper.
        self.emit_phase_items("master_data", "Vendor", &self.master_data.vendors);
        self.emit_phase_items("master_data", "Customer", &self.master_data.customers);
        self.emit_phase_items("master_data", "Material", &self.master_data.materials);

        // Document flows plus the subledger snapshot and fixed-asset acquisition JEs.
        // `document_flows` is mutable because purchase orders are linked to
        // contracts further down.
        let (mut document_flows, subledger, fa_journal_entries) =
            self.phase_document_flows(&mut stats)?;

        // Emit the generated documents.
        self.emit_phase_items(
            "document_flows",
            "PurchaseOrder",
            &document_flows.purchase_orders,
        );
        self.emit_phase_items(
            "document_flows",
            "GoodsReceipt",
            &document_flows.goods_receipts,
        );
        self.emit_phase_items(
            "document_flows",
            "VendorInvoice",
            &document_flows.vendor_invoices,
        );
        self.emit_phase_items("document_flows", "SalesOrder", &document_flows.sales_orders);
        self.emit_phase_items("document_flows", "Delivery", &document_flows.deliveries);

        // Opening balances.
        let opening_balances = self.phase_opening_balances(&coa, &mut stats)?;

        // Journal entries from document flows and/or standalone generation.
        let mut entries = self.phase_journal_entries(&coa, &document_flows, &mut stats)?;

        // Fold the fixed-asset acquisition JEs into the main entry list.
        if !fa_journal_entries.is_empty() {
            debug!(
                "Appending {} FA acquisition JEs to main entries",
                fa_journal_entries.len()
            );
            entries.extend(fa_journal_entries);
        }

        // Degradation actions are sampled once here and reused by the anomaly and
        // data-quality injection phases below.
        let actions = self.get_degradation_actions();

        // Sourcing data.
        let sourcing = self.phase_sourcing_data(&mut stats)?;

        // Link purchase orders that have no contract yet to the first sourcing
        // contract with the same vendor id.
        if !sourcing.contracts.is_empty() {
            let mut linked_count = 0usize;
            for chain in &mut document_flows.p2p_chains {
                if chain.purchase_order.contract_id.is_none() {
                    if let Some(contract) = sourcing
                        .contracts
                        .iter()
                        .find(|c| c.vendor_id == chain.purchase_order.vendor_id)
                    {
                        chain.purchase_order.contract_id = Some(contract.contract_id.clone());
                        linked_count += 1;
                    }
                }
            }
            if linked_count > 0 {
                debug!(
                    "Linked {} purchase orders to S2C contracts by vendor match",
                    linked_count
                );
            }
        }

        // Intercompany data.
        let intercompany = self.phase_intercompany(&mut stats)?;

        // Append intercompany JEs; they are cloned because `intercompany` itself is
        // still needed later (additional graphs and the final result).
        if !intercompany.seller_journal_entries.is_empty()
            || !intercompany.buyer_journal_entries.is_empty()
        {
            let ic_je_count = intercompany.seller_journal_entries.len()
                + intercompany.buyer_journal_entries.len();
            entries.extend(intercompany.seller_journal_entries.iter().cloned());
            entries.extend(intercompany.buyer_journal_entries.iter().cloned());
            debug!(
                "Appended {} IC journal entries to main entries",
                ic_je_count
            );
        }

        // HR data.
        let hr = self.phase_hr_data(&mut stats)?;

        // Derive journal entries from payroll runs.
        if !hr.payroll_runs.is_empty() {
            let payroll_jes = Self::generate_payroll_jes(&hr.payroll_runs);
            debug!("Generated {} JEs from payroll runs", payroll_jes.len());
            entries.extend(payroll_jes);
        }

        // Manufacturing data.
        let manufacturing_snap = self.phase_manufacturing(&mut stats)?;

        // Derive journal entries from production orders.
        if !manufacturing_snap.production_orders.is_empty() {
            let mfg_jes = Self::generate_manufacturing_jes(&manufacturing_snap.production_orders);
            debug!("Generated {} JEs from production orders", mfg_jes.len());
            entries.extend(mfg_jes);
        }

        // Record entry and line-item totals now that every JE-producing phase has run.
        if !entries.is_empty() {
            stats.total_entries = entries.len() as u64;
            stats.total_line_items = entries.iter().map(|e| e.line_count() as u64).sum();
            debug!(
                "Final entry count: {}, line items: {} (after all JE-generating phases)",
                stats.total_entries, stats.total_line_items
            );
        }

        // Apply internal controls to every entry when enabled.
        if self.config.internal_controls.enabled && !entries.is_empty() {
            info!("Phase 7b: Applying internal controls to journal entries");
            let control_config = ControlGeneratorConfig {
                exception_rate: self.config.internal_controls.exception_rate,
                sod_violation_rate: self.config.internal_controls.sod_violation_rate,
                enable_sox_marking: true,
                // Falls back to 10000 when the configured f64 cannot be converted.
                sox_materiality_threshold: rust_decimal::Decimal::from_f64_retain(
                    self.config.internal_controls.sox_materiality_threshold,
                )
                .unwrap_or_else(|| rust_decimal::Decimal::from(10000)),
            };
            // The control generator gets its own seed offset (+99).
            let mut control_gen = ControlGenerator::with_config(self.seed + 99, control_config);
            for entry in &mut entries {
                control_gen.apply_controls(entry, &coa);
            }
            let with_controls = entries
                .iter()
                .filter(|e| !e.header.control_ids.is_empty())
                .count();
            info!(
                "Applied controls to {} entries ({} with control IDs assigned)",
                entries.len(),
                with_controls
            );
        }

        // Emit journal entries (this happens before anomaly injection mutates them).
        self.emit_phase_items("journal_entries", "JournalEntry", &entries);

        // Anomaly injection.
        let anomaly_labels = self.phase_anomaly_injection(&mut entries, &actions, &mut stats)?;

        // Emit the anomaly labels.
        self.emit_phase_items(
            "anomaly_injection",
            "LabeledAnomaly",
            &anomaly_labels.labels,
        );

        // Balance validation runs on the entries as they stand after anomaly injection.
        let balance_validation = self.phase_balance_validation(&entries)?;

        // Subledger-to-GL reconciliation.
        let subledger_reconciliation =
            self.phase_subledger_reconciliation(&subledger, &entries, &mut stats)?;

        // Data quality injection (runs after balance validation).
        let data_quality_stats =
            self.phase_data_quality_injection(&mut entries, &actions, &mut stats)?;

        // Audit data.
        let audit = self.phase_audit_data(&entries, &mut stats)?;

        // Banking data.
        let banking = self.phase_banking_data(&mut stats)?;

        // Graph export.
        let graph_export = self.phase_graph_export(&entries, &coa, &mut stats)?;

        // The next three phases return no `Result`; they are called without `?`.
        self.phase_llm_enrichment(&mut stats);

        self.phase_diffusion_enhancement(&mut stats);

        self.phase_causal_overlay(&mut stats);

        // Financial reporting.
        let financial_reporting =
            self.phase_financial_reporting(&document_flows, &entries, &coa, &mut stats)?;

        // Accounting standards.
        let accounting_standards = self.phase_accounting_standards(&mut stats)?;

        // OCPM events, built from the snapshots produced above.
        let ocpm = self.phase_ocpm_events(
            &document_flows,
            &sourcing,
            &hr,
            &manufacturing_snap,
            &banking,
            &audit,
            &financial_reporting,
            &mut stats,
        )?;

        // Emit OCPM events when an event log was produced.
        if let Some(ref event_log) = ocpm.event_log {
            self.emit_phase_items("ocpm", "OcpmEvent", &event_log.events);
        }

        // Sales quotes / KPIs / budgets.
        let sales_kpi_budgets =
            self.phase_sales_kpi_budgets(&coa, &financial_reporting, &mut stats)?;

        // Tax.
        let tax = self.phase_tax_generation(&document_flows, &mut stats)?;

        // ESG.
        let esg_snap = self.phase_esg_generation(&document_flows, &mut stats)?;

        // Treasury.
        let treasury = self.phase_treasury_data(&document_flows, &mut stats)?;

        // Project accounting.
        let project_accounting = self.phase_project_accounting(&document_flows, &hr, &mut stats)?;

        // Hypergraph export, run once the snapshots it consumes are available.
        self.phase_hypergraph_export(
            &coa,
            &entries,
            &document_flows,
            &sourcing,
            &hr,
            &manufacturing_snap,
            &banking,
            &audit,
            &financial_reporting,
            &ocpm,
            &mut stats,
        )?;

        // Additional graphs, under the same enablement condition as graph export.
        if self.phase_config.generate_graph_export || self.config.graph_export.enabled {
            self.build_additional_graphs(&banking, &intercompany, &entries, &mut stats);
        }

        // Log-only notes about config sections that this batch path does not fully use.
        if self.config.streaming.enabled {
            info!("Note: streaming config is enabled but batch mode does not use it");
        }
        if self.config.vendor_network.enabled {
            debug!("Vendor network config available; relationship graph generation is partial");
        }
        if self.config.customer_segmentation.enabled {
            debug!("Customer segmentation config available; segment-aware generation is partial");
        }

        // Final resource statistics; peak memory is converted from bytes to MB for the log.
        let resource_stats = self.resource_guard.stats();
        info!(
            "Generation workflow complete. Resource stats: memory_peak={}MB, disk_written={}bytes, degradation_level={}",
            resource_stats.memory.peak_resident_bytes / (1024 * 1024),
            resource_stats.disk.estimated_bytes_written,
            resource_stats.degradation_level
        );

        // Flush the phase sink if one is attached; the flush result is deliberately ignored.
        if let Some(ref sink) = self.phase_sink {
            let _ = sink.flush();
        }

        // Lineage graph.
        let lineage = self.build_lineage_graph();

        // Quality gates: build an evaluation from what this run knows and score it
        // against the configured profile. Yields `None` when gates are disabled or
        // the profile name is unknown.
        let gate_result = if self.config.quality_gates.enabled {
            let profile_name = &self.config.quality_gates.profile;
            match datasynth_eval::gates::get_profile(profile_name) {
                Some(profile) => {
                    let mut eval = datasynth_eval::ComprehensiveEvaluation::new();

                    // Populate the balance evaluation only if validation actually ran.
                    if balance_validation.validated {
                        eval.coherence.balance =
                            Some(datasynth_eval::coherence::BalanceSheetEvaluation {
                                equation_balanced: balance_validation.is_balanced,
                                max_imbalance: (balance_validation.total_debits
                                    - balance_validation.total_credits)
                                    .abs(),
                                // The whole run is treated as a single period here.
                                periods_evaluated: 1,
                                periods_imbalanced: if balance_validation.is_balanced {
                                    0
                                } else {
                                    1
                                },
                                period_results: Vec::new(),
                                companies_evaluated: self.config.companies.len(),
                            });
                    }

                    // Coherence passes exactly when the balance validation is balanced.
                    eval.coherence.passes = balance_validation.is_balanced;
                    if !balance_validation.is_balanced {
                        eval.coherence
                            .failures
                            .push("Balance sheet equation not satisfied".to_string());
                    }

                    // Statistical score is a fixed value chosen by entry count, not a
                    // measured statistic.
                    eval.statistical.overall_score = if entries.len() > 10 { 0.9 } else { 0.5 };
                    eval.statistical.passes = !entries.is_empty();

                    // Quality score and pass flag are hard-coded.
                    eval.quality.overall_score = 0.9;
                    eval.quality.passes = true;

                    let result = datasynth_eval::gates::GateEngine::evaluate(&eval, &profile);
                    info!(
                        "Quality gates evaluated (profile '{}'): {}/{} passed — {}",
                        profile_name, result.gates_passed, result.gates_total, result.summary
                    );
                    Some(result)
                }
                None => {
                    warn!(
                        "Quality gates enabled but profile '{}' not found; skipping gate evaluation",
                        profile_name
                    );
                    None
                }
            }
        } else {
            None
        };

        // Include the standard control definitions only when internal controls are enabled.
        let internal_controls = if self.config.internal_controls.enabled {
            InternalControl::standard_controls()
        } else {
            Vec::new()
        };

        Ok(EnhancedGenerationResult {
            chart_of_accounts: (*coa).clone(),
            master_data: self.master_data.clone(),
            document_flows,
            subledger,
            ocpm,
            audit,
            banking,
            graph_export,
            sourcing,
            financial_reporting,
            hr,
            accounting_standards,
            manufacturing: manufacturing_snap,
            sales_kpi_budgets,
            tax,
            esg: esg_snap,
            treasury,
            project_accounting,
            intercompany,
            journal_entries: entries,
            anomaly_labels,
            balance_validation,
            data_quality_stats,
            statistics: stats,
            lineage: Some(lineage),
            gate_result,
            internal_controls,
            opening_balances,
            subledger_reconciliation,
        })
    }
1812
1813 fn phase_chart_of_accounts(
1819 &mut self,
1820 stats: &mut EnhancedGenerationStatistics,
1821 ) -> SynthResult<Arc<ChartOfAccounts>> {
1822 info!("Phase 1: Generating Chart of Accounts");
1823 let coa = self.generate_coa()?;
1824 stats.accounts_count = coa.account_count();
1825 info!(
1826 "Chart of Accounts generated: {} accounts",
1827 stats.accounts_count
1828 );
1829 self.check_resources_with_log("post-coa")?;
1830 Ok(coa)
1831 }
1832
1833 fn phase_master_data(&mut self, stats: &mut EnhancedGenerationStatistics) -> SynthResult<()> {
1835 if self.phase_config.generate_master_data {
1836 info!("Phase 2: Generating Master Data");
1837 self.generate_master_data()?;
1838 stats.vendor_count = self.master_data.vendors.len();
1839 stats.customer_count = self.master_data.customers.len();
1840 stats.material_count = self.master_data.materials.len();
1841 stats.asset_count = self.master_data.assets.len();
1842 stats.employee_count = self.master_data.employees.len();
1843 info!(
1844 "Master data generated: {} vendors, {} customers, {} materials, {} assets, {} employees",
1845 stats.vendor_count, stats.customer_count, stats.material_count,
1846 stats.asset_count, stats.employee_count
1847 );
1848 self.check_resources_with_log("post-master-data")?;
1849 } else {
1850 debug!("Phase 2: Skipped (master data generation disabled)");
1851 }
1852 Ok(())
1853 }
1854
    /// Phase 3: generates document flows, links them to AP/AR subledgers, and builds
    /// fixed-asset and inventory subledger records.
    ///
    /// Returns the document flow snapshot, the subledger snapshot, and the journal
    /// entries created for fixed-asset acquisitions.
    ///
    /// Document flows are generated only when enabled in the phase config and at
    /// least one vendor exists. The fixed-asset and inventory sections below run
    /// independently of that flag, whenever assets or materials are present.
    ///
    /// # Errors
    /// Propagates errors from document flow generation, subledger linking, and the
    /// post-phase resource check.
    fn phase_document_flows(
        &mut self,
        stats: &mut EnhancedGenerationStatistics,
    ) -> SynthResult<(DocumentFlowSnapshot, SubledgerSnapshot, Vec<JournalEntry>)> {
        let mut document_flows = DocumentFlowSnapshot::default();
        let mut subledger = SubledgerSnapshot::default();

        if self.phase_config.generate_document_flows && !self.master_data.vendors.is_empty() {
            info!("Phase 3: Generating Document Flows");
            self.generate_document_flows(&mut document_flows)?;
            stats.p2p_chain_count = document_flows.p2p_chains.len();
            stats.o2c_chain_count = document_flows.o2c_chains.len();
            info!(
                "Document flows generated: {} P2P chains, {} O2C chains",
                stats.p2p_chain_count, stats.o2c_chain_count
            );

            // Derive the AP/AR subledger from the generated flows; this replaces the
            // default snapshot created above.
            debug!("Phase 3b: Linking document flows to subledgers");
            subledger = self.link_document_flows_to_subledgers(&document_flows)?;
            stats.ap_invoice_count = subledger.ap_invoices.len();
            stats.ar_invoice_count = subledger.ar_invoices.len();
            debug!(
                "Subledgers linked: {} AP invoices, {} AR invoices",
                stats.ap_invoice_count, stats.ar_invoice_count
            );

            self.check_resources_with_log("post-document-flows")?;
        } else {
            debug!("Phase 3: Skipped (document flow generation disabled or no master data)");
        }

        // Fixed-asset subledger: one record and one acquisition JE per asset.
        let mut fa_journal_entries = Vec::new();
        if !self.master_data.assets.is_empty() {
            debug!("Generating FA subledger records");
            // All records use the first configured company; "1000" / "USD" are the
            // fallbacks when no company is configured.
            let company_code = self
                .config
                .companies
                .first()
                .map(|c| c.code.as_str())
                .unwrap_or("1000");
            let currency = self
                .config
                .companies
                .first()
                .map(|c| c.currency.as_str())
                .unwrap_or("USD");

            // Dedicated RNG stream for this generator (seed offset +70).
            let mut fa_gen = datasynth_generators::FAGenerator::new(
                datasynth_generators::FAGeneratorConfig::default(),
                rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 70),
            );

            for asset in &self.master_data.assets {
                let (record, je) = fa_gen.generate_asset_acquisition(
                    company_code,
                    // The asset class is passed as its Debug representation.
                    &format!("{:?}", asset.asset_class),
                    &asset.description,
                    asset.acquisition_date,
                    currency,
                    asset.cost_center.as_deref(),
                );
                subledger.fa_records.push(record);
                fa_journal_entries.push(je);
            }

            stats.fa_subledger_count = subledger.fa_records.len();
            debug!(
                "FA subledger records generated: {} (with {} acquisition JEs)",
                stats.fa_subledger_count,
                fa_journal_entries.len()
            );
        }

        // Inventory subledger: one position per material.
        if !self.master_data.materials.is_empty() {
            debug!("Generating Inventory subledger records");
            let first_company = self.config.companies.first();
            let company_code = first_company.map(|c| c.code.as_str()).unwrap_or("1000");
            let inv_currency = first_company
                .map(|c| c.currency.clone())
                .unwrap_or_else(|| "USD".to_string());

            // Dedicated RNG stream for this generator (seed offset +71).
            let mut inv_gen = datasynth_generators::InventoryGenerator::new_with_currency(
                datasynth_generators::InventoryGeneratorConfig::default(),
                rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 71),
                inv_currency.clone(),
            );

            for (i, material) in self.master_data.materials.iter().enumerate() {
                // Plants cycle through PLANT01..PLANT03 and storage locations through
                // SL-001..SL-010, based on the material's index.
                let plant = format!("PLANT{:02}", (i % 3) + 1);
                let storage_loc = format!("SL-{:03}", (i % 10) + 1);
                // The safety stock is rendered to a string and parsed as i64; any
                // value that does not parse as a whole number falls back to 100.
                let initial_qty = rust_decimal::Decimal::from(
                    material
                        .safety_stock
                        .to_string()
                        .parse::<i64>()
                        .unwrap_or(100),
                );

                let position = inv_gen.generate_position(
                    company_code,
                    &plant,
                    &storage_loc,
                    &material.material_id,
                    &material.description,
                    initial_qty,
                    Some(material.standard_cost),
                    &inv_currency,
                );
                subledger.inventory_positions.push(position);
            }

            stats.inventory_subledger_count = subledger.inventory_positions.len();
            debug!(
                "Inventory subledger records generated: {}",
                stats.inventory_subledger_count
            );
        }

        Ok((document_flows, subledger, fa_journal_entries))
    }
1979
1980 #[allow(clippy::too_many_arguments)]
1982 fn phase_ocpm_events(
1983 &mut self,
1984 document_flows: &DocumentFlowSnapshot,
1985 sourcing: &SourcingSnapshot,
1986 hr: &HrSnapshot,
1987 manufacturing: &ManufacturingSnapshot,
1988 banking: &BankingSnapshot,
1989 audit: &AuditSnapshot,
1990 financial_reporting: &FinancialReportingSnapshot,
1991 stats: &mut EnhancedGenerationStatistics,
1992 ) -> SynthResult<OcpmSnapshot> {
1993 if self.phase_config.generate_ocpm_events {
1994 info!("Phase 3c: Generating OCPM Events");
1995 let ocpm_snapshot = self.generate_ocpm_events(
1996 document_flows,
1997 sourcing,
1998 hr,
1999 manufacturing,
2000 banking,
2001 audit,
2002 financial_reporting,
2003 )?;
2004 stats.ocpm_event_count = ocpm_snapshot.event_count;
2005 stats.ocpm_object_count = ocpm_snapshot.object_count;
2006 stats.ocpm_case_count = ocpm_snapshot.case_count;
2007 info!(
2008 "OCPM events generated: {} events, {} objects, {} cases",
2009 stats.ocpm_event_count, stats.ocpm_object_count, stats.ocpm_case_count
2010 );
2011 self.check_resources_with_log("post-ocpm")?;
2012 Ok(ocpm_snapshot)
2013 } else {
2014 debug!("Phase 3c: Skipped (OCPM generation disabled or no document flows)");
2015 Ok(OcpmSnapshot::default())
2016 }
2017 }
2018
2019 fn phase_journal_entries(
2021 &mut self,
2022 coa: &Arc<ChartOfAccounts>,
2023 document_flows: &DocumentFlowSnapshot,
2024 _stats: &mut EnhancedGenerationStatistics,
2025 ) -> SynthResult<Vec<JournalEntry>> {
2026 let mut entries = Vec::new();
2027
2028 if self.phase_config.generate_document_flows && !document_flows.p2p_chains.is_empty() {
2030 debug!("Phase 4a: Generating JEs from document flows");
2031 let flow_entries = self.generate_jes_from_document_flows(document_flows)?;
2032 debug!("Generated {} JEs from document flows", flow_entries.len());
2033 entries.extend(flow_entries);
2034 }
2035
2036 if self.phase_config.generate_journal_entries {
2038 info!("Phase 4: Generating Journal Entries");
2039 let je_entries = self.generate_journal_entries(coa)?;
2040 info!("Generated {} standalone journal entries", je_entries.len());
2041 entries.extend(je_entries);
2042 } else {
2043 debug!("Phase 4: Skipped (journal entry generation disabled)");
2044 }
2045
2046 if !entries.is_empty() {
2047 self.check_resources_with_log("post-journal-entries")?;
2050 }
2051
2052 Ok(entries)
2053 }
2054
2055 fn phase_anomaly_injection(
2057 &mut self,
2058 entries: &mut [JournalEntry],
2059 actions: &DegradationActions,
2060 stats: &mut EnhancedGenerationStatistics,
2061 ) -> SynthResult<AnomalyLabels> {
2062 if self.phase_config.inject_anomalies
2063 && !entries.is_empty()
2064 && !actions.skip_anomaly_injection
2065 {
2066 info!("Phase 5: Injecting Anomalies");
2067 let result = self.inject_anomalies(entries)?;
2068 stats.anomalies_injected = result.labels.len();
2069 info!("Injected {} anomalies", stats.anomalies_injected);
2070 self.check_resources_with_log("post-anomaly-injection")?;
2071 Ok(result)
2072 } else if actions.skip_anomaly_injection {
2073 warn!("Phase 5: Skipped due to resource degradation");
2074 Ok(AnomalyLabels::default())
2075 } else {
2076 debug!("Phase 5: Skipped (anomaly injection disabled or no entries)");
2077 Ok(AnomalyLabels::default())
2078 }
2079 }
2080
2081 fn phase_balance_validation(
2083 &mut self,
2084 entries: &[JournalEntry],
2085 ) -> SynthResult<BalanceValidationResult> {
2086 if self.phase_config.validate_balances && !entries.is_empty() {
2087 debug!("Phase 6: Validating Balances");
2088 let balance_validation = self.validate_journal_entries(entries)?;
2089 if balance_validation.is_balanced {
2090 debug!("Balance validation passed");
2091 } else {
2092 warn!(
2093 "Balance validation found {} errors",
2094 balance_validation.validation_errors.len()
2095 );
2096 }
2097 Ok(balance_validation)
2098 } else {
2099 Ok(BalanceValidationResult::default())
2100 }
2101 }
2102
2103 fn phase_data_quality_injection(
2105 &mut self,
2106 entries: &mut [JournalEntry],
2107 actions: &DegradationActions,
2108 stats: &mut EnhancedGenerationStatistics,
2109 ) -> SynthResult<DataQualityStats> {
2110 if self.phase_config.inject_data_quality
2111 && !entries.is_empty()
2112 && !actions.skip_data_quality
2113 {
2114 info!("Phase 7: Injecting Data Quality Variations");
2115 let dq_stats = self.inject_data_quality(entries)?;
2116 stats.data_quality_issues = dq_stats.records_with_issues;
2117 info!("Injected {} data quality issues", stats.data_quality_issues);
2118 self.check_resources_with_log("post-data-quality")?;
2119 Ok(dq_stats)
2120 } else if actions.skip_data_quality {
2121 warn!("Phase 7: Skipped due to resource degradation");
2122 Ok(DataQualityStats::default())
2123 } else {
2124 debug!("Phase 7: Skipped (data quality injection disabled or no entries)");
2125 Ok(DataQualityStats::default())
2126 }
2127 }
2128
2129 fn phase_audit_data(
2131 &mut self,
2132 entries: &[JournalEntry],
2133 stats: &mut EnhancedGenerationStatistics,
2134 ) -> SynthResult<AuditSnapshot> {
2135 if self.phase_config.generate_audit {
2136 info!("Phase 8: Generating Audit Data");
2137 let audit_snapshot = self.generate_audit_data(entries)?;
2138 stats.audit_engagement_count = audit_snapshot.engagements.len();
2139 stats.audit_workpaper_count = audit_snapshot.workpapers.len();
2140 stats.audit_evidence_count = audit_snapshot.evidence.len();
2141 stats.audit_risk_count = audit_snapshot.risk_assessments.len();
2142 stats.audit_finding_count = audit_snapshot.findings.len();
2143 stats.audit_judgment_count = audit_snapshot.judgments.len();
2144 info!(
2145 "Audit data generated: {} engagements, {} workpapers, {} evidence, {} risks, {} findings, {} judgments",
2146 stats.audit_engagement_count, stats.audit_workpaper_count,
2147 stats.audit_evidence_count, stats.audit_risk_count,
2148 stats.audit_finding_count, stats.audit_judgment_count
2149 );
2150 self.check_resources_with_log("post-audit")?;
2151 Ok(audit_snapshot)
2152 } else {
2153 debug!("Phase 8: Skipped (audit generation disabled)");
2154 Ok(AuditSnapshot::default())
2155 }
2156 }
2157
2158 fn phase_banking_data(
2160 &mut self,
2161 stats: &mut EnhancedGenerationStatistics,
2162 ) -> SynthResult<BankingSnapshot> {
2163 if self.phase_config.generate_banking && self.config.banking.enabled {
2164 info!("Phase 9: Generating Banking KYC/AML Data");
2165 let banking_snapshot = self.generate_banking_data()?;
2166 stats.banking_customer_count = banking_snapshot.customers.len();
2167 stats.banking_account_count = banking_snapshot.accounts.len();
2168 stats.banking_transaction_count = banking_snapshot.transactions.len();
2169 stats.banking_suspicious_count = banking_snapshot.suspicious_count;
2170 info!(
2171 "Banking data generated: {} customers, {} accounts, {} transactions ({} suspicious)",
2172 stats.banking_customer_count, stats.banking_account_count,
2173 stats.banking_transaction_count, stats.banking_suspicious_count
2174 );
2175 self.check_resources_with_log("post-banking")?;
2176 Ok(banking_snapshot)
2177 } else {
2178 debug!("Phase 9: Skipped (banking generation disabled)");
2179 Ok(BankingSnapshot::default())
2180 }
2181 }
2182
2183 fn phase_graph_export(
2185 &mut self,
2186 entries: &[JournalEntry],
2187 coa: &Arc<ChartOfAccounts>,
2188 stats: &mut EnhancedGenerationStatistics,
2189 ) -> SynthResult<GraphExportSnapshot> {
2190 if (self.phase_config.generate_graph_export || self.config.graph_export.enabled)
2191 && !entries.is_empty()
2192 {
2193 info!("Phase 10: Exporting Accounting Network Graphs");
2194 match self.export_graphs(entries, coa, stats) {
2195 Ok(snapshot) => {
2196 info!(
2197 "Graph export complete: {} graphs ({} nodes, {} edges)",
2198 snapshot.graph_count, stats.graph_node_count, stats.graph_edge_count
2199 );
2200 Ok(snapshot)
2201 }
2202 Err(e) => {
2203 warn!("Phase 10: Graph export failed: {}", e);
2204 Ok(GraphExportSnapshot::default())
2205 }
2206 }
2207 } else {
2208 debug!("Phase 10: Skipped (graph export disabled or no entries)");
2209 Ok(GraphExportSnapshot::default())
2210 }
2211 }
2212
2213 #[allow(clippy::too_many_arguments)]
2215 fn phase_hypergraph_export(
2216 &self,
2217 coa: &Arc<ChartOfAccounts>,
2218 entries: &[JournalEntry],
2219 document_flows: &DocumentFlowSnapshot,
2220 sourcing: &SourcingSnapshot,
2221 hr: &HrSnapshot,
2222 manufacturing: &ManufacturingSnapshot,
2223 banking: &BankingSnapshot,
2224 audit: &AuditSnapshot,
2225 financial_reporting: &FinancialReportingSnapshot,
2226 ocpm: &OcpmSnapshot,
2227 stats: &mut EnhancedGenerationStatistics,
2228 ) -> SynthResult<()> {
2229 if self.config.graph_export.hypergraph.enabled && !entries.is_empty() {
2230 info!("Phase 19b: Exporting Multi-Layer Hypergraph");
2231 match self.export_hypergraph(
2232 coa,
2233 entries,
2234 document_flows,
2235 sourcing,
2236 hr,
2237 manufacturing,
2238 banking,
2239 audit,
2240 financial_reporting,
2241 ocpm,
2242 stats,
2243 ) {
2244 Ok(info) => {
2245 info!(
2246 "Hypergraph export complete: {} nodes, {} edges, {} hyperedges",
2247 info.node_count, info.edge_count, info.hyperedge_count
2248 );
2249 }
2250 Err(e) => {
2251 warn!("Phase 10b: Hypergraph export failed: {}", e);
2252 }
2253 }
2254 } else {
2255 debug!("Phase 10b: Skipped (hypergraph export disabled or no entries)");
2256 }
2257 Ok(())
2258 }
2259
2260 fn phase_llm_enrichment(&mut self, stats: &mut EnhancedGenerationStatistics) {
2266 if !self.config.llm.enabled {
2267 debug!("Phase 11: Skipped (LLM enrichment disabled)");
2268 return;
2269 }
2270
2271 info!("Phase 11: Starting LLM Enrichment");
2272 let start = std::time::Instant::now();
2273
2274 let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
2275 let provider = Arc::new(MockLlmProvider::new(self.seed));
2276 let enricher = VendorLlmEnricher::new(provider);
2277
2278 let industry = format!("{:?}", self.config.global.industry);
2279 let max_enrichments = self
2280 .config
2281 .llm
2282 .max_vendor_enrichments
2283 .min(self.master_data.vendors.len());
2284
2285 let mut enriched_count = 0usize;
2286 for vendor in self.master_data.vendors.iter_mut().take(max_enrichments) {
2287 match enricher.enrich_vendor_name(&industry, "general", &vendor.country) {
2288 Ok(name) => {
2289 vendor.name = name;
2290 enriched_count += 1;
2291 }
2292 Err(e) => {
2293 warn!(
2294 "LLM vendor enrichment failed for {}: {}",
2295 vendor.vendor_id, e
2296 );
2297 }
2298 }
2299 }
2300
2301 enriched_count
2302 }));
2303
2304 match result {
2305 Ok(enriched_count) => {
2306 stats.llm_vendors_enriched = enriched_count;
2307 let elapsed = start.elapsed();
2308 stats.llm_enrichment_ms = elapsed.as_millis() as u64;
2309 info!(
2310 "Phase 11 complete: {} vendors enriched in {}ms",
2311 enriched_count, stats.llm_enrichment_ms
2312 );
2313 }
2314 Err(_) => {
2315 let elapsed = start.elapsed();
2316 stats.llm_enrichment_ms = elapsed.as_millis() as u64;
2317 warn!("Phase 11: LLM enrichment failed (panic caught), continuing");
2318 }
2319 }
2320 }
2321
2322 fn phase_diffusion_enhancement(&self, stats: &mut EnhancedGenerationStatistics) {
2328 if !self.config.diffusion.enabled {
2329 debug!("Phase 12: Skipped (diffusion enhancement disabled)");
2330 return;
2331 }
2332
2333 info!("Phase 12: Starting Diffusion Enhancement");
2334 let start = std::time::Instant::now();
2335
2336 let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
2337 let means = vec![5000.0, 3.0, 2.0]; let stds = vec![2000.0, 1.5, 1.0];
2340
2341 let diffusion_config = DiffusionConfig {
2342 n_steps: self.config.diffusion.n_steps,
2343 seed: self.seed,
2344 ..Default::default()
2345 };
2346
2347 let backend = StatisticalDiffusionBackend::new(means, stds, diffusion_config);
2348
2349 let n_samples = self.config.diffusion.sample_size;
2350 let n_features = 3; let samples = backend.generate(n_samples, n_features, self.seed);
2352
2353 samples.len()
2354 }));
2355
2356 match result {
2357 Ok(sample_count) => {
2358 stats.diffusion_samples_generated = sample_count;
2359 let elapsed = start.elapsed();
2360 stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
2361 info!(
2362 "Phase 12 complete: {} diffusion samples generated in {}ms",
2363 sample_count, stats.diffusion_enhancement_ms
2364 );
2365 }
2366 Err(_) => {
2367 let elapsed = start.elapsed();
2368 stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
2369 warn!("Phase 12: Diffusion enhancement failed (panic caught), continuing");
2370 }
2371 }
2372 }
2373
2374 fn phase_causal_overlay(&self, stats: &mut EnhancedGenerationStatistics) {
2381 if !self.config.causal.enabled {
2382 debug!("Phase 13: Skipped (causal generation disabled)");
2383 return;
2384 }
2385
2386 info!("Phase 13: Starting Causal Overlay");
2387 let start = std::time::Instant::now();
2388
2389 let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
2390 let graph = match self.config.causal.template.as_str() {
2392 "revenue_cycle" => CausalGraph::revenue_cycle_template(),
2393 _ => CausalGraph::fraud_detection_template(),
2394 };
2395
2396 let scm = StructuralCausalModel::new(graph.clone())
2397 .map_err(|e| SynthError::generation(format!("Failed to build SCM: {}", e)))?;
2398
2399 let n_samples = self.config.causal.sample_size;
2400 let samples = scm
2401 .generate(n_samples, self.seed)
2402 .map_err(|e| SynthError::generation(format!("SCM generation failed: {}", e)))?;
2403
2404 let validation_passed = if self.config.causal.validate {
2406 let report = CausalValidator::validate_causal_structure(&samples, &graph);
2407 if report.valid {
2408 info!(
2409 "Causal validation passed: all {} checks OK",
2410 report.checks.len()
2411 );
2412 } else {
2413 warn!(
2414 "Causal validation: {} violations detected: {:?}",
2415 report.violations.len(),
2416 report.violations
2417 );
2418 }
2419 Some(report.valid)
2420 } else {
2421 None
2422 };
2423
2424 Ok::<(usize, Option<bool>), SynthError>((samples.len(), validation_passed))
2425 }));
2426
2427 match result {
2428 Ok(Ok((sample_count, validation_passed))) => {
2429 stats.causal_samples_generated = sample_count;
2430 stats.causal_validation_passed = validation_passed;
2431 let elapsed = start.elapsed();
2432 stats.causal_generation_ms = elapsed.as_millis() as u64;
2433 info!(
2434 "Phase 13 complete: {} causal samples generated in {}ms (validation: {:?})",
2435 sample_count, stats.causal_generation_ms, validation_passed,
2436 );
2437 }
2438 Ok(Err(e)) => {
2439 let elapsed = start.elapsed();
2440 stats.causal_generation_ms = elapsed.as_millis() as u64;
2441 warn!("Phase 13: Causal generation failed: {}", e);
2442 }
2443 Err(_) => {
2444 let elapsed = start.elapsed();
2445 stats.causal_generation_ms = elapsed.as_millis() as u64;
2446 warn!("Phase 13: Causal generation failed (panic caught), continuing");
2447 }
2448 }
2449 }
2450
2451 fn phase_sourcing_data(
2453 &mut self,
2454 stats: &mut EnhancedGenerationStatistics,
2455 ) -> SynthResult<SourcingSnapshot> {
2456 if !self.phase_config.generate_sourcing && !self.config.source_to_pay.enabled {
2457 debug!("Phase 14: Skipped (sourcing generation disabled)");
2458 return Ok(SourcingSnapshot::default());
2459 }
2460
2461 info!("Phase 14: Generating S2C Sourcing Data");
2462 let seed = self.seed;
2463
2464 let vendor_ids: Vec<String> = self
2466 .master_data
2467 .vendors
2468 .iter()
2469 .map(|v| v.vendor_id.clone())
2470 .collect();
2471 if vendor_ids.is_empty() {
2472 debug!("Phase 14: Skipped (no vendors available)");
2473 return Ok(SourcingSnapshot::default());
2474 }
2475
2476 let categories: Vec<(String, String)> = vec![
2477 ("CAT-RAW".to_string(), "Raw Materials".to_string()),
2478 ("CAT-OFF".to_string(), "Office Supplies".to_string()),
2479 ("CAT-IT".to_string(), "IT Equipment".to_string()),
2480 ("CAT-SVC".to_string(), "Professional Services".to_string()),
2481 ("CAT-LOG".to_string(), "Logistics".to_string()),
2482 ];
2483 let categories_with_spend: Vec<(String, String, rust_decimal::Decimal)> = categories
2484 .iter()
2485 .map(|(id, name)| {
2486 (
2487 id.clone(),
2488 name.clone(),
2489 rust_decimal::Decimal::from(100_000),
2490 )
2491 })
2492 .collect();
2493
2494 let company_code = self
2495 .config
2496 .companies
2497 .first()
2498 .map(|c| c.code.as_str())
2499 .unwrap_or("1000");
2500 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
2501 .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
2502 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
2503 let fiscal_year = start_date.year() as u16;
2504 let owner_ids: Vec<String> = self
2505 .master_data
2506 .employees
2507 .iter()
2508 .take(5)
2509 .map(|e| e.employee_id.clone())
2510 .collect();
2511 let owner_id = owner_ids.first().map(|s| s.as_str()).unwrap_or("BUYER-001");
2512
2513 let mut spend_gen = SpendAnalysisGenerator::new(seed);
2515 let spend_analyses =
2516 spend_gen.generate(company_code, &vendor_ids, &categories, fiscal_year);
2517
2518 let mut project_gen = SourcingProjectGenerator::new(seed + 1);
2520 let sourcing_projects = if owner_ids.is_empty() {
2521 Vec::new()
2522 } else {
2523 project_gen.generate(
2524 company_code,
2525 &categories_with_spend,
2526 &owner_ids,
2527 start_date,
2528 self.config.global.period_months,
2529 )
2530 };
2531 stats.sourcing_project_count = sourcing_projects.len();
2532
2533 let qual_vendor_ids: Vec<String> = vendor_ids.iter().take(20).cloned().collect();
2535 let mut qual_gen = QualificationGenerator::new(seed + 2);
2536 let qualifications = qual_gen.generate(
2537 company_code,
2538 &qual_vendor_ids,
2539 sourcing_projects.first().map(|p| p.project_id.as_str()),
2540 owner_id,
2541 start_date,
2542 );
2543
2544 let mut rfx_gen = RfxGenerator::new(seed + 3);
2546 let rfx_events: Vec<RfxEvent> = sourcing_projects
2547 .iter()
2548 .map(|proj| {
2549 let qualified_vids: Vec<String> = vendor_ids.iter().take(5).cloned().collect();
2550 rfx_gen.generate(
2551 company_code,
2552 &proj.project_id,
2553 &proj.category_id,
2554 &qualified_vids,
2555 owner_id,
2556 start_date,
2557 50000.0,
2558 )
2559 })
2560 .collect();
2561 stats.rfx_event_count = rfx_events.len();
2562
2563 let mut bid_gen = BidGenerator::new(seed + 4);
2565 let mut all_bids = Vec::new();
2566 for rfx in &rfx_events {
2567 let bidder_count = vendor_ids.len().clamp(2, 5);
2568 let responding: Vec<String> = vendor_ids.iter().take(bidder_count).cloned().collect();
2569 let bids = bid_gen.generate(rfx, &responding, start_date);
2570 all_bids.extend(bids);
2571 }
2572 stats.bid_count = all_bids.len();
2573
2574 let mut eval_gen = BidEvaluationGenerator::new(seed + 5);
2576 let bid_evaluations: Vec<BidEvaluation> = rfx_events
2577 .iter()
2578 .map(|rfx| {
2579 let rfx_bids: Vec<SupplierBid> = all_bids
2580 .iter()
2581 .filter(|b| b.rfx_id == rfx.rfx_id)
2582 .cloned()
2583 .collect();
2584 eval_gen.evaluate(rfx, &rfx_bids, owner_id)
2585 })
2586 .collect();
2587
2588 let mut contract_gen = ContractGenerator::new(seed + 6);
2590 let contracts: Vec<ProcurementContract> = bid_evaluations
2591 .iter()
2592 .zip(rfx_events.iter())
2593 .filter_map(|(eval, rfx)| {
2594 eval.ranked_bids.first().and_then(|winner| {
2595 all_bids
2596 .iter()
2597 .find(|b| b.bid_id == winner.bid_id)
2598 .map(|winning_bid| {
2599 contract_gen.generate_from_bid(
2600 winning_bid,
2601 Some(&rfx.sourcing_project_id),
2602 &rfx.category_id,
2603 owner_id,
2604 start_date,
2605 )
2606 })
2607 })
2608 })
2609 .collect();
2610 stats.contract_count = contracts.len();
2611
2612 let mut catalog_gen = CatalogGenerator::new(seed + 7);
2614 let catalog_items = catalog_gen.generate(&contracts);
2615 stats.catalog_item_count = catalog_items.len();
2616
2617 let mut scorecard_gen = ScorecardGenerator::new(seed + 8);
2619 let vendor_contracts: Vec<(String, Vec<&ProcurementContract>)> = contracts
2620 .iter()
2621 .fold(
2622 std::collections::HashMap::<String, Vec<&ProcurementContract>>::new(),
2623 |mut acc, c| {
2624 acc.entry(c.vendor_id.clone()).or_default().push(c);
2625 acc
2626 },
2627 )
2628 .into_iter()
2629 .collect();
2630 let scorecards = scorecard_gen.generate(
2631 company_code,
2632 &vendor_contracts,
2633 start_date,
2634 end_date,
2635 owner_id,
2636 );
2637 stats.scorecard_count = scorecards.len();
2638
2639 let mut sourcing_projects = sourcing_projects;
2642 for project in &mut sourcing_projects {
2643 project.rfx_ids = rfx_events
2645 .iter()
2646 .filter(|rfx| rfx.sourcing_project_id == project.project_id)
2647 .map(|rfx| rfx.rfx_id.clone())
2648 .collect();
2649
2650 project.contract_id = contracts
2652 .iter()
2653 .find(|c| {
2654 c.sourcing_project_id
2655 .as_deref()
2656 .is_some_and(|sp| sp == project.project_id)
2657 })
2658 .map(|c| c.contract_id.clone());
2659
2660 project.spend_analysis_id = spend_analyses
2662 .iter()
2663 .find(|sa| sa.category_id == project.category_id)
2664 .map(|sa| sa.category_id.clone());
2665 }
2666
2667 info!(
2668 "S2C sourcing generated: {} projects, {} RFx, {} bids, {} contracts, {} catalog items, {} scorecards",
2669 stats.sourcing_project_count, stats.rfx_event_count, stats.bid_count,
2670 stats.contract_count, stats.catalog_item_count, stats.scorecard_count
2671 );
2672 self.check_resources_with_log("post-sourcing")?;
2673
2674 Ok(SourcingSnapshot {
2675 spend_analyses,
2676 sourcing_projects,
2677 qualifications,
2678 rfx_events,
2679 bids: all_bids,
2680 bid_evaluations,
2681 contracts,
2682 catalog_items,
2683 scorecards,
2684 })
2685 }
2686
2687 fn phase_intercompany(
2689 &mut self,
2690 stats: &mut EnhancedGenerationStatistics,
2691 ) -> SynthResult<IntercompanySnapshot> {
2692 if !self.phase_config.generate_intercompany && !self.config.intercompany.enabled {
2694 debug!("Phase 14b: Skipped (intercompany generation disabled)");
2695 return Ok(IntercompanySnapshot::default());
2696 }
2697
2698 if self.config.companies.len() < 2 {
2700 debug!(
2701 "Phase 14b: Skipped (intercompany requires 2+ companies, found {})",
2702 self.config.companies.len()
2703 );
2704 return Ok(IntercompanySnapshot::default());
2705 }
2706
2707 info!("Phase 14b: Generating Intercompany Transactions");
2708
2709 let seed = self.seed;
2710 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
2711 .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
2712 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
2713
2714 let parent_code = self.config.companies[0].code.clone();
2717 let mut ownership_structure =
2718 datasynth_core::models::intercompany::OwnershipStructure::new(parent_code.clone());
2719
2720 for (i, company) in self.config.companies.iter().skip(1).enumerate() {
2721 let relationship = datasynth_core::models::intercompany::IntercompanyRelationship::new(
2722 format!("REL{:03}", i + 1),
2723 parent_code.clone(),
2724 company.code.clone(),
2725 rust_decimal::Decimal::from(100), start_date,
2727 );
2728 ownership_structure.add_relationship(relationship);
2729 }
2730
2731 let tp_method = match self.config.intercompany.transfer_pricing_method {
2733 datasynth_config::schema::TransferPricingMethod::CostPlus => {
2734 datasynth_core::models::intercompany::TransferPricingMethod::CostPlus
2735 }
2736 datasynth_config::schema::TransferPricingMethod::ComparableUncontrolled => {
2737 datasynth_core::models::intercompany::TransferPricingMethod::ComparableUncontrolled
2738 }
2739 datasynth_config::schema::TransferPricingMethod::ResalePrice => {
2740 datasynth_core::models::intercompany::TransferPricingMethod::ResalePrice
2741 }
2742 datasynth_config::schema::TransferPricingMethod::TransactionalNetMargin => {
2743 datasynth_core::models::intercompany::TransferPricingMethod::TransactionalNetMargin
2744 }
2745 datasynth_config::schema::TransferPricingMethod::ProfitSplit => {
2746 datasynth_core::models::intercompany::TransferPricingMethod::ProfitSplit
2747 }
2748 };
2749
2750 let ic_currency = self
2752 .config
2753 .companies
2754 .first()
2755 .map(|c| c.currency.clone())
2756 .unwrap_or_else(|| "USD".to_string());
2757 let ic_gen_config = datasynth_generators::ICGeneratorConfig {
2758 ic_transaction_rate: self.config.intercompany.ic_transaction_rate,
2759 transfer_pricing_method: tp_method,
2760 markup_percent: rust_decimal::Decimal::from_f64_retain(
2761 self.config.intercompany.markup_percent,
2762 )
2763 .unwrap_or(rust_decimal::Decimal::from(5)),
2764 generate_matched_pairs: self.config.intercompany.generate_matched_pairs,
2765 default_currency: ic_currency,
2766 ..Default::default()
2767 };
2768
2769 let mut ic_generator = datasynth_generators::ICGenerator::new(
2771 ic_gen_config,
2772 ownership_structure.clone(),
2773 seed + 50,
2774 );
2775
2776 let transactions_per_day = 3;
2779 let matched_pairs = ic_generator.generate_transactions_for_period(
2780 start_date,
2781 end_date,
2782 transactions_per_day,
2783 );
2784
2785 let mut seller_entries = Vec::new();
2787 let mut buyer_entries = Vec::new();
2788 let fiscal_year = start_date.year();
2789
2790 for pair in &matched_pairs {
2791 let fiscal_period = pair.posting_date.month();
2792 let (seller_je, buyer_je) =
2793 ic_generator.generate_journal_entries(pair, fiscal_year, fiscal_period);
2794 seller_entries.push(seller_je);
2795 buyer_entries.push(buyer_je);
2796 }
2797
2798 let matching_config = datasynth_generators::ICMatchingConfig {
2800 base_currency: self
2801 .config
2802 .companies
2803 .first()
2804 .map(|c| c.currency.clone())
2805 .unwrap_or_else(|| "USD".to_string()),
2806 ..Default::default()
2807 };
2808 let mut matching_engine = datasynth_generators::ICMatchingEngine::new(matching_config);
2809 matching_engine.load_matched_pairs(&matched_pairs);
2810 let matching_result = matching_engine.run_matching(end_date);
2811
2812 let mut elimination_entries = Vec::new();
2814 if self.config.intercompany.generate_eliminations {
2815 let elim_config = datasynth_generators::EliminationConfig {
2816 consolidation_entity: "GROUP".to_string(),
2817 base_currency: self
2818 .config
2819 .companies
2820 .first()
2821 .map(|c| c.currency.clone())
2822 .unwrap_or_else(|| "USD".to_string()),
2823 ..Default::default()
2824 };
2825
2826 let mut elim_generator =
2827 datasynth_generators::EliminationGenerator::new(elim_config, ownership_structure);
2828
2829 let fiscal_period = format!("{}{:02}", fiscal_year, end_date.month());
2830 let all_balances: Vec<datasynth_core::models::intercompany::ICAggregatedBalance> =
2831 matching_result
2832 .matched_balances
2833 .iter()
2834 .chain(matching_result.unmatched_balances.iter())
2835 .cloned()
2836 .collect();
2837
2838 let journal = elim_generator.generate_eliminations(
2839 &fiscal_period,
2840 end_date,
2841 &all_balances,
2842 &matched_pairs,
2843 &std::collections::HashMap::new(), &std::collections::HashMap::new(), );
2846
2847 elimination_entries = journal.entries.clone();
2848 }
2849
2850 let matched_pair_count = matched_pairs.len();
2851 let elimination_entry_count = elimination_entries.len();
2852 let match_rate = matching_result.match_rate;
2853
2854 stats.ic_matched_pair_count = matched_pair_count;
2855 stats.ic_elimination_count = elimination_entry_count;
2856 stats.ic_transaction_count = seller_entries.len() + buyer_entries.len();
2857
2858 info!(
2859 "Intercompany data generated: {} matched pairs, {} JEs ({} seller + {} buyer), {} elimination entries, {:.1}% match rate",
2860 matched_pair_count,
2861 stats.ic_transaction_count,
2862 seller_entries.len(),
2863 buyer_entries.len(),
2864 elimination_entry_count,
2865 match_rate * 100.0
2866 );
2867 self.check_resources_with_log("post-intercompany")?;
2868
2869 Ok(IntercompanySnapshot {
2870 matched_pairs,
2871 seller_journal_entries: seller_entries,
2872 buyer_journal_entries: buyer_entries,
2873 elimination_entries,
2874 matched_pair_count,
2875 elimination_entry_count,
2876 match_rate,
2877 })
2878 }
2879
2880 fn phase_financial_reporting(
2882 &mut self,
2883 document_flows: &DocumentFlowSnapshot,
2884 journal_entries: &[JournalEntry],
2885 coa: &Arc<ChartOfAccounts>,
2886 stats: &mut EnhancedGenerationStatistics,
2887 ) -> SynthResult<FinancialReportingSnapshot> {
2888 let fs_enabled = self.phase_config.generate_financial_statements
2889 || self.config.financial_reporting.enabled;
2890 let br_enabled = self.phase_config.generate_bank_reconciliation;
2891
2892 if !fs_enabled && !br_enabled {
2893 debug!("Phase 15: Skipped (financial reporting disabled)");
2894 return Ok(FinancialReportingSnapshot::default());
2895 }
2896
2897 info!("Phase 15: Generating Financial Reporting Data");
2898
2899 let seed = self.seed;
2900 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
2901 .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
2902
2903 let mut financial_statements = Vec::new();
2904 let mut bank_reconciliations = Vec::new();
2905 let mut trial_balances = Vec::new();
2906
2907 if fs_enabled {
2915 let company_code = self
2916 .config
2917 .companies
2918 .first()
2919 .map(|c| c.code.as_str())
2920 .unwrap_or("1000");
2921 let currency = self
2922 .config
2923 .companies
2924 .first()
2925 .map(|c| c.currency.as_str())
2926 .unwrap_or("USD");
2927 let has_journal_entries = !journal_entries.is_empty();
2928
2929 let mut fs_gen = FinancialStatementGenerator::new(seed + 20);
2932
2933 let mut prior_cumulative_tb: Option<Vec<datasynth_generators::TrialBalanceEntry>> =
2935 None;
2936
2937 for period in 0..self.config.global.period_months {
2939 let period_start = start_date + chrono::Months::new(period);
2940 let period_end =
2941 start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
2942 let fiscal_year = period_end.year() as u16;
2943 let fiscal_period = period_end.month() as u8;
2944
2945 if has_journal_entries {
2946 let tb_entries = Self::build_cumulative_trial_balance(
2949 journal_entries,
2950 coa,
2951 company_code,
2952 start_date,
2953 period_end,
2954 fiscal_year,
2955 fiscal_period,
2956 );
2957
2958 let prior_ref = prior_cumulative_tb.as_deref();
2961 let stmts = fs_gen.generate(
2962 company_code,
2963 currency,
2964 &tb_entries,
2965 period_start,
2966 period_end,
2967 fiscal_year,
2968 fiscal_period,
2969 prior_ref,
2970 "SYS-AUTOCLOSE",
2971 );
2972
2973 for stmt in stmts {
2975 if stmt.statement_type == StatementType::CashFlowStatement {
2976 let net_income = Self::calculate_net_income_from_tb(&tb_entries);
2978 let cf_items = Self::build_cash_flow_from_trial_balances(
2979 &tb_entries,
2980 prior_ref,
2981 net_income,
2982 );
2983 financial_statements.push(FinancialStatement {
2984 cash_flow_items: cf_items,
2985 ..stmt
2986 });
2987 } else {
2988 financial_statements.push(stmt);
2989 }
2990 }
2991
2992 trial_balances.push(PeriodTrialBalance {
2994 fiscal_year,
2995 fiscal_period,
2996 period_start,
2997 period_end,
2998 entries: tb_entries.clone(),
2999 });
3000
3001 prior_cumulative_tb = Some(tb_entries);
3003 } else {
3004 let tb_entries = Self::build_trial_balance_from_entries(
3007 journal_entries,
3008 coa,
3009 company_code,
3010 fiscal_year,
3011 fiscal_period,
3012 );
3013
3014 let stmts = fs_gen.generate(
3015 company_code,
3016 currency,
3017 &tb_entries,
3018 period_start,
3019 period_end,
3020 fiscal_year,
3021 fiscal_period,
3022 None,
3023 "SYS-AUTOCLOSE",
3024 );
3025 financial_statements.extend(stmts);
3026
3027 if !tb_entries.is_empty() {
3029 trial_balances.push(PeriodTrialBalance {
3030 fiscal_year,
3031 fiscal_period,
3032 period_start,
3033 period_end,
3034 entries: tb_entries,
3035 });
3036 }
3037 }
3038 }
3039 stats.financial_statement_count = financial_statements.len();
3040 info!(
3041 "Financial statements generated: {} statements (JE-derived: {})",
3042 stats.financial_statement_count, has_journal_entries
3043 );
3044 }
3045
3046 if br_enabled && !document_flows.payments.is_empty() {
3048 let employee_ids: Vec<String> = self
3049 .master_data
3050 .employees
3051 .iter()
3052 .map(|e| e.employee_id.clone())
3053 .collect();
3054 let mut br_gen =
3055 BankReconciliationGenerator::new(seed + 25).with_employee_pool(employee_ids);
3056
3057 for company in &self.config.companies {
3059 let company_payments: Vec<PaymentReference> = document_flows
3060 .payments
3061 .iter()
3062 .filter(|p| p.header.company_code == company.code)
3063 .map(|p| PaymentReference {
3064 id: p.header.document_id.clone(),
3065 amount: if p.is_vendor { p.amount } else { -p.amount },
3066 date: p.header.document_date,
3067 reference: p
3068 .check_number
3069 .clone()
3070 .or_else(|| p.wire_reference.clone())
3071 .unwrap_or_else(|| p.header.document_id.clone()),
3072 })
3073 .collect();
3074
3075 if company_payments.is_empty() {
3076 continue;
3077 }
3078
3079 let bank_account_id = format!("{}-MAIN", company.code);
3080
3081 for period in 0..self.config.global.period_months {
3083 let period_start = start_date + chrono::Months::new(period);
3084 let period_end =
3085 start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
3086
3087 let period_payments: Vec<PaymentReference> = company_payments
3088 .iter()
3089 .filter(|p| p.date >= period_start && p.date <= period_end)
3090 .cloned()
3091 .collect();
3092
3093 let recon = br_gen.generate(
3094 &company.code,
3095 &bank_account_id,
3096 period_start,
3097 period_end,
3098 &company.currency,
3099 &period_payments,
3100 );
3101 bank_reconciliations.push(recon);
3102 }
3103 }
3104 info!(
3105 "Bank reconciliations generated: {} reconciliations",
3106 bank_reconciliations.len()
3107 );
3108 }
3109
3110 stats.bank_reconciliation_count = bank_reconciliations.len();
3111 self.check_resources_with_log("post-financial-reporting")?;
3112
3113 if !trial_balances.is_empty() {
3114 info!(
3115 "Period-close trial balances captured: {} periods",
3116 trial_balances.len()
3117 );
3118 }
3119
3120 Ok(FinancialReportingSnapshot {
3121 financial_statements,
3122 bank_reconciliations,
3123 trial_balances,
3124 })
3125 }
3126
3127 fn build_trial_balance_from_entries(
3133 journal_entries: &[JournalEntry],
3134 coa: &ChartOfAccounts,
3135 company_code: &str,
3136 fiscal_year: u16,
3137 fiscal_period: u8,
3138 ) -> Vec<datasynth_generators::TrialBalanceEntry> {
3139 use rust_decimal::Decimal;
3140
3141 let mut account_debits: HashMap<String, Decimal> = HashMap::new();
3143 let mut account_credits: HashMap<String, Decimal> = HashMap::new();
3144
3145 for je in journal_entries {
3146 if je.header.company_code != company_code
3148 || je.header.fiscal_year != fiscal_year
3149 || je.header.fiscal_period != fiscal_period
3150 {
3151 continue;
3152 }
3153
3154 for line in &je.lines {
3155 let acct = &line.gl_account;
3156 *account_debits.entry(acct.clone()).or_insert(Decimal::ZERO) += line.debit_amount;
3157 *account_credits.entry(acct.clone()).or_insert(Decimal::ZERO) += line.credit_amount;
3158 }
3159 }
3160
3161 let mut all_accounts: Vec<&String> = account_debits
3163 .keys()
3164 .chain(account_credits.keys())
3165 .collect::<std::collections::HashSet<_>>()
3166 .into_iter()
3167 .collect();
3168 all_accounts.sort();
3169
3170 let mut entries = Vec::new();
3171
3172 for acct_number in all_accounts {
3173 let debit = account_debits
3174 .get(acct_number)
3175 .copied()
3176 .unwrap_or(Decimal::ZERO);
3177 let credit = account_credits
3178 .get(acct_number)
3179 .copied()
3180 .unwrap_or(Decimal::ZERO);
3181
3182 if debit.is_zero() && credit.is_zero() {
3183 continue;
3184 }
3185
3186 let account_name = coa
3188 .get_account(acct_number)
3189 .map(|gl| gl.short_description.clone())
3190 .unwrap_or_else(|| format!("Account {}", acct_number));
3191
3192 let category = Self::category_from_account_code(acct_number);
3197
3198 entries.push(datasynth_generators::TrialBalanceEntry {
3199 account_code: acct_number.clone(),
3200 account_name,
3201 category,
3202 debit_balance: debit,
3203 credit_balance: credit,
3204 });
3205 }
3206
3207 entries
3208 }
3209
3210 fn build_cumulative_trial_balance(
3217 journal_entries: &[JournalEntry],
3218 coa: &ChartOfAccounts,
3219 company_code: &str,
3220 start_date: NaiveDate,
3221 period_end: NaiveDate,
3222 fiscal_year: u16,
3223 fiscal_period: u8,
3224 ) -> Vec<datasynth_generators::TrialBalanceEntry> {
3225 use rust_decimal::Decimal;
3226
3227 let mut bs_debits: HashMap<String, Decimal> = HashMap::new();
3229 let mut bs_credits: HashMap<String, Decimal> = HashMap::new();
3230
3231 let mut is_debits: HashMap<String, Decimal> = HashMap::new();
3233 let mut is_credits: HashMap<String, Decimal> = HashMap::new();
3234
3235 for je in journal_entries {
3236 if je.header.company_code != company_code {
3237 continue;
3238 }
3239
3240 for line in &je.lines {
3241 let acct = &line.gl_account;
3242 let category = Self::category_from_account_code(acct);
3243 let is_bs_account = matches!(
3244 category.as_str(),
3245 "Cash"
3246 | "Receivables"
3247 | "Inventory"
3248 | "FixedAssets"
3249 | "Payables"
3250 | "AccruedLiabilities"
3251 | "LongTermDebt"
3252 | "Equity"
3253 );
3254
3255 if is_bs_account {
3256 if je.header.document_date <= period_end
3258 && je.header.document_date >= start_date
3259 {
3260 *bs_debits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
3261 line.debit_amount;
3262 *bs_credits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
3263 line.credit_amount;
3264 }
3265 } else {
3266 if je.header.fiscal_year == fiscal_year
3268 && je.header.fiscal_period == fiscal_period
3269 {
3270 *is_debits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
3271 line.debit_amount;
3272 *is_credits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
3273 line.credit_amount;
3274 }
3275 }
3276 }
3277 }
3278
3279 let mut all_accounts: std::collections::HashSet<String> = std::collections::HashSet::new();
3281 all_accounts.extend(bs_debits.keys().cloned());
3282 all_accounts.extend(bs_credits.keys().cloned());
3283 all_accounts.extend(is_debits.keys().cloned());
3284 all_accounts.extend(is_credits.keys().cloned());
3285
3286 let mut sorted_accounts: Vec<String> = all_accounts.into_iter().collect();
3287 sorted_accounts.sort();
3288
3289 let mut entries = Vec::new();
3290
3291 for acct_number in &sorted_accounts {
3292 let category = Self::category_from_account_code(acct_number);
3293 let is_bs_account = matches!(
3294 category.as_str(),
3295 "Cash"
3296 | "Receivables"
3297 | "Inventory"
3298 | "FixedAssets"
3299 | "Payables"
3300 | "AccruedLiabilities"
3301 | "LongTermDebt"
3302 | "Equity"
3303 );
3304
3305 let (debit, credit) = if is_bs_account {
3306 (
3307 bs_debits.get(acct_number).copied().unwrap_or(Decimal::ZERO),
3308 bs_credits
3309 .get(acct_number)
3310 .copied()
3311 .unwrap_or(Decimal::ZERO),
3312 )
3313 } else {
3314 (
3315 is_debits.get(acct_number).copied().unwrap_or(Decimal::ZERO),
3316 is_credits
3317 .get(acct_number)
3318 .copied()
3319 .unwrap_or(Decimal::ZERO),
3320 )
3321 };
3322
3323 if debit.is_zero() && credit.is_zero() {
3324 continue;
3325 }
3326
3327 let account_name = coa
3328 .get_account(acct_number)
3329 .map(|gl| gl.short_description.clone())
3330 .unwrap_or_else(|| format!("Account {}", acct_number));
3331
3332 entries.push(datasynth_generators::TrialBalanceEntry {
3333 account_code: acct_number.clone(),
3334 account_name,
3335 category,
3336 debit_balance: debit,
3337 credit_balance: credit,
3338 });
3339 }
3340
3341 entries
3342 }
3343
3344 fn build_cash_flow_from_trial_balances(
3349 current_tb: &[datasynth_generators::TrialBalanceEntry],
3350 prior_tb: Option<&[datasynth_generators::TrialBalanceEntry]>,
3351 net_income: rust_decimal::Decimal,
3352 ) -> Vec<CashFlowItem> {
3353 use rust_decimal::Decimal;
3354
3355 let aggregate =
3357 |tb: &[datasynth_generators::TrialBalanceEntry]| -> HashMap<String, Decimal> {
3358 let mut map: HashMap<String, Decimal> = HashMap::new();
3359 for entry in tb {
3360 let net = entry.debit_balance - entry.credit_balance;
3361 *map.entry(entry.category.clone()).or_default() += net;
3362 }
3363 map
3364 };
3365
3366 let current = aggregate(current_tb);
3367 let prior = prior_tb.map(aggregate);
3368
3369 let get = |map: &HashMap<String, Decimal>, key: &str| -> Decimal {
3371 *map.get(key).unwrap_or(&Decimal::ZERO)
3372 };
3373
3374 let change = |key: &str| -> Decimal {
3376 let curr = get(¤t, key);
3377 match &prior {
3378 Some(p) => curr - get(p, key),
3379 None => curr,
3380 }
3381 };
3382
3383 let fixed_asset_change = change("FixedAssets");
3386 let depreciation_addback = if fixed_asset_change < Decimal::ZERO {
3387 -fixed_asset_change
3388 } else {
3389 Decimal::ZERO
3390 };
3391
3392 let ar_change = change("Receivables");
3394 let inventory_change = change("Inventory");
3395 let ap_change = change("Payables");
3397 let accrued_change = change("AccruedLiabilities");
3398
3399 let operating_cf = net_income + depreciation_addback - ar_change - inventory_change
3400 + (-ap_change)
3401 + (-accrued_change);
3402
3403 let capex = if fixed_asset_change > Decimal::ZERO {
3405 -fixed_asset_change
3406 } else {
3407 Decimal::ZERO
3408 };
3409 let investing_cf = capex;
3410
3411 let debt_change = -change("LongTermDebt");
3413 let equity_change = -change("Equity");
3414 let financing_cf = debt_change + equity_change;
3415
3416 let net_change = operating_cf + investing_cf + financing_cf;
3417
3418 vec![
3419 CashFlowItem {
3420 item_code: "CF-NI".to_string(),
3421 label: "Net Income".to_string(),
3422 category: CashFlowCategory::Operating,
3423 amount: net_income,
3424 amount_prior: None,
3425 sort_order: 1,
3426 is_total: false,
3427 },
3428 CashFlowItem {
3429 item_code: "CF-DEP".to_string(),
3430 label: "Depreciation & Amortization".to_string(),
3431 category: CashFlowCategory::Operating,
3432 amount: depreciation_addback,
3433 amount_prior: None,
3434 sort_order: 2,
3435 is_total: false,
3436 },
3437 CashFlowItem {
3438 item_code: "CF-AR".to_string(),
3439 label: "Change in Accounts Receivable".to_string(),
3440 category: CashFlowCategory::Operating,
3441 amount: -ar_change,
3442 amount_prior: None,
3443 sort_order: 3,
3444 is_total: false,
3445 },
3446 CashFlowItem {
3447 item_code: "CF-AP".to_string(),
3448 label: "Change in Accounts Payable".to_string(),
3449 category: CashFlowCategory::Operating,
3450 amount: -ap_change,
3451 amount_prior: None,
3452 sort_order: 4,
3453 is_total: false,
3454 },
3455 CashFlowItem {
3456 item_code: "CF-INV".to_string(),
3457 label: "Change in Inventory".to_string(),
3458 category: CashFlowCategory::Operating,
3459 amount: -inventory_change,
3460 amount_prior: None,
3461 sort_order: 5,
3462 is_total: false,
3463 },
3464 CashFlowItem {
3465 item_code: "CF-OP".to_string(),
3466 label: "Net Cash from Operating Activities".to_string(),
3467 category: CashFlowCategory::Operating,
3468 amount: operating_cf,
3469 amount_prior: None,
3470 sort_order: 6,
3471 is_total: true,
3472 },
3473 CashFlowItem {
3474 item_code: "CF-CAPEX".to_string(),
3475 label: "Capital Expenditures".to_string(),
3476 category: CashFlowCategory::Investing,
3477 amount: capex,
3478 amount_prior: None,
3479 sort_order: 7,
3480 is_total: false,
3481 },
3482 CashFlowItem {
3483 item_code: "CF-INV-T".to_string(),
3484 label: "Net Cash from Investing Activities".to_string(),
3485 category: CashFlowCategory::Investing,
3486 amount: investing_cf,
3487 amount_prior: None,
3488 sort_order: 8,
3489 is_total: true,
3490 },
3491 CashFlowItem {
3492 item_code: "CF-DEBT".to_string(),
3493 label: "Net Borrowings / (Repayments)".to_string(),
3494 category: CashFlowCategory::Financing,
3495 amount: debt_change,
3496 amount_prior: None,
3497 sort_order: 9,
3498 is_total: false,
3499 },
3500 CashFlowItem {
3501 item_code: "CF-EQ".to_string(),
3502 label: "Equity Changes".to_string(),
3503 category: CashFlowCategory::Financing,
3504 amount: equity_change,
3505 amount_prior: None,
3506 sort_order: 10,
3507 is_total: false,
3508 },
3509 CashFlowItem {
3510 item_code: "CF-FIN-T".to_string(),
3511 label: "Net Cash from Financing Activities".to_string(),
3512 category: CashFlowCategory::Financing,
3513 amount: financing_cf,
3514 amount_prior: None,
3515 sort_order: 11,
3516 is_total: true,
3517 },
3518 CashFlowItem {
3519 item_code: "CF-NET".to_string(),
3520 label: "Net Change in Cash".to_string(),
3521 category: CashFlowCategory::Operating,
3522 amount: net_change,
3523 amount_prior: None,
3524 sort_order: 12,
3525 is_total: true,
3526 },
3527 ]
3528 }
3529
3530 fn calculate_net_income_from_tb(
3534 tb: &[datasynth_generators::TrialBalanceEntry],
3535 ) -> rust_decimal::Decimal {
3536 use rust_decimal::Decimal;
3537
3538 let mut aggregated: HashMap<String, Decimal> = HashMap::new();
3539 for entry in tb {
3540 let net = entry.debit_balance - entry.credit_balance;
3541 *aggregated.entry(entry.category.clone()).or_default() += net;
3542 }
3543
3544 let revenue = *aggregated.get("Revenue").unwrap_or(&Decimal::ZERO);
3545 let cogs = *aggregated.get("CostOfSales").unwrap_or(&Decimal::ZERO);
3546 let opex = *aggregated
3547 .get("OperatingExpenses")
3548 .unwrap_or(&Decimal::ZERO);
3549 let other_income = *aggregated.get("OtherIncome").unwrap_or(&Decimal::ZERO);
3550 let other_expenses = *aggregated.get("OtherExpenses").unwrap_or(&Decimal::ZERO);
3551
3552 let operating_income = revenue - cogs - opex - other_expenses - other_income;
3555 let tax_rate = Decimal::from_f64_retain(0.25).unwrap_or(Decimal::ZERO);
3556 let tax = operating_income * tax_rate;
3557 operating_income - tax
3558 }
3559
/// Maps a GL account code to a reporting category using its first two
/// characters.
///
/// | Prefix  | Category           |
/// |---------|--------------------|
/// | 10      | Cash               |
/// | 11      | Receivables        |
/// | 12–14   | Inventory          |
/// | 15–19   | FixedAssets        |
/// | 20      | Payables           |
/// | 21–24   | AccruedLiabilities |
/// | 25–29   | LongTermDebt       |
/// | 30–39   | Equity             |
/// | 40–44   | Revenue            |
/// | 50–52   | CostOfSales        |
/// | 60–69   | OperatingExpenses  |
/// | 70–74   | OtherIncome        |
/// | 80–89   | OtherExpenses      |
///
/// Any other code, including codes shorter than two characters, maps to
/// `OperatingExpenses`.
fn category_from_account_code(code: &str) -> String {
    let mut leading = code.chars();
    let label = match (leading.next(), leading.next()) {
        (Some('1'), Some('0')) => "Cash",
        (Some('1'), Some('1')) => "Receivables",
        (Some('1'), Some('2'..='4')) => "Inventory",
        (Some('1'), Some('5'..='9')) => "FixedAssets",
        (Some('2'), Some('0')) => "Payables",
        (Some('2'), Some('1'..='4')) => "AccruedLiabilities",
        (Some('2'), Some('5'..='9')) => "LongTermDebt",
        (Some('3'), Some('0'..='9')) => "Equity",
        (Some('4'), Some('0'..='4')) => "Revenue",
        (Some('5'), Some('0'..='2')) => "CostOfSales",
        (Some('6'), Some('0'..='9')) => "OperatingExpenses",
        (Some('7'), Some('0'..='4')) => "OtherIncome",
        (Some('8'), Some('0'..='9')) => "OtherExpenses",
        // Unmapped prefixes share the operating-expenses bucket.
        _ => "OperatingExpenses",
    };
    label.to_owned()
}
3588
3589 fn phase_hr_data(
3591 &mut self,
3592 stats: &mut EnhancedGenerationStatistics,
3593 ) -> SynthResult<HrSnapshot> {
3594 if !self.config.hr.enabled {
3595 debug!("Phase 16: Skipped (HR generation disabled)");
3596 return Ok(HrSnapshot::default());
3597 }
3598
3599 info!("Phase 16: Generating HR Data (Payroll, Time Entries, Expenses)");
3600
3601 let seed = self.seed;
3602 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3603 .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
3604 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
3605 let company_code = self
3606 .config
3607 .companies
3608 .first()
3609 .map(|c| c.code.as_str())
3610 .unwrap_or("1000");
3611 let currency = self
3612 .config
3613 .companies
3614 .first()
3615 .map(|c| c.currency.as_str())
3616 .unwrap_or("USD");
3617
3618 let employee_ids: Vec<String> = self
3619 .master_data
3620 .employees
3621 .iter()
3622 .map(|e| e.employee_id.clone())
3623 .collect();
3624
3625 if employee_ids.is_empty() {
3626 debug!("Phase 16: Skipped (no employees available)");
3627 return Ok(HrSnapshot::default());
3628 }
3629
3630 let cost_center_ids: Vec<String> = self
3633 .master_data
3634 .employees
3635 .iter()
3636 .filter_map(|e| e.cost_center.clone())
3637 .collect::<std::collections::HashSet<_>>()
3638 .into_iter()
3639 .collect();
3640
3641 let mut snapshot = HrSnapshot::default();
3642
3643 if self.config.hr.payroll.enabled {
3645 let mut payroll_gen = datasynth_generators::PayrollGenerator::new(seed + 30)
3646 .with_pools(employee_ids.clone(), cost_center_ids.clone());
3647
3648 let payroll_pack = self.primary_pack();
3650
3651 payroll_gen.set_country_pack(payroll_pack.clone());
3654
3655 let employees_with_salary: Vec<(
3656 String,
3657 rust_decimal::Decimal,
3658 Option<String>,
3659 Option<String>,
3660 )> = self
3661 .master_data
3662 .employees
3663 .iter()
3664 .map(|e| {
3665 (
3666 e.employee_id.clone(),
3667 rust_decimal::Decimal::from(5000), e.cost_center.clone(),
3669 e.department_id.clone(),
3670 )
3671 })
3672 .collect();
3673
3674 for month in 0..self.config.global.period_months {
3675 let period_start = start_date + chrono::Months::new(month);
3676 let period_end = start_date + chrono::Months::new(month + 1) - chrono::Days::new(1);
3677 let (run, items) = payroll_gen.generate(
3678 company_code,
3679 &employees_with_salary,
3680 period_start,
3681 period_end,
3682 currency,
3683 );
3684 snapshot.payroll_runs.push(run);
3685 snapshot.payroll_run_count += 1;
3686 snapshot.payroll_line_item_count += items.len();
3687 snapshot.payroll_line_items.extend(items);
3688 }
3689 }
3690
3691 if self.config.hr.time_attendance.enabled {
3693 let mut time_gen = datasynth_generators::TimeEntryGenerator::new(seed + 31)
3694 .with_pools(employee_ids.clone(), cost_center_ids.clone());
3695 let entries = time_gen.generate(
3696 &employee_ids,
3697 start_date,
3698 end_date,
3699 &self.config.hr.time_attendance,
3700 );
3701 snapshot.time_entry_count = entries.len();
3702 snapshot.time_entries = entries;
3703 }
3704
3705 if self.config.hr.expenses.enabled {
3707 let mut expense_gen = datasynth_generators::ExpenseReportGenerator::new(seed + 32)
3708 .with_pools(employee_ids.clone(), cost_center_ids.clone());
3709 let company_currency = self
3710 .config
3711 .companies
3712 .first()
3713 .map(|c| c.currency.as_str())
3714 .unwrap_or("USD");
3715 let reports = expense_gen.generate_with_currency(
3716 &employee_ids,
3717 start_date,
3718 end_date,
3719 &self.config.hr.expenses,
3720 company_currency,
3721 );
3722 snapshot.expense_report_count = reports.len();
3723 snapshot.expense_reports = reports;
3724 }
3725
3726 stats.payroll_run_count = snapshot.payroll_run_count;
3727 stats.time_entry_count = snapshot.time_entry_count;
3728 stats.expense_report_count = snapshot.expense_report_count;
3729
3730 info!(
3731 "HR data generated: {} payroll runs ({} line items), {} time entries, {} expense reports",
3732 snapshot.payroll_run_count, snapshot.payroll_line_item_count,
3733 snapshot.time_entry_count, snapshot.expense_report_count
3734 );
3735 self.check_resources_with_log("post-hr")?;
3736
3737 Ok(snapshot)
3738 }
3739
3740 fn phase_accounting_standards(
3742 &mut self,
3743 stats: &mut EnhancedGenerationStatistics,
3744 ) -> SynthResult<AccountingStandardsSnapshot> {
3745 if !self.phase_config.generate_accounting_standards
3746 || !self.config.accounting_standards.enabled
3747 {
3748 debug!("Phase 17: Skipped (accounting standards generation disabled)");
3749 return Ok(AccountingStandardsSnapshot::default());
3750 }
3751 info!("Phase 17: Generating Accounting Standards Data");
3752
3753 let seed = self.seed;
3754 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3755 .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
3756 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
3757 let company_code = self
3758 .config
3759 .companies
3760 .first()
3761 .map(|c| c.code.as_str())
3762 .unwrap_or("1000");
3763 let currency = self
3764 .config
3765 .companies
3766 .first()
3767 .map(|c| c.currency.as_str())
3768 .unwrap_or("USD");
3769
3770 let framework = match self.config.accounting_standards.framework {
3775 Some(datasynth_config::schema::AccountingFrameworkConfig::UsGaap) => {
3776 datasynth_standards::framework::AccountingFramework::UsGaap
3777 }
3778 Some(datasynth_config::schema::AccountingFrameworkConfig::Ifrs) => {
3779 datasynth_standards::framework::AccountingFramework::Ifrs
3780 }
3781 Some(datasynth_config::schema::AccountingFrameworkConfig::DualReporting) => {
3782 datasynth_standards::framework::AccountingFramework::DualReporting
3783 }
3784 Some(datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap) => {
3785 datasynth_standards::framework::AccountingFramework::FrenchGaap
3786 }
3787 Some(datasynth_config::schema::AccountingFrameworkConfig::GermanGaap) => {
3788 datasynth_standards::framework::AccountingFramework::GermanGaap
3789 }
3790 None => {
3791 let pack = self.primary_pack();
3793 let pack_fw = pack.accounting.framework.as_str();
3794 match pack_fw {
3795 "ifrs" => datasynth_standards::framework::AccountingFramework::Ifrs,
3796 "dual_reporting" => {
3797 datasynth_standards::framework::AccountingFramework::DualReporting
3798 }
3799 "french_gaap" => {
3800 datasynth_standards::framework::AccountingFramework::FrenchGaap
3801 }
3802 "german_gaap" | "hgb" => {
3803 datasynth_standards::framework::AccountingFramework::GermanGaap
3804 }
3805 _ => datasynth_standards::framework::AccountingFramework::UsGaap,
3807 }
3808 }
3809 };
3810
3811 let mut snapshot = AccountingStandardsSnapshot::default();
3812
3813 if self.config.accounting_standards.revenue_recognition.enabled {
3815 let customer_ids: Vec<String> = self
3816 .master_data
3817 .customers
3818 .iter()
3819 .map(|c| c.customer_id.clone())
3820 .collect();
3821
3822 if !customer_ids.is_empty() {
3823 let mut rev_gen = datasynth_generators::RevenueRecognitionGenerator::new(seed + 40);
3824 let contracts = rev_gen.generate(
3825 company_code,
3826 &customer_ids,
3827 start_date,
3828 end_date,
3829 currency,
3830 &self.config.accounting_standards.revenue_recognition,
3831 framework,
3832 );
3833 snapshot.revenue_contract_count = contracts.len();
3834 snapshot.contracts = contracts;
3835 }
3836 }
3837
3838 if self.config.accounting_standards.impairment.enabled {
3840 let asset_data: Vec<(String, String, rust_decimal::Decimal)> = self
3841 .master_data
3842 .assets
3843 .iter()
3844 .map(|a| {
3845 (
3846 a.asset_id.clone(),
3847 a.description.clone(),
3848 a.acquisition_cost,
3849 )
3850 })
3851 .collect();
3852
3853 if !asset_data.is_empty() {
3854 let mut imp_gen = datasynth_generators::ImpairmentGenerator::new(seed + 41);
3855 let tests = imp_gen.generate(
3856 company_code,
3857 &asset_data,
3858 end_date,
3859 &self.config.accounting_standards.impairment,
3860 framework,
3861 );
3862 snapshot.impairment_test_count = tests.len();
3863 snapshot.impairment_tests = tests;
3864 }
3865 }
3866
3867 stats.revenue_contract_count = snapshot.revenue_contract_count;
3868 stats.impairment_test_count = snapshot.impairment_test_count;
3869
3870 info!(
3871 "Accounting standards data generated: {} revenue contracts, {} impairment tests",
3872 snapshot.revenue_contract_count, snapshot.impairment_test_count
3873 );
3874 self.check_resources_with_log("post-accounting-standards")?;
3875
3876 Ok(snapshot)
3877 }
3878
3879 fn phase_manufacturing(
3881 &mut self,
3882 stats: &mut EnhancedGenerationStatistics,
3883 ) -> SynthResult<ManufacturingSnapshot> {
3884 if !self.phase_config.generate_manufacturing || !self.config.manufacturing.enabled {
3885 debug!("Phase 18: Skipped (manufacturing generation disabled)");
3886 return Ok(ManufacturingSnapshot::default());
3887 }
3888 info!("Phase 18: Generating Manufacturing Data");
3889
3890 let seed = self.seed;
3891 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3892 .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
3893 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
3894 let company_code = self
3895 .config
3896 .companies
3897 .first()
3898 .map(|c| c.code.as_str())
3899 .unwrap_or("1000");
3900
3901 let material_data: Vec<(String, String)> = self
3902 .master_data
3903 .materials
3904 .iter()
3905 .map(|m| (m.material_id.clone(), m.description.clone()))
3906 .collect();
3907
3908 if material_data.is_empty() {
3909 debug!("Phase 18: Skipped (no materials available)");
3910 return Ok(ManufacturingSnapshot::default());
3911 }
3912
3913 let mut snapshot = ManufacturingSnapshot::default();
3914
3915 let mut prod_gen = datasynth_generators::ProductionOrderGenerator::new(seed + 50);
3917 let production_orders = prod_gen.generate(
3918 company_code,
3919 &material_data,
3920 start_date,
3921 end_date,
3922 &self.config.manufacturing.production_orders,
3923 &self.config.manufacturing.costing,
3924 &self.config.manufacturing.routing,
3925 );
3926 snapshot.production_order_count = production_orders.len();
3927
3928 let inspection_data: Vec<(String, String, String)> = production_orders
3930 .iter()
3931 .map(|po| {
3932 (
3933 po.order_id.clone(),
3934 po.material_id.clone(),
3935 po.material_description.clone(),
3936 )
3937 })
3938 .collect();
3939
3940 snapshot.production_orders = production_orders;
3941
3942 if !inspection_data.is_empty() {
3943 let mut qi_gen = datasynth_generators::QualityInspectionGenerator::new(seed + 51);
3944 let inspections = qi_gen.generate(company_code, &inspection_data, end_date);
3945 snapshot.quality_inspection_count = inspections.len();
3946 snapshot.quality_inspections = inspections;
3947 }
3948
3949 let storage_locations: Vec<(String, String)> = material_data
3951 .iter()
3952 .enumerate()
3953 .map(|(i, (mid, _))| (mid.clone(), format!("SL-{:03}", (i % 10) + 1)))
3954 .collect();
3955
3956 let employee_ids: Vec<String> = self
3957 .master_data
3958 .employees
3959 .iter()
3960 .map(|e| e.employee_id.clone())
3961 .collect();
3962 let mut cc_gen = datasynth_generators::CycleCountGenerator::new(seed + 52)
3963 .with_employee_pool(employee_ids);
3964 let mut cycle_count_total = 0usize;
3965 for month in 0..self.config.global.period_months {
3966 let count_date = start_date + chrono::Months::new(month);
3967 let items_per_count = storage_locations.len().clamp(10, 50);
3968 let cc = cc_gen.generate(
3969 company_code,
3970 &storage_locations,
3971 count_date,
3972 items_per_count,
3973 );
3974 snapshot.cycle_counts.push(cc);
3975 cycle_count_total += 1;
3976 }
3977 snapshot.cycle_count_count = cycle_count_total;
3978
3979 stats.production_order_count = snapshot.production_order_count;
3980 stats.quality_inspection_count = snapshot.quality_inspection_count;
3981 stats.cycle_count_count = snapshot.cycle_count_count;
3982
3983 info!(
3984 "Manufacturing data generated: {} production orders, {} quality inspections, {} cycle counts",
3985 snapshot.production_order_count, snapshot.quality_inspection_count, snapshot.cycle_count_count
3986 );
3987 self.check_resources_with_log("post-manufacturing")?;
3988
3989 Ok(snapshot)
3990 }
3991
3992 fn phase_sales_kpi_budgets(
3994 &mut self,
3995 coa: &Arc<ChartOfAccounts>,
3996 financial_reporting: &FinancialReportingSnapshot,
3997 stats: &mut EnhancedGenerationStatistics,
3998 ) -> SynthResult<SalesKpiBudgetsSnapshot> {
3999 if !self.phase_config.generate_sales_kpi_budgets {
4000 debug!("Phase 19: Skipped (sales/KPI/budget generation disabled)");
4001 return Ok(SalesKpiBudgetsSnapshot::default());
4002 }
4003 info!("Phase 19: Generating Sales Quotes, KPIs, and Budgets");
4004
4005 let seed = self.seed;
4006 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4007 .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
4008 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
4009 let company_code = self
4010 .config
4011 .companies
4012 .first()
4013 .map(|c| c.code.as_str())
4014 .unwrap_or("1000");
4015
4016 let mut snapshot = SalesKpiBudgetsSnapshot::default();
4017
4018 if self.config.sales_quotes.enabled {
4020 let customer_data: Vec<(String, String)> = self
4021 .master_data
4022 .customers
4023 .iter()
4024 .map(|c| (c.customer_id.clone(), c.name.clone()))
4025 .collect();
4026 let material_data: Vec<(String, String)> = self
4027 .master_data
4028 .materials
4029 .iter()
4030 .map(|m| (m.material_id.clone(), m.description.clone()))
4031 .collect();
4032
4033 if !customer_data.is_empty() && !material_data.is_empty() {
4034 let employee_ids: Vec<String> = self
4035 .master_data
4036 .employees
4037 .iter()
4038 .map(|e| e.employee_id.clone())
4039 .collect();
4040 let customer_ids: Vec<String> = self
4041 .master_data
4042 .customers
4043 .iter()
4044 .map(|c| c.customer_id.clone())
4045 .collect();
4046 let company_currency = self
4047 .config
4048 .companies
4049 .first()
4050 .map(|c| c.currency.as_str())
4051 .unwrap_or("USD");
4052
4053 let mut quote_gen = datasynth_generators::SalesQuoteGenerator::new(seed + 60)
4054 .with_pools(employee_ids, customer_ids);
4055 let quotes = quote_gen.generate_with_currency(
4056 company_code,
4057 &customer_data,
4058 &material_data,
4059 start_date,
4060 end_date,
4061 &self.config.sales_quotes,
4062 company_currency,
4063 );
4064 snapshot.sales_quote_count = quotes.len();
4065 snapshot.sales_quotes = quotes;
4066 }
4067 }
4068
4069 if self.config.financial_reporting.management_kpis.enabled {
4071 let mut kpi_gen = datasynth_generators::KpiGenerator::new(seed + 61);
4072 let mut kpis = kpi_gen.generate(
4073 company_code,
4074 start_date,
4075 end_date,
4076 &self.config.financial_reporting.management_kpis,
4077 );
4078
4079 {
4081 use rust_decimal::Decimal;
4082
4083 if let Some(income_stmt) =
4084 financial_reporting.financial_statements.iter().find(|fs| {
4085 fs.statement_type == StatementType::IncomeStatement
4086 && fs.company_code == company_code
4087 })
4088 {
4089 let total_revenue: Decimal = income_stmt
4091 .line_items
4092 .iter()
4093 .filter(|li| li.section.contains("Revenue") && !li.is_total)
4094 .map(|li| li.amount)
4095 .sum();
4096 let total_cogs: Decimal = income_stmt
4097 .line_items
4098 .iter()
4099 .filter(|li| {
4100 (li.section.contains("Cost") || li.line_code.starts_with("IS-COGS"))
4101 && !li.is_total
4102 })
4103 .map(|li| li.amount.abs())
4104 .sum();
4105 let total_opex: Decimal = income_stmt
4106 .line_items
4107 .iter()
4108 .filter(|li| {
4109 li.section.contains("Expense")
4110 && !li.is_total
4111 && !li.section.contains("Cost")
4112 })
4113 .map(|li| li.amount.abs())
4114 .sum();
4115
4116 if total_revenue > Decimal::ZERO {
4117 let hundred = Decimal::from(100);
4118 let gross_margin_pct =
4119 ((total_revenue - total_cogs) * hundred / total_revenue).round_dp(2);
4120 let operating_income = total_revenue - total_cogs - total_opex;
4121 let op_margin_pct =
4122 (operating_income * hundred / total_revenue).round_dp(2);
4123
4124 for kpi in &mut kpis {
4126 if kpi.name == "Gross Margin" {
4127 kpi.value = gross_margin_pct;
4128 } else if kpi.name == "Operating Margin" {
4129 kpi.value = op_margin_pct;
4130 }
4131 }
4132 }
4133 }
4134
4135 if let Some(bs) = financial_reporting.financial_statements.iter().find(|fs| {
4137 fs.statement_type == StatementType::BalanceSheet
4138 && fs.company_code == company_code
4139 }) {
4140 let current_assets: Decimal = bs
4141 .line_items
4142 .iter()
4143 .filter(|li| li.section.contains("Current Assets") && !li.is_total)
4144 .map(|li| li.amount)
4145 .sum();
4146 let current_liabilities: Decimal = bs
4147 .line_items
4148 .iter()
4149 .filter(|li| li.section.contains("Current Liabilities") && !li.is_total)
4150 .map(|li| li.amount.abs())
4151 .sum();
4152
4153 if current_liabilities > Decimal::ZERO {
4154 let current_ratio = (current_assets / current_liabilities).round_dp(2);
4155 for kpi in &mut kpis {
4156 if kpi.name == "Current Ratio" {
4157 kpi.value = current_ratio;
4158 }
4159 }
4160 }
4161 }
4162 }
4163
4164 snapshot.kpi_count = kpis.len();
4165 snapshot.kpis = kpis;
4166 }
4167
4168 if self.config.financial_reporting.budgets.enabled {
4170 let account_data: Vec<(String, String)> = coa
4171 .accounts
4172 .iter()
4173 .map(|a| (a.account_number.clone(), a.short_description.clone()))
4174 .collect();
4175
4176 if !account_data.is_empty() {
4177 let fiscal_year = start_date.year() as u32;
4178 let mut budget_gen = datasynth_generators::BudgetGenerator::new(seed + 62);
4179 let budget = budget_gen.generate(
4180 company_code,
4181 fiscal_year,
4182 &account_data,
4183 &self.config.financial_reporting.budgets,
4184 );
4185 snapshot.budget_line_count = budget.line_items.len();
4186 snapshot.budgets.push(budget);
4187 }
4188 }
4189
4190 stats.sales_quote_count = snapshot.sales_quote_count;
4191 stats.kpi_count = snapshot.kpi_count;
4192 stats.budget_line_count = snapshot.budget_line_count;
4193
4194 info!(
4195 "Sales/KPI/Budget data generated: {} quotes, {} KPIs, {} budget lines",
4196 snapshot.sales_quote_count, snapshot.kpi_count, snapshot.budget_line_count
4197 );
4198 self.check_resources_with_log("post-sales-kpi-budgets")?;
4199
4200 Ok(snapshot)
4201 }
4202
4203 fn phase_tax_generation(
4205 &mut self,
4206 document_flows: &DocumentFlowSnapshot,
4207 stats: &mut EnhancedGenerationStatistics,
4208 ) -> SynthResult<TaxSnapshot> {
4209 if !self.phase_config.generate_tax || !self.config.tax.enabled {
4210 debug!("Phase 20: Skipped (tax generation disabled)");
4211 return Ok(TaxSnapshot::default());
4212 }
4213 info!("Phase 20: Generating Tax Data");
4214
4215 let seed = self.seed;
4216 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4217 .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
4218 let fiscal_year = start_date.year();
4219 let company_code = self
4220 .config
4221 .companies
4222 .first()
4223 .map(|c| c.code.as_str())
4224 .unwrap_or("1000");
4225
4226 let mut gen =
4227 datasynth_generators::TaxCodeGenerator::with_config(seed + 70, self.config.tax.clone());
4228
4229 let pack = self.primary_pack().clone();
4230 let (jurisdictions, codes) =
4231 gen.generate_from_country_pack(&pack, company_code, fiscal_year);
4232
4233 let mut provisions = Vec::new();
4235 if self.config.tax.provisions.enabled {
4236 let mut provision_gen = datasynth_generators::TaxProvisionGenerator::new(seed + 71);
4237 for company in &self.config.companies {
4238 let pre_tax_income = rust_decimal::Decimal::from(1_000_000);
4239 let statutory_rate = rust_decimal::Decimal::new(
4240 (self.config.tax.provisions.statutory_rate * 100.0) as i64,
4241 2,
4242 );
4243 let provision = provision_gen.generate(
4244 &company.code,
4245 start_date,
4246 pre_tax_income,
4247 statutory_rate,
4248 );
4249 provisions.push(provision);
4250 }
4251 }
4252
4253 let mut tax_lines = Vec::new();
4255 if !codes.is_empty() {
4256 let mut tax_line_gen = datasynth_generators::TaxLineGenerator::new(
4257 datasynth_generators::TaxLineGeneratorConfig::default(),
4258 codes.clone(),
4259 seed + 72,
4260 );
4261
4262 let buyer_country = self
4265 .config
4266 .companies
4267 .first()
4268 .map(|c| c.country.as_str())
4269 .unwrap_or("US");
4270 for vi in &document_flows.vendor_invoices {
4271 let lines = tax_line_gen.generate_for_document(
4272 datasynth_core::models::TaxableDocumentType::VendorInvoice,
4273 &vi.header.document_id,
4274 buyer_country, buyer_country,
4276 vi.payable_amount,
4277 vi.header.document_date,
4278 None,
4279 );
4280 tax_lines.extend(lines);
4281 }
4282
4283 for ci in &document_flows.customer_invoices {
4285 let lines = tax_line_gen.generate_for_document(
4286 datasynth_core::models::TaxableDocumentType::CustomerInvoice,
4287 &ci.header.document_id,
4288 buyer_country, buyer_country,
4290 ci.total_gross_amount,
4291 ci.header.document_date,
4292 None,
4293 );
4294 tax_lines.extend(lines);
4295 }
4296 }
4297
4298 let snapshot = TaxSnapshot {
4299 jurisdiction_count: jurisdictions.len(),
4300 code_count: codes.len(),
4301 jurisdictions,
4302 codes,
4303 tax_provisions: provisions,
4304 tax_lines,
4305 tax_returns: Vec::new(),
4306 withholding_records: Vec::new(),
4307 tax_anomaly_labels: Vec::new(),
4308 };
4309
4310 stats.tax_jurisdiction_count = snapshot.jurisdiction_count;
4311 stats.tax_code_count = snapshot.code_count;
4312 stats.tax_provision_count = snapshot.tax_provisions.len();
4313 stats.tax_line_count = snapshot.tax_lines.len();
4314
4315 info!(
4316 "Tax data generated: {} jurisdictions, {} codes, {} provisions",
4317 snapshot.jurisdiction_count,
4318 snapshot.code_count,
4319 snapshot.tax_provisions.len()
4320 );
4321 self.check_resources_with_log("post-tax")?;
4322
4323 Ok(snapshot)
4324 }
4325
4326 fn phase_esg_generation(
4328 &mut self,
4329 document_flows: &DocumentFlowSnapshot,
4330 stats: &mut EnhancedGenerationStatistics,
4331 ) -> SynthResult<EsgSnapshot> {
4332 if !self.phase_config.generate_esg || !self.config.esg.enabled {
4333 debug!("Phase 21: Skipped (ESG generation disabled)");
4334 return Ok(EsgSnapshot::default());
4335 }
4336 info!("Phase 21: Generating ESG Data");
4337
4338 let seed = self.seed;
4339 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4340 .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
4341 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
4342 let entity_id = self
4343 .config
4344 .companies
4345 .first()
4346 .map(|c| c.code.as_str())
4347 .unwrap_or("1000");
4348
4349 let esg_cfg = &self.config.esg;
4350 let mut snapshot = EsgSnapshot::default();
4351
4352 let mut energy_gen = datasynth_generators::EnergyGenerator::new(
4354 esg_cfg.environmental.energy.clone(),
4355 seed + 80,
4356 );
4357 let energy_records = energy_gen.generate(entity_id, start_date, end_date);
4358
4359 let facility_count = esg_cfg.environmental.energy.facility_count;
4361 let mut water_gen = datasynth_generators::WaterGenerator::new(seed + 81, facility_count);
4362 snapshot.water = water_gen.generate(entity_id, start_date, end_date);
4363
4364 let mut waste_gen = datasynth_generators::WasteGenerator::new(
4366 seed + 82,
4367 esg_cfg.environmental.waste.diversion_target,
4368 facility_count,
4369 );
4370 snapshot.waste = waste_gen.generate(entity_id, start_date, end_date);
4371
4372 let mut emission_gen =
4374 datasynth_generators::EmissionGenerator::new(esg_cfg.environmental.clone(), seed + 83);
4375
4376 let energy_inputs: Vec<datasynth_generators::EnergyInput> = energy_records
4378 .iter()
4379 .map(|e| datasynth_generators::EnergyInput {
4380 facility_id: e.facility_id.clone(),
4381 energy_type: match e.energy_source {
4382 EnergySourceType::NaturalGas => {
4383 datasynth_generators::EnergyInputType::NaturalGas
4384 }
4385 EnergySourceType::Diesel => datasynth_generators::EnergyInputType::Diesel,
4386 EnergySourceType::Coal => datasynth_generators::EnergyInputType::Coal,
4387 _ => datasynth_generators::EnergyInputType::Electricity,
4388 },
4389 consumption_kwh: e.consumption_kwh,
4390 period: e.period,
4391 })
4392 .collect();
4393
4394 let mut emissions = Vec::new();
4395 emissions.extend(emission_gen.generate_scope1(entity_id, &energy_inputs));
4396 emissions.extend(emission_gen.generate_scope2(entity_id, &energy_inputs));
4397
4398 let vendor_payment_totals: HashMap<String, rust_decimal::Decimal> = {
4400 let mut totals: HashMap<String, rust_decimal::Decimal> = HashMap::new();
4401 for payment in &document_flows.payments {
4402 if payment.is_vendor {
4403 *totals
4404 .entry(payment.business_partner_id.clone())
4405 .or_default() += payment.amount;
4406 }
4407 }
4408 totals
4409 };
4410 let vendor_spend: Vec<datasynth_generators::VendorSpendInput> = self
4411 .master_data
4412 .vendors
4413 .iter()
4414 .map(|v| {
4415 let spend = vendor_payment_totals
4416 .get(&v.vendor_id)
4417 .copied()
4418 .unwrap_or_else(|| rust_decimal::Decimal::new(10000, 0));
4419 datasynth_generators::VendorSpendInput {
4420 vendor_id: v.vendor_id.clone(),
4421 category: format!("{:?}", v.vendor_type).to_lowercase(),
4422 spend,
4423 country: v.country.clone(),
4424 }
4425 })
4426 .collect();
4427 if !vendor_spend.is_empty() {
4428 emissions.extend(emission_gen.generate_scope3_purchased_goods(
4429 entity_id,
4430 &vendor_spend,
4431 start_date,
4432 end_date,
4433 ));
4434 }
4435
4436 let headcount = self.master_data.employees.len() as u32;
4438 if headcount > 0 {
4439 let travel_spend = rust_decimal::Decimal::new(headcount as i64 * 2000, 0);
4440 emissions.extend(emission_gen.generate_scope3_business_travel(
4441 entity_id,
4442 travel_spend,
4443 start_date,
4444 ));
4445 emissions
4446 .extend(emission_gen.generate_scope3_commuting(entity_id, headcount, start_date));
4447 }
4448
4449 snapshot.emission_count = emissions.len();
4450 snapshot.emissions = emissions;
4451 snapshot.energy = energy_records;
4452
4453 let mut workforce_gen =
4455 datasynth_generators::WorkforceGenerator::new(esg_cfg.social.clone(), seed + 84);
4456 let total_headcount = headcount.max(100);
4457 snapshot.diversity =
4458 workforce_gen.generate_diversity(entity_id, total_headcount, start_date);
4459 snapshot.pay_equity = workforce_gen.generate_pay_equity(entity_id, start_date);
4460 snapshot.safety_incidents = workforce_gen.generate_safety_incidents(
4461 entity_id,
4462 facility_count,
4463 start_date,
4464 end_date,
4465 );
4466
4467 let total_hours = total_headcount as u64 * 2000; let safety_metric = workforce_gen.compute_safety_metrics(
4470 entity_id,
4471 &snapshot.safety_incidents,
4472 total_hours,
4473 start_date,
4474 );
4475 snapshot.safety_metrics = vec![safety_metric];
4476
4477 let mut gov_gen = datasynth_generators::GovernanceGenerator::new(
4479 seed + 85,
4480 esg_cfg.governance.board_size,
4481 esg_cfg.governance.independence_target,
4482 );
4483 snapshot.governance = vec![gov_gen.generate(entity_id, start_date)];
4484
4485 let mut supplier_gen = datasynth_generators::SupplierEsgGenerator::new(
4487 esg_cfg.supply_chain_esg.clone(),
4488 seed + 86,
4489 );
4490 let vendor_inputs: Vec<datasynth_generators::VendorInput> = self
4491 .master_data
4492 .vendors
4493 .iter()
4494 .map(|v| datasynth_generators::VendorInput {
4495 vendor_id: v.vendor_id.clone(),
4496 country: v.country.clone(),
4497 industry: format!("{:?}", v.vendor_type).to_lowercase(),
4498 quality_score: None,
4499 })
4500 .collect();
4501 snapshot.supplier_assessments =
4502 supplier_gen.generate(entity_id, &vendor_inputs, start_date);
4503
4504 let mut disclosure_gen = datasynth_generators::DisclosureGenerator::new(
4506 seed + 87,
4507 esg_cfg.reporting.clone(),
4508 esg_cfg.climate_scenarios.clone(),
4509 );
4510 snapshot.materiality = disclosure_gen.generate_materiality(entity_id, start_date);
4511 snapshot.disclosures = disclosure_gen.generate_disclosures(
4512 entity_id,
4513 &snapshot.materiality,
4514 start_date,
4515 end_date,
4516 );
4517 snapshot.climate_scenarios = disclosure_gen.generate_climate_scenarios(entity_id);
4518 snapshot.disclosure_count = snapshot.disclosures.len();
4519
4520 if esg_cfg.anomaly_rate > 0.0 {
4522 let mut anomaly_injector =
4523 datasynth_generators::EsgAnomalyInjector::new(seed + 88, esg_cfg.anomaly_rate);
4524 let mut labels = Vec::new();
4525 labels.extend(anomaly_injector.inject_greenwashing(&mut snapshot.emissions));
4526 labels.extend(anomaly_injector.inject_diversity_stagnation(&mut snapshot.diversity));
4527 labels.extend(
4528 anomaly_injector.inject_supply_chain_risk(&mut snapshot.supplier_assessments),
4529 );
4530 labels.extend(anomaly_injector.inject_data_quality_gaps(&mut snapshot.safety_metrics));
4531 labels.extend(anomaly_injector.inject_missing_disclosures(&mut snapshot.materiality));
4532 snapshot.anomaly_labels = labels;
4533 }
4534
4535 stats.esg_emission_count = snapshot.emission_count;
4536 stats.esg_disclosure_count = snapshot.disclosure_count;
4537
4538 info!(
4539 "ESG data generated: {} emissions, {} disclosures, {} supplier assessments",
4540 snapshot.emission_count,
4541 snapshot.disclosure_count,
4542 snapshot.supplier_assessments.len()
4543 );
4544 self.check_resources_with_log("post-esg")?;
4545
4546 Ok(snapshot)
4547 }
4548
4549 fn phase_treasury_data(
4551 &mut self,
4552 document_flows: &DocumentFlowSnapshot,
4553 stats: &mut EnhancedGenerationStatistics,
4554 ) -> SynthResult<TreasurySnapshot> {
4555 if !self.config.treasury.enabled {
4556 debug!("Phase 22: Skipped (treasury generation disabled)");
4557 return Ok(TreasurySnapshot::default());
4558 }
4559 info!("Phase 22: Generating Treasury Data");
4560
4561 let seed = self.seed;
4562 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4563 .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
4564 let currency = self
4565 .config
4566 .companies
4567 .first()
4568 .map(|c| c.currency.as_str())
4569 .unwrap_or("USD");
4570 let entity_id = self
4571 .config
4572 .companies
4573 .first()
4574 .map(|c| c.code.as_str())
4575 .unwrap_or("1000");
4576
4577 let mut snapshot = TreasurySnapshot::default();
4578
4579 let mut debt_gen = datasynth_generators::treasury::DebtGenerator::new(
4581 self.config.treasury.debt.clone(),
4582 seed + 90,
4583 );
4584 snapshot.debt_instruments = debt_gen.generate(entity_id, currency, start_date);
4585
4586 let mut hedge_gen = datasynth_generators::treasury::HedgingGenerator::new(
4588 self.config.treasury.hedging.clone(),
4589 seed + 91,
4590 );
4591 for debt in &snapshot.debt_instruments {
4592 if debt.rate_type == InterestRateType::Variable {
4593 let swap = hedge_gen.generate_ir_swap(
4594 currency,
4595 debt.principal,
4596 debt.origination_date,
4597 debt.maturity_date,
4598 );
4599 snapshot.hedging_instruments.push(swap);
4600 }
4601 }
4602
4603 {
4606 let mut fx_map: HashMap<String, (rust_decimal::Decimal, NaiveDate)> = HashMap::new();
4607 for payment in &document_flows.payments {
4608 if payment.currency != currency {
4609 let entry = fx_map
4610 .entry(payment.currency.clone())
4611 .or_insert((rust_decimal::Decimal::ZERO, payment.header.document_date));
4612 entry.0 += payment.amount;
4613 if payment.header.document_date > entry.1 {
4615 entry.1 = payment.header.document_date;
4616 }
4617 }
4618 }
4619 if !fx_map.is_empty() {
4620 let fx_exposures: Vec<datasynth_generators::treasury::FxExposure> = fx_map
4621 .into_iter()
4622 .map(|(foreign_ccy, (net_amount, settlement_date))| {
4623 datasynth_generators::treasury::FxExposure {
4624 currency_pair: format!("{}/{}", foreign_ccy, currency),
4625 foreign_currency: foreign_ccy,
4626 net_amount,
4627 settlement_date,
4628 description: "AP payment FX exposure".to_string(),
4629 }
4630 })
4631 .collect();
4632 let (fx_instruments, fx_relationships) =
4633 hedge_gen.generate(start_date, &fx_exposures);
4634 snapshot.hedging_instruments.extend(fx_instruments);
4635 snapshot.hedge_relationships.extend(fx_relationships);
4636 }
4637 }
4638
4639 if self.config.treasury.anomaly_rate > 0.0 {
4641 let mut anomaly_injector = datasynth_generators::treasury::TreasuryAnomalyInjector::new(
4642 seed + 92,
4643 self.config.treasury.anomaly_rate,
4644 );
4645 let mut labels = Vec::new();
4646 labels.extend(
4647 anomaly_injector.inject_into_hedge_relationships(&mut snapshot.hedge_relationships),
4648 );
4649 snapshot.treasury_anomaly_labels = labels;
4650 }
4651
4652 if self.config.treasury.cash_positioning.enabled {
4654 let mut cash_flows: Vec<datasynth_generators::treasury::CashFlow> = Vec::new();
4655
4656 for payment in &document_flows.payments {
4658 cash_flows.push(datasynth_generators::treasury::CashFlow {
4659 date: payment.header.document_date,
4660 account_id: format!("{}-MAIN", entity_id),
4661 amount: payment.amount,
4662 direction: datasynth_generators::treasury::CashFlowDirection::Outflow,
4663 });
4664 }
4665
4666 for chain in &document_flows.o2c_chains {
4668 if let Some(ref receipt) = chain.customer_receipt {
4669 cash_flows.push(datasynth_generators::treasury::CashFlow {
4670 date: receipt.header.document_date,
4671 account_id: format!("{}-MAIN", entity_id),
4672 amount: receipt.amount,
4673 direction: datasynth_generators::treasury::CashFlowDirection::Inflow,
4674 });
4675 }
4676 for receipt in &chain.remainder_receipts {
4678 cash_flows.push(datasynth_generators::treasury::CashFlow {
4679 date: receipt.header.document_date,
4680 account_id: format!("{}-MAIN", entity_id),
4681 amount: receipt.amount,
4682 direction: datasynth_generators::treasury::CashFlowDirection::Inflow,
4683 });
4684 }
4685 }
4686
4687 if !cash_flows.is_empty() {
4688 let mut cash_gen = datasynth_generators::treasury::CashPositionGenerator::new(
4689 self.config.treasury.cash_positioning.clone(),
4690 seed + 93,
4691 );
4692 let account_id = format!("{}-MAIN", entity_id);
4693 snapshot.cash_positions = cash_gen.generate(
4694 entity_id,
4695 &account_id,
4696 currency,
4697 &cash_flows,
4698 start_date,
4699 start_date + chrono::Months::new(self.config.global.period_months),
4700 rust_decimal::Decimal::new(1_000_000, 0), );
4702 }
4703 }
4704
4705 stats.treasury_debt_instrument_count = snapshot.debt_instruments.len();
4706 stats.treasury_hedging_instrument_count = snapshot.hedging_instruments.len();
4707 stats.cash_position_count = snapshot.cash_positions.len();
4708
4709 info!(
4710 "Treasury data generated: {} debt instruments, {} hedging instruments, {} cash positions",
4711 snapshot.debt_instruments.len(),
4712 snapshot.hedging_instruments.len(),
4713 snapshot.cash_positions.len()
4714 );
4715 self.check_resources_with_log("post-treasury")?;
4716
4717 Ok(snapshot)
4718 }
4719
4720 fn phase_project_accounting(
4722 &mut self,
4723 document_flows: &DocumentFlowSnapshot,
4724 hr: &HrSnapshot,
4725 stats: &mut EnhancedGenerationStatistics,
4726 ) -> SynthResult<ProjectAccountingSnapshot> {
4727 if !self.config.project_accounting.enabled {
4728 debug!("Phase 23: Skipped (project accounting disabled)");
4729 return Ok(ProjectAccountingSnapshot::default());
4730 }
4731 info!("Phase 23: Generating Project Accounting Data");
4732
4733 let seed = self.seed;
4734 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4735 .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
4736 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
4737 let company_code = self
4738 .config
4739 .companies
4740 .first()
4741 .map(|c| c.code.as_str())
4742 .unwrap_or("1000");
4743
4744 let mut snapshot = ProjectAccountingSnapshot::default();
4745
4746 let mut project_gen = datasynth_generators::project_accounting::ProjectGenerator::new(
4748 self.config.project_accounting.clone(),
4749 seed + 95,
4750 );
4751 let pool = project_gen.generate(company_code, start_date, end_date);
4752 snapshot.projects = pool.projects.clone();
4753
4754 {
4756 let mut source_docs: Vec<datasynth_generators::project_accounting::SourceDocument> =
4757 Vec::new();
4758
4759 for te in &hr.time_entries {
4761 let total_hours = te.hours_regular + te.hours_overtime;
4762 if total_hours > 0.0 {
4763 source_docs.push(datasynth_generators::project_accounting::SourceDocument {
4764 id: te.entry_id.clone(),
4765 entity_id: company_code.to_string(),
4766 date: te.date,
4767 amount: rust_decimal::Decimal::from_f64_retain(total_hours * 75.0)
4768 .unwrap_or(rust_decimal::Decimal::ZERO),
4769 source_type: CostSourceType::TimeEntry,
4770 hours: Some(
4771 rust_decimal::Decimal::from_f64_retain(total_hours)
4772 .unwrap_or(rust_decimal::Decimal::ZERO),
4773 ),
4774 });
4775 }
4776 }
4777
4778 for er in &hr.expense_reports {
4780 source_docs.push(datasynth_generators::project_accounting::SourceDocument {
4781 id: er.report_id.clone(),
4782 entity_id: company_code.to_string(),
4783 date: er.submission_date,
4784 amount: er.total_amount,
4785 source_type: CostSourceType::ExpenseReport,
4786 hours: None,
4787 });
4788 }
4789
4790 for po in &document_flows.purchase_orders {
4792 source_docs.push(datasynth_generators::project_accounting::SourceDocument {
4793 id: po.header.document_id.clone(),
4794 entity_id: company_code.to_string(),
4795 date: po.header.document_date,
4796 amount: po.total_net_amount,
4797 source_type: CostSourceType::PurchaseOrder,
4798 hours: None,
4799 });
4800 }
4801
4802 for vi in &document_flows.vendor_invoices {
4804 source_docs.push(datasynth_generators::project_accounting::SourceDocument {
4805 id: vi.header.document_id.clone(),
4806 entity_id: company_code.to_string(),
4807 date: vi.header.document_date,
4808 amount: vi.payable_amount,
4809 source_type: CostSourceType::VendorInvoice,
4810 hours: None,
4811 });
4812 }
4813
4814 if !source_docs.is_empty() && !pool.projects.is_empty() {
4815 let mut cost_gen =
4816 datasynth_generators::project_accounting::ProjectCostGenerator::new(
4817 self.config.project_accounting.cost_allocation.clone(),
4818 seed + 99,
4819 );
4820 snapshot.cost_lines = cost_gen.link_documents(&pool, &source_docs);
4821 }
4822 }
4823
4824 if self.config.project_accounting.change_orders.enabled {
4826 let mut co_gen = datasynth_generators::project_accounting::ChangeOrderGenerator::new(
4827 self.config.project_accounting.change_orders.clone(),
4828 seed + 96,
4829 );
4830 snapshot.change_orders = co_gen.generate(&pool.projects, start_date, end_date);
4831 }
4832
4833 if self.config.project_accounting.milestones.enabled {
4835 let mut ms_gen = datasynth_generators::project_accounting::MilestoneGenerator::new(
4836 self.config.project_accounting.milestones.clone(),
4837 seed + 97,
4838 );
4839 snapshot.milestones = ms_gen.generate(&pool.projects, start_date, end_date, end_date);
4840 }
4841
4842 if self.config.project_accounting.earned_value.enabled && !snapshot.projects.is_empty() {
4844 let mut evm_gen = datasynth_generators::project_accounting::EarnedValueGenerator::new(
4845 self.config.project_accounting.earned_value.clone(),
4846 seed + 98,
4847 );
4848 snapshot.earned_value_metrics =
4849 evm_gen.generate(&pool.projects, &snapshot.cost_lines, start_date, end_date);
4850 }
4851
4852 stats.project_count = snapshot.projects.len();
4853 stats.project_change_order_count = snapshot.change_orders.len();
4854 stats.project_cost_line_count = snapshot.cost_lines.len();
4855
4856 info!(
4857 "Project accounting generated: {} projects, {} change orders, {} milestones, {} EVM records",
4858 snapshot.projects.len(),
4859 snapshot.change_orders.len(),
4860 snapshot.milestones.len(),
4861 snapshot.earned_value_metrics.len()
4862 );
4863 self.check_resources_with_log("post-project-accounting")?;
4864
4865 Ok(snapshot)
4866 }
4867
4868 fn phase_opening_balances(
4870 &mut self,
4871 coa: &Arc<ChartOfAccounts>,
4872 stats: &mut EnhancedGenerationStatistics,
4873 ) -> SynthResult<Vec<GeneratedOpeningBalance>> {
4874 if !self.config.balance.generate_opening_balances {
4875 debug!("Phase 3b: Skipped (opening balance generation disabled)");
4876 return Ok(Vec::new());
4877 }
4878 info!("Phase 3b: Generating Opening Balances");
4879
4880 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4881 .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
4882 let fiscal_year = start_date.year();
4883
4884 let industry = match self.config.global.industry {
4885 IndustrySector::Manufacturing => IndustryType::Manufacturing,
4886 IndustrySector::Retail => IndustryType::Retail,
4887 IndustrySector::FinancialServices => IndustryType::Financial,
4888 IndustrySector::Healthcare => IndustryType::Healthcare,
4889 IndustrySector::Technology => IndustryType::Technology,
4890 _ => IndustryType::Manufacturing,
4891 };
4892
4893 let config = datasynth_generators::OpeningBalanceConfig {
4894 industry,
4895 ..Default::default()
4896 };
4897 let mut gen =
4898 datasynth_generators::OpeningBalanceGenerator::with_seed(config, self.seed + 200);
4899
4900 let mut results = Vec::new();
4901 for company in &self.config.companies {
4902 let spec = OpeningBalanceSpec::new(
4903 company.code.clone(),
4904 start_date,
4905 fiscal_year,
4906 company.currency.clone(),
4907 rust_decimal::Decimal::new(10_000_000, 0),
4908 industry,
4909 );
4910 let ob = gen.generate(&spec, coa, start_date, &company.code);
4911 results.push(ob);
4912 }
4913
4914 stats.opening_balance_count = results.len();
4915 info!("Opening balances generated: {} companies", results.len());
4916 self.check_resources_with_log("post-opening-balances")?;
4917
4918 Ok(results)
4919 }
4920
4921 fn phase_subledger_reconciliation(
4923 &mut self,
4924 subledger: &SubledgerSnapshot,
4925 entries: &[JournalEntry],
4926 stats: &mut EnhancedGenerationStatistics,
4927 ) -> SynthResult<Vec<datasynth_generators::ReconciliationResult>> {
4928 if !self.config.balance.reconcile_subledgers {
4929 debug!("Phase 9b: Skipped (subledger reconciliation disabled)");
4930 return Ok(Vec::new());
4931 }
4932 info!("Phase 9b: Reconciling GL to subledger balances");
4933
4934 let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4935 .map(|d| d + chrono::Months::new(self.config.global.period_months))
4936 .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
4937
4938 let tracker_config = BalanceTrackerConfig {
4940 validate_on_each_entry: false,
4941 track_history: false,
4942 fail_on_validation_error: false,
4943 ..Default::default()
4944 };
4945 let recon_currency = self
4946 .config
4947 .companies
4948 .first()
4949 .map(|c| c.currency.clone())
4950 .unwrap_or_else(|| "USD".to_string());
4951 let mut tracker = RunningBalanceTracker::new_with_currency(tracker_config, recon_currency);
4952 let _ = tracker.apply_entries(entries);
4953
4954 let mut engine = datasynth_generators::ReconciliationEngine::new(
4955 datasynth_generators::ReconciliationConfig::default(),
4956 );
4957
4958 let mut results = Vec::new();
4959 let company_code = self
4960 .config
4961 .companies
4962 .first()
4963 .map(|c| c.code.as_str())
4964 .unwrap_or("1000");
4965
4966 if !subledger.ar_invoices.is_empty() {
4968 let gl_balance = tracker
4969 .get_account_balance(
4970 company_code,
4971 datasynth_core::accounts::control_accounts::AR_CONTROL,
4972 )
4973 .map(|b| b.closing_balance)
4974 .unwrap_or_default();
4975 let ar_refs: Vec<&ARInvoice> = subledger.ar_invoices.iter().collect();
4976 results.push(engine.reconcile_ar(company_code, end_date, gl_balance, &ar_refs));
4977 }
4978
4979 if !subledger.ap_invoices.is_empty() {
4981 let gl_balance = tracker
4982 .get_account_balance(
4983 company_code,
4984 datasynth_core::accounts::control_accounts::AP_CONTROL,
4985 )
4986 .map(|b| b.closing_balance)
4987 .unwrap_or_default();
4988 let ap_refs: Vec<&APInvoice> = subledger.ap_invoices.iter().collect();
4989 results.push(engine.reconcile_ap(company_code, end_date, gl_balance, &ap_refs));
4990 }
4991
4992 if !subledger.fa_records.is_empty() {
4994 let gl_asset_balance = tracker
4995 .get_account_balance(
4996 company_code,
4997 datasynth_core::accounts::control_accounts::FIXED_ASSETS,
4998 )
4999 .map(|b| b.closing_balance)
5000 .unwrap_or_default();
5001 let gl_accum_depr_balance = tracker
5002 .get_account_balance(
5003 company_code,
5004 datasynth_core::accounts::control_accounts::ACCUMULATED_DEPRECIATION,
5005 )
5006 .map(|b| b.closing_balance)
5007 .unwrap_or_default();
5008 let fa_refs: Vec<&datasynth_core::models::subledger::fa::FixedAssetRecord> =
5009 subledger.fa_records.iter().collect();
5010 let (asset_recon, depr_recon) = engine.reconcile_fa(
5011 company_code,
5012 end_date,
5013 gl_asset_balance,
5014 gl_accum_depr_balance,
5015 &fa_refs,
5016 );
5017 results.push(asset_recon);
5018 results.push(depr_recon);
5019 }
5020
5021 if !subledger.inventory_positions.is_empty() {
5023 let gl_balance = tracker
5024 .get_account_balance(
5025 company_code,
5026 datasynth_core::accounts::control_accounts::INVENTORY,
5027 )
5028 .map(|b| b.closing_balance)
5029 .unwrap_or_default();
5030 let inv_refs: Vec<&datasynth_core::models::subledger::inventory::InventoryPosition> =
5031 subledger.inventory_positions.iter().collect();
5032 results.push(engine.reconcile_inventory(company_code, end_date, gl_balance, &inv_refs));
5033 }
5034
5035 stats.subledger_reconciliation_count = results.len();
5036 info!(
5037 "Subledger reconciliation complete: {} reconciliations",
5038 results.len()
5039 );
5040 self.check_resources_with_log("post-subledger-reconciliation")?;
5041
5042 Ok(results)
5043 }
5044
5045 fn generate_coa(&mut self) -> SynthResult<Arc<ChartOfAccounts>> {
5047 let pb = self.create_progress_bar(1, "Generating Chart of Accounts");
5048
5049 let coa_framework = self.resolve_coa_framework();
5050
5051 let mut gen = ChartOfAccountsGenerator::new(
5052 self.config.chart_of_accounts.complexity,
5053 self.config.global.industry,
5054 self.seed,
5055 )
5056 .with_coa_framework(coa_framework);
5057
5058 let coa = Arc::new(gen.generate());
5059 self.coa = Some(Arc::clone(&coa));
5060
5061 if let Some(pb) = pb {
5062 pb.finish_with_message("Chart of Accounts complete");
5063 }
5064
5065 Ok(coa)
5066 }
5067
5068 fn generate_master_data(&mut self) -> SynthResult<()> {
5070 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5071 .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
5072 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
5073
5074 let total = self.config.companies.len() as u64 * 5; let pb = self.create_progress_bar(total, "Generating Master Data");
5076
5077 let pack = self.primary_pack().clone();
5079
5080 let vendors_per_company = self.phase_config.vendors_per_company;
5082 let customers_per_company = self.phase_config.customers_per_company;
5083 let materials_per_company = self.phase_config.materials_per_company;
5084 let assets_per_company = self.phase_config.assets_per_company;
5085 let coa_framework = self.resolve_coa_framework();
5086
5087 let per_company_results: Vec<_> = self
5090 .config
5091 .companies
5092 .par_iter()
5093 .enumerate()
5094 .map(|(i, company)| {
5095 let company_seed = self.seed.wrapping_add(i as u64 * 1000);
5096 let pack = pack.clone();
5097
5098 let mut vendor_gen = VendorGenerator::new(company_seed);
5100 vendor_gen.set_country_pack(pack.clone());
5101 vendor_gen.set_coa_framework(coa_framework);
5102 let vendor_pool =
5103 vendor_gen.generate_vendor_pool(vendors_per_company, &company.code, start_date);
5104
5105 let mut customer_gen = CustomerGenerator::new(company_seed + 100);
5107 customer_gen.set_country_pack(pack.clone());
5108 customer_gen.set_coa_framework(coa_framework);
5109 let customer_pool = customer_gen.generate_customer_pool(
5110 customers_per_company,
5111 &company.code,
5112 start_date,
5113 );
5114
5115 let mut material_gen = MaterialGenerator::new(company_seed + 200);
5117 material_gen.set_country_pack(pack);
5118 let material_pool = material_gen.generate_material_pool(
5119 materials_per_company,
5120 &company.code,
5121 start_date,
5122 );
5123
5124 let mut asset_gen = AssetGenerator::new(company_seed + 300);
5126 let asset_pool = asset_gen.generate_asset_pool(
5127 assets_per_company,
5128 &company.code,
5129 (start_date, end_date),
5130 );
5131
5132 let mut employee_gen = EmployeeGenerator::new(company_seed + 400);
5134 let employee_pool =
5135 employee_gen.generate_company_pool(&company.code, (start_date, end_date));
5136
5137 (
5138 vendor_pool.vendors,
5139 customer_pool.customers,
5140 material_pool.materials,
5141 asset_pool.assets,
5142 employee_pool.employees,
5143 )
5144 })
5145 .collect();
5146
5147 for (vendors, customers, materials, assets, employees) in per_company_results {
5149 self.master_data.vendors.extend(vendors);
5150 self.master_data.customers.extend(customers);
5151 self.master_data.materials.extend(materials);
5152 self.master_data.assets.extend(assets);
5153 self.master_data.employees.extend(employees);
5154 }
5155
5156 if let Some(pb) = &pb {
5157 pb.inc(total);
5158 }
5159 if let Some(pb) = pb {
5160 pb.finish_with_message("Master data generation complete");
5161 }
5162
5163 Ok(())
5164 }
5165
5166 fn generate_document_flows(&mut self, flows: &mut DocumentFlowSnapshot) -> SynthResult<()> {
5168 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5169 .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
5170
5171 let months = (self.config.global.period_months as usize).max(1);
5174 let p2p_count = self
5175 .phase_config
5176 .p2p_chains
5177 .min(self.master_data.vendors.len() * 2 * months);
5178 let pb = self.create_progress_bar(p2p_count as u64, "Generating P2P Document Flows");
5179
5180 let p2p_config = convert_p2p_config(&self.config.document_flows.p2p);
5182 let mut p2p_gen = P2PGenerator::with_config(self.seed + 1000, p2p_config);
5183 p2p_gen.set_country_pack(self.primary_pack().clone());
5184
5185 for i in 0..p2p_count {
5186 let vendor = &self.master_data.vendors[i % self.master_data.vendors.len()];
5187 let materials: Vec<&Material> = self
5188 .master_data
5189 .materials
5190 .iter()
5191 .skip(i % self.master_data.materials.len().max(1))
5192 .take(2.min(self.master_data.materials.len()))
5193 .collect();
5194
5195 if materials.is_empty() {
5196 continue;
5197 }
5198
5199 let company = &self.config.companies[i % self.config.companies.len()];
5200 let po_date = start_date + chrono::Duration::days((i * 3) as i64 % 365);
5201 let fiscal_period = po_date.month() as u8;
5202 let created_by = if self.master_data.employees.is_empty() {
5203 "SYSTEM"
5204 } else {
5205 self.master_data.employees[i % self.master_data.employees.len()]
5206 .user_id
5207 .as_str()
5208 };
5209
5210 let chain = p2p_gen.generate_chain(
5211 &company.code,
5212 vendor,
5213 &materials,
5214 po_date,
5215 start_date.year() as u16,
5216 fiscal_period,
5217 created_by,
5218 );
5219
5220 flows.purchase_orders.push(chain.purchase_order.clone());
5222 flows.goods_receipts.extend(chain.goods_receipts.clone());
5223 if let Some(vi) = &chain.vendor_invoice {
5224 flows.vendor_invoices.push(vi.clone());
5225 }
5226 if let Some(payment) = &chain.payment {
5227 flows.payments.push(payment.clone());
5228 }
5229 for remainder in &chain.remainder_payments {
5230 flows.payments.push(remainder.clone());
5231 }
5232 flows.p2p_chains.push(chain);
5233
5234 if let Some(pb) = &pb {
5235 pb.inc(1);
5236 }
5237 }
5238
5239 if let Some(pb) = pb {
5240 pb.finish_with_message("P2P document flows complete");
5241 }
5242
5243 let o2c_count = self
5246 .phase_config
5247 .o2c_chains
5248 .min(self.master_data.customers.len() * 2 * months);
5249 let pb = self.create_progress_bar(o2c_count as u64, "Generating O2C Document Flows");
5250
5251 let o2c_config = convert_o2c_config(&self.config.document_flows.o2c);
5253 let mut o2c_gen = O2CGenerator::with_config(self.seed + 2000, o2c_config);
5254 o2c_gen.set_country_pack(self.primary_pack().clone());
5255
5256 for i in 0..o2c_count {
5257 let customer = &self.master_data.customers[i % self.master_data.customers.len()];
5258 let materials: Vec<&Material> = self
5259 .master_data
5260 .materials
5261 .iter()
5262 .skip(i % self.master_data.materials.len().max(1))
5263 .take(2.min(self.master_data.materials.len()))
5264 .collect();
5265
5266 if materials.is_empty() {
5267 continue;
5268 }
5269
5270 let company = &self.config.companies[i % self.config.companies.len()];
5271 let so_date = start_date + chrono::Duration::days((i * 2) as i64 % 365);
5272 let fiscal_period = so_date.month() as u8;
5273 let created_by = if self.master_data.employees.is_empty() {
5274 "SYSTEM"
5275 } else {
5276 self.master_data.employees[i % self.master_data.employees.len()]
5277 .user_id
5278 .as_str()
5279 };
5280
5281 let chain = o2c_gen.generate_chain(
5282 &company.code,
5283 customer,
5284 &materials,
5285 so_date,
5286 start_date.year() as u16,
5287 fiscal_period,
5288 created_by,
5289 );
5290
5291 flows.sales_orders.push(chain.sales_order.clone());
5293 flows.deliveries.extend(chain.deliveries.clone());
5294 if let Some(ci) = &chain.customer_invoice {
5295 flows.customer_invoices.push(ci.clone());
5296 }
5297 if let Some(receipt) = &chain.customer_receipt {
5298 flows.payments.push(receipt.clone());
5299 }
5300 for receipt in &chain.remainder_receipts {
5302 flows.payments.push(receipt.clone());
5303 }
5304 flows.o2c_chains.push(chain);
5305
5306 if let Some(pb) = &pb {
5307 pb.inc(1);
5308 }
5309 }
5310
5311 if let Some(pb) = pb {
5312 pb.finish_with_message("O2C document flows complete");
5313 }
5314
5315 Ok(())
5316 }
5317
5318 fn generate_journal_entries(
5320 &mut self,
5321 coa: &Arc<ChartOfAccounts>,
5322 ) -> SynthResult<Vec<JournalEntry>> {
5323 use datasynth_core::traits::ParallelGenerator;
5324
5325 let total = self.calculate_total_transactions();
5326 let pb = self.create_progress_bar(total, "Generating Journal Entries");
5327
5328 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5329 .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
5330 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
5331
5332 let company_codes: Vec<String> = self
5333 .config
5334 .companies
5335 .iter()
5336 .map(|c| c.code.clone())
5337 .collect();
5338
5339 let generator = JournalEntryGenerator::new_with_params(
5340 self.config.transactions.clone(),
5341 Arc::clone(coa),
5342 company_codes,
5343 start_date,
5344 end_date,
5345 self.seed,
5346 );
5347
5348 let je_pack = self.primary_pack();
5352
5353 let mut generator = generator
5354 .with_master_data(
5355 &self.master_data.vendors,
5356 &self.master_data.customers,
5357 &self.master_data.materials,
5358 )
5359 .with_country_pack_names(je_pack)
5360 .with_country_pack_temporal(
5361 self.config.temporal_patterns.clone(),
5362 self.seed + 200,
5363 je_pack,
5364 )
5365 .with_persona_errors(true)
5366 .with_fraud_config(self.config.fraud.clone());
5367
5368 if self.config.temporal.enabled {
5370 let drift_config = self.config.temporal.to_core_config();
5371 generator = generator.with_drift_config(drift_config, self.seed + 100);
5372 }
5373
5374 self.check_memory_limit()?;
5376
5377 let num_threads = num_cpus::get().max(1).min(total as usize).max(1);
5379
5380 let entries = if total >= 10_000 && num_threads > 1 {
5384 let sub_generators = generator.split(num_threads);
5387 let entries_per_thread = total as usize / num_threads;
5388 let remainder = total as usize % num_threads;
5389
5390 let batches: Vec<Vec<JournalEntry>> = sub_generators
5391 .into_par_iter()
5392 .enumerate()
5393 .map(|(i, mut gen)| {
5394 let count = entries_per_thread + if i < remainder { 1 } else { 0 };
5395 gen.generate_batch(count)
5396 })
5397 .collect();
5398
5399 let entries = JournalEntryGenerator::merge_results(batches);
5401
5402 if let Some(pb) = &pb {
5403 pb.inc(total);
5404 }
5405 entries
5406 } else {
5407 let mut entries = Vec::with_capacity(total as usize);
5409 for _ in 0..total {
5410 let entry = generator.generate();
5411 entries.push(entry);
5412 if let Some(pb) = &pb {
5413 pb.inc(1);
5414 }
5415 }
5416 entries
5417 };
5418
5419 if let Some(pb) = pb {
5420 pb.finish_with_message("Journal entries complete");
5421 }
5422
5423 Ok(entries)
5424 }
5425
5426 fn generate_jes_from_document_flows(
5431 &mut self,
5432 flows: &DocumentFlowSnapshot,
5433 ) -> SynthResult<Vec<JournalEntry>> {
5434 let total_chains = flows.p2p_chains.len() + flows.o2c_chains.len();
5435 let pb = self.create_progress_bar(total_chains as u64, "Generating Document Flow JEs");
5436
5437 let je_config = match self.resolve_coa_framework() {
5438 CoAFramework::FrenchPcg => DocumentFlowJeConfig::french_gaap(),
5439 CoAFramework::GermanSkr04 => {
5440 let fa = datasynth_core::FrameworkAccounts::german_gaap();
5441 DocumentFlowJeConfig::from(&fa)
5442 }
5443 CoAFramework::UsGaap => DocumentFlowJeConfig::default(),
5444 };
5445
5446 let populate_fec = je_config.populate_fec_fields;
5447 let mut generator = DocumentFlowJeGenerator::with_config_and_seed(je_config, self.seed);
5448
5449 if populate_fec {
5453 let mut aux_lookup = std::collections::HashMap::new();
5454 for vendor in &self.master_data.vendors {
5455 if let Some(ref aux) = vendor.auxiliary_gl_account {
5456 aux_lookup.insert(vendor.vendor_id.clone(), aux.clone());
5457 }
5458 }
5459 for customer in &self.master_data.customers {
5460 if let Some(ref aux) = customer.auxiliary_gl_account {
5461 aux_lookup.insert(customer.customer_id.clone(), aux.clone());
5462 }
5463 }
5464 if !aux_lookup.is_empty() {
5465 generator.set_auxiliary_account_lookup(aux_lookup);
5466 }
5467 }
5468
5469 let mut entries = Vec::new();
5470
5471 for chain in &flows.p2p_chains {
5473 let chain_entries = generator.generate_from_p2p_chain(chain);
5474 entries.extend(chain_entries);
5475 if let Some(pb) = &pb {
5476 pb.inc(1);
5477 }
5478 }
5479
5480 for chain in &flows.o2c_chains {
5482 let chain_entries = generator.generate_from_o2c_chain(chain);
5483 entries.extend(chain_entries);
5484 if let Some(pb) = &pb {
5485 pb.inc(1);
5486 }
5487 }
5488
5489 if let Some(pb) = pb {
5490 pb.finish_with_message(format!(
5491 "Generated {} JEs from document flows",
5492 entries.len()
5493 ));
5494 }
5495
5496 Ok(entries)
5497 }
5498
5499 fn generate_payroll_jes(payroll_runs: &[PayrollRun]) -> Vec<JournalEntry> {
5505 use datasynth_core::accounts::{expense_accounts, suspense_accounts};
5506
5507 let mut jes = Vec::with_capacity(payroll_runs.len());
5508
5509 for run in payroll_runs {
5510 let mut je = JournalEntry::new_simple(
5511 format!("JE-PAYROLL-{}", run.payroll_id),
5512 run.company_code.clone(),
5513 run.run_date,
5514 format!("Payroll {}", run.payroll_id),
5515 );
5516
5517 je.add_line(JournalEntryLine {
5519 line_number: 1,
5520 gl_account: expense_accounts::SALARIES_WAGES.to_string(),
5521 debit_amount: run.total_gross,
5522 reference: Some(run.payroll_id.clone()),
5523 text: Some(format!(
5524 "Payroll {} ({} employees)",
5525 run.payroll_id, run.employee_count
5526 )),
5527 ..Default::default()
5528 });
5529
5530 je.add_line(JournalEntryLine {
5532 line_number: 2,
5533 gl_account: suspense_accounts::PAYROLL_CLEARING.to_string(),
5534 credit_amount: run.total_gross,
5535 reference: Some(run.payroll_id.clone()),
5536 ..Default::default()
5537 });
5538
5539 jes.push(je);
5540 }
5541
5542 jes
5543 }
5544
5545 fn generate_manufacturing_jes(production_orders: &[ProductionOrder]) -> Vec<JournalEntry> {
5551 use datasynth_core::accounts::{control_accounts, expense_accounts};
5552 use datasynth_core::models::ProductionOrderStatus;
5553
5554 let mut jes = Vec::new();
5555
5556 for order in production_orders {
5557 if !matches!(
5559 order.status,
5560 ProductionOrderStatus::Completed | ProductionOrderStatus::Closed
5561 ) {
5562 continue;
5563 }
5564
5565 let mut je = JournalEntry::new_simple(
5566 format!("JE-MFG-{}", order.order_id),
5567 order.company_code.clone(),
5568 order.actual_end.unwrap_or(order.planned_end),
5569 format!(
5570 "Production Order {} - {}",
5571 order.order_id, order.material_description
5572 ),
5573 );
5574
5575 je.add_line(JournalEntryLine {
5577 line_number: 1,
5578 gl_account: expense_accounts::RAW_MATERIALS.to_string(),
5579 debit_amount: order.actual_cost,
5580 reference: Some(order.order_id.clone()),
5581 text: Some(format!(
5582 "Material consumption for {}",
5583 order.material_description
5584 )),
5585 quantity: Some(order.actual_quantity),
5586 unit: Some("EA".to_string()),
5587 ..Default::default()
5588 });
5589
5590 je.add_line(JournalEntryLine {
5592 line_number: 2,
5593 gl_account: control_accounts::INVENTORY.to_string(),
5594 credit_amount: order.actual_cost,
5595 reference: Some(order.order_id.clone()),
5596 ..Default::default()
5597 });
5598
5599 jes.push(je);
5600 }
5601
5602 jes
5603 }
5604
5605 fn link_document_flows_to_subledgers(
5610 &mut self,
5611 flows: &DocumentFlowSnapshot,
5612 ) -> SynthResult<SubledgerSnapshot> {
5613 let total = flows.vendor_invoices.len() + flows.customer_invoices.len();
5614 let pb = self.create_progress_bar(total as u64, "Linking Subledgers");
5615
5616 let vendor_names: std::collections::HashMap<String, String> = self
5618 .master_data
5619 .vendors
5620 .iter()
5621 .map(|v| (v.vendor_id.clone(), v.name.clone()))
5622 .collect();
5623 let customer_names: std::collections::HashMap<String, String> = self
5624 .master_data
5625 .customers
5626 .iter()
5627 .map(|c| (c.customer_id.clone(), c.name.clone()))
5628 .collect();
5629
5630 let mut linker = DocumentFlowLinker::new()
5631 .with_vendor_names(vendor_names)
5632 .with_customer_names(customer_names);
5633
5634 let ap_invoices = linker.batch_create_ap_invoices(&flows.vendor_invoices);
5636 if let Some(pb) = &pb {
5637 pb.inc(flows.vendor_invoices.len() as u64);
5638 }
5639
5640 let ar_invoices = linker.batch_create_ar_invoices(&flows.customer_invoices);
5642 if let Some(pb) = &pb {
5643 pb.inc(flows.customer_invoices.len() as u64);
5644 }
5645
5646 if let Some(pb) = pb {
5647 pb.finish_with_message(format!(
5648 "Linked {} AP and {} AR invoices",
5649 ap_invoices.len(),
5650 ar_invoices.len()
5651 ));
5652 }
5653
5654 Ok(SubledgerSnapshot {
5655 ap_invoices,
5656 ar_invoices,
5657 fa_records: Vec::new(),
5658 inventory_positions: Vec::new(),
5659 inventory_movements: Vec::new(),
5660 })
5661 }
5662
5663 #[allow(clippy::too_many_arguments)]
5668 fn generate_ocpm_events(
5669 &mut self,
5670 flows: &DocumentFlowSnapshot,
5671 sourcing: &SourcingSnapshot,
5672 hr: &HrSnapshot,
5673 manufacturing: &ManufacturingSnapshot,
5674 banking: &BankingSnapshot,
5675 audit: &AuditSnapshot,
5676 financial_reporting: &FinancialReportingSnapshot,
5677 ) -> SynthResult<OcpmSnapshot> {
5678 let total_chains = flows.p2p_chains.len()
5679 + flows.o2c_chains.len()
5680 + sourcing.sourcing_projects.len()
5681 + hr.payroll_runs.len()
5682 + manufacturing.production_orders.len()
5683 + banking.customers.len()
5684 + audit.engagements.len()
5685 + financial_reporting.bank_reconciliations.len();
5686 let pb = self.create_progress_bar(total_chains as u64, "Generating OCPM Events");
5687
5688 let metadata = EventLogMetadata::new("SyntheticData OCPM Log");
5690 let mut event_log = OcpmEventLog::with_metadata(metadata).with_standard_types();
5691
5692 let ocpm_config = OcpmGeneratorConfig {
5694 generate_p2p: true,
5695 generate_o2c: true,
5696 generate_s2c: !sourcing.sourcing_projects.is_empty(),
5697 generate_h2r: !hr.payroll_runs.is_empty(),
5698 generate_mfg: !manufacturing.production_orders.is_empty(),
5699 generate_bank_recon: !financial_reporting.bank_reconciliations.is_empty(),
5700 generate_bank: !banking.customers.is_empty(),
5701 generate_audit: !audit.engagements.is_empty(),
5702 happy_path_rate: 0.75,
5703 exception_path_rate: 0.20,
5704 error_path_rate: 0.05,
5705 add_duration_variability: true,
5706 duration_std_dev_factor: 0.3,
5707 };
5708 let mut ocpm_gen = OcpmEventGenerator::with_config(self.seed + 3000, ocpm_config);
5709 let ocpm_uuid_factory = OcpmUuidFactory::new(self.seed + 3001);
5710
5711 let available_users: Vec<String> = self
5713 .master_data
5714 .employees
5715 .iter()
5716 .take(20)
5717 .map(|e| e.user_id.clone())
5718 .collect();
5719
5720 let fallback_date =
5722 NaiveDate::from_ymd_opt(2024, 1, 1).expect("static date 2024-01-01 is always valid");
5723 let base_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5724 .unwrap_or(fallback_date);
5725 let base_midnight = base_date
5726 .and_hms_opt(0, 0, 0)
5727 .expect("midnight is always valid");
5728 let base_datetime =
5729 chrono::DateTime::<chrono::Utc>::from_naive_utc_and_offset(base_midnight, chrono::Utc);
5730
5731 let add_result = |event_log: &mut OcpmEventLog,
5733 result: datasynth_ocpm::CaseGenerationResult| {
5734 for event in result.events {
5735 event_log.add_event(event);
5736 }
5737 for object in result.objects {
5738 event_log.add_object(object);
5739 }
5740 for relationship in result.relationships {
5741 event_log.add_relationship(relationship);
5742 }
5743 for corr in result.correlation_events {
5744 event_log.add_correlation_event(corr);
5745 }
5746 event_log.add_case(result.case_trace);
5747 };
5748
5749 for chain in &flows.p2p_chains {
5751 let po = &chain.purchase_order;
5752 let documents = P2pDocuments::new(
5753 &po.header.document_id,
5754 &po.vendor_id,
5755 &po.header.company_code,
5756 po.total_net_amount,
5757 &po.header.currency,
5758 &ocpm_uuid_factory,
5759 )
5760 .with_goods_receipt(
5761 chain
5762 .goods_receipts
5763 .first()
5764 .map(|gr| gr.header.document_id.as_str())
5765 .unwrap_or(""),
5766 &ocpm_uuid_factory,
5767 )
5768 .with_invoice(
5769 chain
5770 .vendor_invoice
5771 .as_ref()
5772 .map(|vi| vi.header.document_id.as_str())
5773 .unwrap_or(""),
5774 &ocpm_uuid_factory,
5775 )
5776 .with_payment(
5777 chain
5778 .payment
5779 .as_ref()
5780 .map(|p| p.header.document_id.as_str())
5781 .unwrap_or(""),
5782 &ocpm_uuid_factory,
5783 );
5784
5785 let start_time =
5786 chrono::DateTime::from_naive_utc_and_offset(po.header.entry_timestamp, chrono::Utc);
5787 let result = ocpm_gen.generate_p2p_case(&documents, start_time, &available_users);
5788 add_result(&mut event_log, result);
5789
5790 if let Some(pb) = &pb {
5791 pb.inc(1);
5792 }
5793 }
5794
5795 for chain in &flows.o2c_chains {
5797 let so = &chain.sales_order;
5798 let documents = O2cDocuments::new(
5799 &so.header.document_id,
5800 &so.customer_id,
5801 &so.header.company_code,
5802 so.total_net_amount,
5803 &so.header.currency,
5804 &ocpm_uuid_factory,
5805 )
5806 .with_delivery(
5807 chain
5808 .deliveries
5809 .first()
5810 .map(|d| d.header.document_id.as_str())
5811 .unwrap_or(""),
5812 &ocpm_uuid_factory,
5813 )
5814 .with_invoice(
5815 chain
5816 .customer_invoice
5817 .as_ref()
5818 .map(|ci| ci.header.document_id.as_str())
5819 .unwrap_or(""),
5820 &ocpm_uuid_factory,
5821 )
5822 .with_receipt(
5823 chain
5824 .customer_receipt
5825 .as_ref()
5826 .map(|r| r.header.document_id.as_str())
5827 .unwrap_or(""),
5828 &ocpm_uuid_factory,
5829 );
5830
5831 let start_time =
5832 chrono::DateTime::from_naive_utc_and_offset(so.header.entry_timestamp, chrono::Utc);
5833 let result = ocpm_gen.generate_o2c_case(&documents, start_time, &available_users);
5834 add_result(&mut event_log, result);
5835
5836 if let Some(pb) = &pb {
5837 pb.inc(1);
5838 }
5839 }
5840
5841 for project in &sourcing.sourcing_projects {
5843 let vendor_id = sourcing
5845 .contracts
5846 .iter()
5847 .find(|c| c.sourcing_project_id.as_deref() == Some(&project.project_id))
5848 .map(|c| c.vendor_id.clone())
5849 .or_else(|| sourcing.qualifications.first().map(|q| q.vendor_id.clone()))
5850 .or_else(|| {
5851 self.master_data
5852 .vendors
5853 .first()
5854 .map(|v| v.vendor_id.clone())
5855 })
5856 .unwrap_or_else(|| "V000".to_string());
5857 let mut docs = S2cDocuments::new(
5858 &project.project_id,
5859 &vendor_id,
5860 &project.company_code,
5861 project.estimated_annual_spend,
5862 &ocpm_uuid_factory,
5863 );
5864 if let Some(rfx) = sourcing
5866 .rfx_events
5867 .iter()
5868 .find(|r| r.sourcing_project_id == project.project_id)
5869 {
5870 docs = docs.with_rfx(&rfx.rfx_id, &ocpm_uuid_factory);
5871 if let Some(bid) = sourcing.bids.iter().find(|b| {
5873 b.rfx_id == rfx.rfx_id
5874 && b.status == datasynth_core::models::sourcing::BidStatus::Accepted
5875 }) {
5876 docs = docs.with_winning_bid(&bid.bid_id, &ocpm_uuid_factory);
5877 }
5878 }
5879 if let Some(contract) = sourcing
5881 .contracts
5882 .iter()
5883 .find(|c| c.sourcing_project_id.as_deref() == Some(&project.project_id))
5884 {
5885 docs = docs.with_contract(&contract.contract_id, &ocpm_uuid_factory);
5886 }
5887 let start_time = base_datetime - chrono::Duration::days(90);
5888 let result = ocpm_gen.generate_s2c_case(&docs, start_time, &available_users);
5889 add_result(&mut event_log, result);
5890
5891 if let Some(pb) = &pb {
5892 pb.inc(1);
5893 }
5894 }
5895
5896 for run in &hr.payroll_runs {
5898 let employee_id = hr
5900 .payroll_line_items
5901 .iter()
5902 .find(|li| li.payroll_id == run.payroll_id)
5903 .map(|li| li.employee_id.as_str())
5904 .unwrap_or("EMP000");
5905 let docs = H2rDocuments::new(
5906 &run.payroll_id,
5907 employee_id,
5908 &run.company_code,
5909 run.total_gross,
5910 &ocpm_uuid_factory,
5911 )
5912 .with_time_entries(
5913 hr.time_entries
5914 .iter()
5915 .filter(|t| t.date >= run.pay_period_start && t.date <= run.pay_period_end)
5916 .take(5)
5917 .map(|t| t.entry_id.as_str())
5918 .collect(),
5919 );
5920 let start_time = base_datetime - chrono::Duration::days(30);
5921 let result = ocpm_gen.generate_h2r_case(&docs, start_time, &available_users);
5922 add_result(&mut event_log, result);
5923
5924 if let Some(pb) = &pb {
5925 pb.inc(1);
5926 }
5927 }
5928
5929 for order in &manufacturing.production_orders {
5931 let mut docs = MfgDocuments::new(
5932 &order.order_id,
5933 &order.material_id,
5934 &order.company_code,
5935 order.planned_quantity,
5936 &ocpm_uuid_factory,
5937 )
5938 .with_operations(
5939 order
5940 .operations
5941 .iter()
5942 .map(|o| format!("OP-{:04}", o.operation_number))
5943 .collect::<Vec<_>>()
5944 .iter()
5945 .map(|s| s.as_str())
5946 .collect(),
5947 );
5948 if let Some(insp) = manufacturing
5950 .quality_inspections
5951 .iter()
5952 .find(|i| i.reference_id == order.order_id)
5953 {
5954 docs = docs.with_inspection(&insp.inspection_id, &ocpm_uuid_factory);
5955 }
5956 if let Some(cc) = manufacturing.cycle_counts.iter().find(|cc| {
5958 cc.items
5959 .iter()
5960 .any(|item| item.material_id == order.material_id)
5961 }) {
5962 docs = docs.with_cycle_count(&cc.count_id, &ocpm_uuid_factory);
5963 }
5964 let start_time = base_datetime - chrono::Duration::days(60);
5965 let result = ocpm_gen.generate_mfg_case(&docs, start_time, &available_users);
5966 add_result(&mut event_log, result);
5967
5968 if let Some(pb) = &pb {
5969 pb.inc(1);
5970 }
5971 }
5972
5973 for customer in &banking.customers {
5975 let customer_id_str = customer.customer_id.to_string();
5976 let mut docs = BankDocuments::new(&customer_id_str, "1000", &ocpm_uuid_factory);
5977 if let Some(account) = banking
5979 .accounts
5980 .iter()
5981 .find(|a| a.primary_owner_id == customer.customer_id)
5982 {
5983 let account_id_str = account.account_id.to_string();
5984 docs = docs.with_account(&account_id_str, &ocpm_uuid_factory);
5985 let txn_strs: Vec<String> = banking
5987 .transactions
5988 .iter()
5989 .filter(|t| t.account_id == account.account_id)
5990 .take(10)
5991 .map(|t| t.transaction_id.to_string())
5992 .collect();
5993 let txn_ids: Vec<&str> = txn_strs.iter().map(|s| s.as_str()).collect();
5994 let txn_amounts: Vec<rust_decimal::Decimal> = banking
5995 .transactions
5996 .iter()
5997 .filter(|t| t.account_id == account.account_id)
5998 .take(10)
5999 .map(|t| t.amount)
6000 .collect();
6001 if !txn_ids.is_empty() {
6002 docs = docs.with_transactions(txn_ids, txn_amounts);
6003 }
6004 }
6005 let start_time = base_datetime - chrono::Duration::days(180);
6006 let result = ocpm_gen.generate_bank_case(&docs, start_time, &available_users);
6007 add_result(&mut event_log, result);
6008
6009 if let Some(pb) = &pb {
6010 pb.inc(1);
6011 }
6012 }
6013
6014 for engagement in &audit.engagements {
6016 let engagement_id_str = engagement.engagement_id.to_string();
6017 let docs = AuditDocuments::new(
6018 &engagement_id_str,
6019 &engagement.client_entity_id,
6020 &ocpm_uuid_factory,
6021 )
6022 .with_workpapers(
6023 audit
6024 .workpapers
6025 .iter()
6026 .filter(|w| w.engagement_id == engagement.engagement_id)
6027 .take(10)
6028 .map(|w| w.workpaper_id.to_string())
6029 .collect::<Vec<_>>()
6030 .iter()
6031 .map(|s| s.as_str())
6032 .collect(),
6033 )
6034 .with_evidence(
6035 audit
6036 .evidence
6037 .iter()
6038 .filter(|e| e.engagement_id == engagement.engagement_id)
6039 .take(10)
6040 .map(|e| e.evidence_id.to_string())
6041 .collect::<Vec<_>>()
6042 .iter()
6043 .map(|s| s.as_str())
6044 .collect(),
6045 )
6046 .with_risks(
6047 audit
6048 .risk_assessments
6049 .iter()
6050 .filter(|r| r.engagement_id == engagement.engagement_id)
6051 .take(5)
6052 .map(|r| r.risk_id.to_string())
6053 .collect::<Vec<_>>()
6054 .iter()
6055 .map(|s| s.as_str())
6056 .collect(),
6057 )
6058 .with_findings(
6059 audit
6060 .findings
6061 .iter()
6062 .filter(|f| f.engagement_id == engagement.engagement_id)
6063 .take(5)
6064 .map(|f| f.finding_id.to_string())
6065 .collect::<Vec<_>>()
6066 .iter()
6067 .map(|s| s.as_str())
6068 .collect(),
6069 )
6070 .with_judgments(
6071 audit
6072 .judgments
6073 .iter()
6074 .filter(|j| j.engagement_id == engagement.engagement_id)
6075 .take(5)
6076 .map(|j| j.judgment_id.to_string())
6077 .collect::<Vec<_>>()
6078 .iter()
6079 .map(|s| s.as_str())
6080 .collect(),
6081 );
6082 let start_time = base_datetime - chrono::Duration::days(120);
6083 let result = ocpm_gen.generate_audit_case(&docs, start_time, &available_users);
6084 add_result(&mut event_log, result);
6085
6086 if let Some(pb) = &pb {
6087 pb.inc(1);
6088 }
6089 }
6090
6091 for recon in &financial_reporting.bank_reconciliations {
6093 let docs = BankReconDocuments::new(
6094 &recon.reconciliation_id,
6095 &recon.bank_account_id,
6096 &recon.company_code,
6097 recon.bank_ending_balance,
6098 &ocpm_uuid_factory,
6099 )
6100 .with_statement_lines(
6101 recon
6102 .statement_lines
6103 .iter()
6104 .take(20)
6105 .map(|l| l.line_id.as_str())
6106 .collect(),
6107 )
6108 .with_reconciling_items(
6109 recon
6110 .reconciling_items
6111 .iter()
6112 .take(10)
6113 .map(|i| i.item_id.as_str())
6114 .collect(),
6115 );
6116 let start_time = base_datetime - chrono::Duration::days(30);
6117 let result = ocpm_gen.generate_bank_recon_case(&docs, start_time, &available_users);
6118 add_result(&mut event_log, result);
6119
6120 if let Some(pb) = &pb {
6121 pb.inc(1);
6122 }
6123 }
6124
6125 event_log.compute_variants();
6127
6128 let summary = event_log.summary();
6129
6130 if let Some(pb) = pb {
6131 pb.finish_with_message(format!(
6132 "Generated {} OCPM events, {} objects",
6133 summary.event_count, summary.object_count
6134 ));
6135 }
6136
6137 Ok(OcpmSnapshot {
6138 event_count: summary.event_count,
6139 object_count: summary.object_count,
6140 case_count: summary.case_count,
6141 event_log: Some(event_log),
6142 })
6143 }
6144
6145 fn inject_anomalies(&mut self, entries: &mut [JournalEntry]) -> SynthResult<AnomalyLabels> {
6147 let pb = self.create_progress_bar(entries.len() as u64, "Injecting Anomalies");
6148
6149 let total_rate = if self.config.anomaly_injection.enabled {
6152 self.config.anomaly_injection.rates.total_rate
6153 } else if self.config.fraud.enabled {
6154 self.config.fraud.fraud_rate
6155 } else {
6156 0.02
6157 };
6158
6159 let fraud_rate = if self.config.anomaly_injection.enabled {
6160 self.config.anomaly_injection.rates.fraud_rate
6161 } else {
6162 AnomalyRateConfig::default().fraud_rate
6163 };
6164
6165 let error_rate = if self.config.anomaly_injection.enabled {
6166 self.config.anomaly_injection.rates.error_rate
6167 } else {
6168 AnomalyRateConfig::default().error_rate
6169 };
6170
6171 let process_issue_rate = if self.config.anomaly_injection.enabled {
6172 self.config.anomaly_injection.rates.process_rate
6173 } else {
6174 AnomalyRateConfig::default().process_issue_rate
6175 };
6176
6177 let anomaly_config = AnomalyInjectorConfig {
6178 rates: AnomalyRateConfig {
6179 total_rate,
6180 fraud_rate,
6181 error_rate,
6182 process_issue_rate,
6183 ..Default::default()
6184 },
6185 seed: self.seed + 5000,
6186 ..Default::default()
6187 };
6188
6189 let mut injector = AnomalyInjector::new(anomaly_config);
6190 let result = injector.process_entries(entries);
6191
6192 if let Some(pb) = &pb {
6193 pb.inc(entries.len() as u64);
6194 pb.finish_with_message("Anomaly injection complete");
6195 }
6196
6197 let mut by_type = HashMap::new();
6198 for label in &result.labels {
6199 *by_type
6200 .entry(format!("{:?}", label.anomaly_type))
6201 .or_insert(0) += 1;
6202 }
6203
6204 Ok(AnomalyLabels {
6205 labels: result.labels,
6206 summary: Some(result.summary),
6207 by_type,
6208 })
6209 }
6210
6211 fn validate_journal_entries(
6220 &mut self,
6221 entries: &[JournalEntry],
6222 ) -> SynthResult<BalanceValidationResult> {
6223 let clean_entries: Vec<&JournalEntry> = entries
6225 .iter()
6226 .filter(|e| {
6227 e.header
6228 .header_text
6229 .as_ref()
6230 .map(|t| !t.contains("[HUMAN_ERROR:"))
6231 .unwrap_or(true)
6232 })
6233 .collect();
6234
6235 let pb = self.create_progress_bar(clean_entries.len() as u64, "Validating Balances");
6236
6237 let config = BalanceTrackerConfig {
6239 validate_on_each_entry: false, track_history: false, fail_on_validation_error: false, ..Default::default()
6243 };
6244 let validation_currency = self
6245 .config
6246 .companies
6247 .first()
6248 .map(|c| c.currency.clone())
6249 .unwrap_or_else(|| "USD".to_string());
6250
6251 let mut tracker = RunningBalanceTracker::new_with_currency(config, validation_currency);
6252
6253 let clean_refs: Vec<JournalEntry> = clean_entries.into_iter().cloned().collect();
6255 let errors = tracker.apply_entries(&clean_refs);
6256
6257 if let Some(pb) = &pb {
6258 pb.inc(entries.len() as u64);
6259 }
6260
6261 let has_unbalanced = tracker
6264 .get_validation_errors()
6265 .iter()
6266 .any(|e| e.error_type == datasynth_generators::ValidationErrorType::UnbalancedEntry);
6267
6268 let mut all_errors = errors;
6271 all_errors.extend(tracker.get_validation_errors().iter().cloned());
6272 let company_codes: Vec<String> = self
6273 .config
6274 .companies
6275 .iter()
6276 .map(|c| c.code.clone())
6277 .collect();
6278
6279 let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6280 .map(|d| d + chrono::Months::new(self.config.global.period_months))
6281 .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
6282
6283 for company_code in &company_codes {
6284 if let Err(e) = tracker.validate_balance_sheet(company_code, end_date, None) {
6285 all_errors.push(e);
6286 }
6287 }
6288
6289 let stats = tracker.get_statistics();
6291
6292 let is_balanced = all_errors.is_empty();
6294
6295 if let Some(pb) = pb {
6296 let msg = if is_balanced {
6297 "Balance validation passed"
6298 } else {
6299 "Balance validation completed with errors"
6300 };
6301 pb.finish_with_message(msg);
6302 }
6303
6304 Ok(BalanceValidationResult {
6305 validated: true,
6306 is_balanced,
6307 entries_processed: stats.entries_processed,
6308 total_debits: stats.total_debits,
6309 total_credits: stats.total_credits,
6310 accounts_tracked: stats.accounts_tracked,
6311 companies_tracked: stats.companies_tracked,
6312 validation_errors: all_errors,
6313 has_unbalanced_entries: has_unbalanced,
6314 })
6315 }
6316
6317 fn inject_data_quality(
6322 &mut self,
6323 entries: &mut [JournalEntry],
6324 ) -> SynthResult<DataQualityStats> {
6325 let pb = self.create_progress_bar(entries.len() as u64, "Injecting Data Quality Issues");
6326
6327 let config = if self.config.data_quality.enabled {
6330 let dq = &self.config.data_quality;
6331 DataQualityConfig {
6332 enable_missing_values: dq.missing_values.enabled,
6333 missing_values: datasynth_generators::MissingValueConfig {
6334 global_rate: dq.effective_missing_rate(),
6335 ..Default::default()
6336 },
6337 enable_format_variations: dq.format_variations.enabled,
6338 format_variations: datasynth_generators::FormatVariationConfig {
6339 date_variation_rate: dq.format_variations.dates.rate,
6340 amount_variation_rate: dq.format_variations.amounts.rate,
6341 identifier_variation_rate: dq.format_variations.identifiers.rate,
6342 ..Default::default()
6343 },
6344 enable_duplicates: dq.duplicates.enabled,
6345 duplicates: datasynth_generators::DuplicateConfig {
6346 duplicate_rate: dq.effective_duplicate_rate(),
6347 ..Default::default()
6348 },
6349 enable_typos: dq.typos.enabled,
6350 typos: datasynth_generators::TypoConfig {
6351 char_error_rate: dq.effective_typo_rate(),
6352 ..Default::default()
6353 },
6354 enable_encoding_issues: dq.encoding_issues.enabled,
6355 encoding_issue_rate: dq.encoding_issues.rate,
6356 seed: self.seed.wrapping_add(77), track_statistics: true,
6358 }
6359 } else {
6360 DataQualityConfig::minimal()
6361 };
6362 let mut injector = DataQualityInjector::new(config);
6363
6364 injector.set_country_pack(self.primary_pack().clone());
6366
6367 let context = HashMap::new();
6369
6370 for entry in entries.iter_mut() {
6371 if let Some(text) = &entry.header.header_text {
6373 let processed = injector.process_text_field(
6374 "header_text",
6375 text,
6376 &entry.header.document_id.to_string(),
6377 &context,
6378 );
6379 match processed {
6380 Some(new_text) if new_text != *text => {
6381 entry.header.header_text = Some(new_text);
6382 }
6383 None => {
6384 entry.header.header_text = None; }
6386 _ => {}
6387 }
6388 }
6389
6390 if let Some(ref_text) = &entry.header.reference {
6392 let processed = injector.process_text_field(
6393 "reference",
6394 ref_text,
6395 &entry.header.document_id.to_string(),
6396 &context,
6397 );
6398 match processed {
6399 Some(new_text) if new_text != *ref_text => {
6400 entry.header.reference = Some(new_text);
6401 }
6402 None => {
6403 entry.header.reference = None;
6404 }
6405 _ => {}
6406 }
6407 }
6408
6409 let user_persona = entry.header.user_persona.clone();
6411 if let Some(processed) = injector.process_text_field(
6412 "user_persona",
6413 &user_persona,
6414 &entry.header.document_id.to_string(),
6415 &context,
6416 ) {
6417 if processed != user_persona {
6418 entry.header.user_persona = processed;
6419 }
6420 }
6421
6422 for line in &mut entry.lines {
6424 if let Some(ref text) = line.line_text {
6426 let processed = injector.process_text_field(
6427 "line_text",
6428 text,
6429 &entry.header.document_id.to_string(),
6430 &context,
6431 );
6432 match processed {
6433 Some(new_text) if new_text != *text => {
6434 line.line_text = Some(new_text);
6435 }
6436 None => {
6437 line.line_text = None;
6438 }
6439 _ => {}
6440 }
6441 }
6442
6443 if let Some(cc) = &line.cost_center {
6445 let processed = injector.process_text_field(
6446 "cost_center",
6447 cc,
6448 &entry.header.document_id.to_string(),
6449 &context,
6450 );
6451 match processed {
6452 Some(new_cc) if new_cc != *cc => {
6453 line.cost_center = Some(new_cc);
6454 }
6455 None => {
6456 line.cost_center = None;
6457 }
6458 _ => {}
6459 }
6460 }
6461 }
6462
6463 if let Some(pb) = &pb {
6464 pb.inc(1);
6465 }
6466 }
6467
6468 if let Some(pb) = pb {
6469 pb.finish_with_message("Data quality injection complete");
6470 }
6471
6472 Ok(injector.stats().clone())
6473 }
6474
6475 fn generate_audit_data(&mut self, entries: &[JournalEntry]) -> SynthResult<AuditSnapshot> {
6486 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6487 .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
6488 let fiscal_year = start_date.year() as u16;
6489 let period_end = start_date + chrono::Months::new(self.config.global.period_months);
6490
6491 let total_revenue: rust_decimal::Decimal = entries
6493 .iter()
6494 .flat_map(|e| e.lines.iter())
6495 .filter(|l| l.credit_amount > rust_decimal::Decimal::ZERO)
6496 .map(|l| l.credit_amount)
6497 .sum();
6498
6499 let total_items = (self.phase_config.audit_engagements * 50) as u64; let pb = self.create_progress_bar(total_items, "Generating Audit Data");
6501
6502 let mut snapshot = AuditSnapshot::default();
6503
6504 let mut engagement_gen = AuditEngagementGenerator::new(self.seed + 7000);
6506 let mut workpaper_gen = WorkpaperGenerator::new(self.seed + 7100);
6507 let mut evidence_gen = EvidenceGenerator::new(self.seed + 7200);
6508 let mut risk_gen = RiskAssessmentGenerator::new(self.seed + 7300);
6509 let mut finding_gen = FindingGenerator::new(self.seed + 7400);
6510 let mut judgment_gen = JudgmentGenerator::new(self.seed + 7500);
6511
6512 let accounts: Vec<String> = self
6514 .coa
6515 .as_ref()
6516 .map(|coa| {
6517 coa.get_postable_accounts()
6518 .iter()
6519 .map(|acc| acc.account_code().to_string())
6520 .collect()
6521 })
6522 .unwrap_or_default();
6523
6524 for (i, company) in self.config.companies.iter().enumerate() {
6526 let company_revenue = total_revenue
6528 * rust_decimal::Decimal::try_from(company.volume_weight).unwrap_or_default();
6529
6530 let engagements_for_company =
6532 self.phase_config.audit_engagements / self.config.companies.len().max(1);
6533 let extra = if i < self.phase_config.audit_engagements % self.config.companies.len() {
6534 1
6535 } else {
6536 0
6537 };
6538
6539 for _eng_idx in 0..(engagements_for_company + extra) {
6540 let mut engagement = engagement_gen.generate_engagement(
6542 &company.code,
6543 &company.name,
6544 fiscal_year,
6545 period_end,
6546 company_revenue,
6547 None, );
6549
6550 if !self.master_data.employees.is_empty() {
6552 let emp_count = self.master_data.employees.len();
6553 let base = (i * 10 + _eng_idx) % emp_count;
6555 engagement.engagement_partner_id = self.master_data.employees[base % emp_count]
6556 .employee_id
6557 .clone();
6558 engagement.engagement_manager_id = self.master_data.employees
6559 [(base + 1) % emp_count]
6560 .employee_id
6561 .clone();
6562 let real_team: Vec<String> = engagement
6563 .team_member_ids
6564 .iter()
6565 .enumerate()
6566 .map(|(j, _)| {
6567 self.master_data.employees[(base + 2 + j) % emp_count]
6568 .employee_id
6569 .clone()
6570 })
6571 .collect();
6572 engagement.team_member_ids = real_team;
6573 }
6574
6575 if let Some(pb) = &pb {
6576 pb.inc(1);
6577 }
6578
6579 let team_members: Vec<String> = engagement.team_member_ids.clone();
6581
6582 let workpapers =
6584 workpaper_gen.generate_complete_workpaper_set(&engagement, &team_members);
6585
6586 for wp in &workpapers {
6587 if let Some(pb) = &pb {
6588 pb.inc(1);
6589 }
6590
6591 let evidence = evidence_gen.generate_evidence_for_workpaper(
6593 wp,
6594 &team_members,
6595 wp.preparer_date,
6596 );
6597
6598 for _ in &evidence {
6599 if let Some(pb) = &pb {
6600 pb.inc(1);
6601 }
6602 }
6603
6604 snapshot.evidence.extend(evidence);
6605 }
6606
6607 let risks =
6609 risk_gen.generate_risks_for_engagement(&engagement, &team_members, &accounts);
6610
6611 for _ in &risks {
6612 if let Some(pb) = &pb {
6613 pb.inc(1);
6614 }
6615 }
6616 snapshot.risk_assessments.extend(risks);
6617
6618 let findings = finding_gen.generate_findings_for_engagement(
6620 &engagement,
6621 &workpapers,
6622 &team_members,
6623 );
6624
6625 for _ in &findings {
6626 if let Some(pb) = &pb {
6627 pb.inc(1);
6628 }
6629 }
6630 snapshot.findings.extend(findings);
6631
6632 let judgments =
6634 judgment_gen.generate_judgments_for_engagement(&engagement, &team_members);
6635
6636 for _ in &judgments {
6637 if let Some(pb) = &pb {
6638 pb.inc(1);
6639 }
6640 }
6641 snapshot.judgments.extend(judgments);
6642
6643 snapshot.workpapers.extend(workpapers);
6645 snapshot.engagements.push(engagement);
6646 }
6647 }
6648
6649 if let Some(pb) = pb {
6650 pb.finish_with_message(format!(
6651 "Audit data: {} engagements, {} workpapers, {} evidence",
6652 snapshot.engagements.len(),
6653 snapshot.workpapers.len(),
6654 snapshot.evidence.len()
6655 ));
6656 }
6657
6658 Ok(snapshot)
6659 }
6660
6661 fn export_graphs(
6668 &mut self,
6669 entries: &[JournalEntry],
6670 _coa: &Arc<ChartOfAccounts>,
6671 stats: &mut EnhancedGenerationStatistics,
6672 ) -> SynthResult<GraphExportSnapshot> {
6673 let pb = self.create_progress_bar(100, "Exporting Graphs");
6674
6675 let mut snapshot = GraphExportSnapshot::default();
6676
6677 let output_dir = self
6679 .output_path
6680 .clone()
6681 .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
6682 let graph_dir = output_dir.join(&self.config.graph_export.output_subdirectory);
6683
6684 for graph_type in &self.config.graph_export.graph_types {
6686 if let Some(pb) = &pb {
6687 pb.inc(10);
6688 }
6689
6690 let graph_config = TransactionGraphConfig {
6692 include_vendors: false,
6693 include_customers: false,
6694 create_debit_credit_edges: true,
6695 include_document_nodes: graph_type.include_document_nodes,
6696 min_edge_weight: graph_type.min_edge_weight,
6697 aggregate_parallel_edges: graph_type.aggregate_edges,
6698 framework: None,
6699 };
6700
6701 let mut builder = TransactionGraphBuilder::new(graph_config);
6702 builder.add_journal_entries(entries);
6703 let graph = builder.build();
6704
6705 stats.graph_node_count += graph.node_count();
6707 stats.graph_edge_count += graph.edge_count();
6708
6709 if let Some(pb) = &pb {
6710 pb.inc(40);
6711 }
6712
6713 for format in &self.config.graph_export.formats {
6715 let format_dir = graph_dir.join(&graph_type.name).join(format_name(*format));
6716
6717 if let Err(e) = std::fs::create_dir_all(&format_dir) {
6719 warn!("Failed to create graph output directory: {}", e);
6720 continue;
6721 }
6722
6723 match format {
6724 datasynth_config::schema::GraphExportFormat::PytorchGeometric => {
6725 let pyg_config = PyGExportConfig {
6726 common: datasynth_graph::CommonExportConfig {
6727 export_node_features: true,
6728 export_edge_features: true,
6729 export_node_labels: true,
6730 export_edge_labels: true,
6731 export_masks: true,
6732 train_ratio: self.config.graph_export.train_ratio,
6733 val_ratio: self.config.graph_export.validation_ratio,
6734 seed: self.config.graph_export.split_seed.unwrap_or(self.seed),
6735 },
6736 one_hot_categoricals: false,
6737 };
6738
6739 let exporter = PyGExporter::new(pyg_config);
6740 match exporter.export(&graph, &format_dir) {
6741 Ok(metadata) => {
6742 snapshot.exports.insert(
6743 format!("{}_{}", graph_type.name, "pytorch_geometric"),
6744 GraphExportInfo {
6745 name: graph_type.name.clone(),
6746 format: "pytorch_geometric".to_string(),
6747 output_path: format_dir.clone(),
6748 node_count: metadata.num_nodes,
6749 edge_count: metadata.num_edges,
6750 },
6751 );
6752 snapshot.graph_count += 1;
6753 }
6754 Err(e) => {
6755 warn!("Failed to export PyTorch Geometric graph: {}", e);
6756 }
6757 }
6758 }
6759 datasynth_config::schema::GraphExportFormat::Neo4j => {
6760 use datasynth_graph::{Neo4jExportConfig, Neo4jExporter};
6761
6762 let neo4j_config = Neo4jExportConfig {
6763 export_node_properties: true,
6764 export_edge_properties: true,
6765 export_features: true,
6766 generate_cypher: true,
6767 generate_admin_import: true,
6768 database_name: "synth".to_string(),
6769 cypher_batch_size: 1000,
6770 };
6771
6772 let exporter = Neo4jExporter::new(neo4j_config);
6773 match exporter.export(&graph, &format_dir) {
6774 Ok(metadata) => {
6775 snapshot.exports.insert(
6776 format!("{}_{}", graph_type.name, "neo4j"),
6777 GraphExportInfo {
6778 name: graph_type.name.clone(),
6779 format: "neo4j".to_string(),
6780 output_path: format_dir.clone(),
6781 node_count: metadata.num_nodes,
6782 edge_count: metadata.num_edges,
6783 },
6784 );
6785 snapshot.graph_count += 1;
6786 }
6787 Err(e) => {
6788 warn!("Failed to export Neo4j graph: {}", e);
6789 }
6790 }
6791 }
6792 datasynth_config::schema::GraphExportFormat::Dgl => {
6793 use datasynth_graph::{DGLExportConfig, DGLExporter};
6794
6795 let dgl_config = DGLExportConfig {
6796 common: datasynth_graph::CommonExportConfig {
6797 export_node_features: true,
6798 export_edge_features: true,
6799 export_node_labels: true,
6800 export_edge_labels: true,
6801 export_masks: true,
6802 train_ratio: self.config.graph_export.train_ratio,
6803 val_ratio: self.config.graph_export.validation_ratio,
6804 seed: self.config.graph_export.split_seed.unwrap_or(self.seed),
6805 },
6806 heterogeneous: false,
6807 include_pickle_script: true, };
6809
6810 let exporter = DGLExporter::new(dgl_config);
6811 match exporter.export(&graph, &format_dir) {
6812 Ok(metadata) => {
6813 snapshot.exports.insert(
6814 format!("{}_{}", graph_type.name, "dgl"),
6815 GraphExportInfo {
6816 name: graph_type.name.clone(),
6817 format: "dgl".to_string(),
6818 output_path: format_dir.clone(),
6819 node_count: metadata.common.num_nodes,
6820 edge_count: metadata.common.num_edges,
6821 },
6822 );
6823 snapshot.graph_count += 1;
6824 }
6825 Err(e) => {
6826 warn!("Failed to export DGL graph: {}", e);
6827 }
6828 }
6829 }
6830 datasynth_config::schema::GraphExportFormat::RustGraph => {
6831 use datasynth_graph::{
6832 RustGraphExportConfig, RustGraphExporter, RustGraphOutputFormat,
6833 };
6834
6835 let rustgraph_config = RustGraphExportConfig {
6836 include_features: true,
6837 include_temporal: true,
6838 include_labels: true,
6839 source_name: "datasynth".to_string(),
6840 batch_id: None,
6841 output_format: RustGraphOutputFormat::JsonLines,
6842 export_node_properties: true,
6843 export_edge_properties: true,
6844 pretty_print: false,
6845 };
6846
6847 let exporter = RustGraphExporter::new(rustgraph_config);
6848 match exporter.export(&graph, &format_dir) {
6849 Ok(metadata) => {
6850 snapshot.exports.insert(
6851 format!("{}_{}", graph_type.name, "rustgraph"),
6852 GraphExportInfo {
6853 name: graph_type.name.clone(),
6854 format: "rustgraph".to_string(),
6855 output_path: format_dir.clone(),
6856 node_count: metadata.num_nodes,
6857 edge_count: metadata.num_edges,
6858 },
6859 );
6860 snapshot.graph_count += 1;
6861 }
6862 Err(e) => {
6863 warn!("Failed to export RustGraph: {}", e);
6864 }
6865 }
6866 }
6867 datasynth_config::schema::GraphExportFormat::RustGraphHypergraph => {
6868 debug!("RustGraphHypergraph format is handled in Phase 10b (hypergraph export)");
6870 }
6871 }
6872 }
6873
6874 if let Some(pb) = &pb {
6875 pb.inc(40);
6876 }
6877 }
6878
6879 stats.graph_export_count = snapshot.graph_count;
6880 snapshot.exported = snapshot.graph_count > 0;
6881
6882 if let Some(pb) = pb {
6883 pb.finish_with_message(format!(
6884 "Graphs exported: {} graphs ({} nodes, {} edges)",
6885 snapshot.graph_count, stats.graph_node_count, stats.graph_edge_count
6886 ));
6887 }
6888
6889 Ok(snapshot)
6890 }
6891
6892 fn build_additional_graphs(
6897 &self,
6898 banking: &BankingSnapshot,
6899 _intercompany: &IntercompanySnapshot,
6900 entries: &[JournalEntry],
6901 stats: &mut EnhancedGenerationStatistics,
6902 ) {
6903 let output_dir = self
6904 .output_path
6905 .clone()
6906 .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
6907 let graph_dir = output_dir.join(&self.config.graph_export.output_subdirectory);
6908
6909 if !banking.customers.is_empty() && !banking.transactions.is_empty() {
6911 info!("Phase 10c: Building banking network graph");
6912 let config = BankingGraphConfig::default();
6913 let mut builder = BankingGraphBuilder::new(config);
6914 builder.add_customers(&banking.customers);
6915 builder.add_accounts(&banking.accounts, &banking.customers);
6916 builder.add_transactions(&banking.transactions);
6917 let graph = builder.build();
6918
6919 let node_count = graph.node_count();
6920 let edge_count = graph.edge_count();
6921 stats.graph_node_count += node_count;
6922 stats.graph_edge_count += edge_count;
6923
6924 for format in &self.config.graph_export.formats {
6926 if matches!(
6927 format,
6928 datasynth_config::schema::GraphExportFormat::PytorchGeometric
6929 ) {
6930 let format_dir = graph_dir.join("banking_network").join("pytorch_geometric");
6931 if let Err(e) = std::fs::create_dir_all(&format_dir) {
6932 warn!("Failed to create banking graph output dir: {}", e);
6933 continue;
6934 }
6935 let pyg_config = PyGExportConfig::default();
6936 let exporter = PyGExporter::new(pyg_config);
6937 if let Err(e) = exporter.export(&graph, &format_dir) {
6938 warn!("Failed to export banking graph as PyG: {}", e);
6939 } else {
6940 info!(
6941 "Banking network graph exported: {} nodes, {} edges",
6942 node_count, edge_count
6943 );
6944 }
6945 }
6946 }
6947 }
6948
6949 let approval_entries: Vec<_> = entries
6951 .iter()
6952 .filter(|je| je.header.approval_workflow.is_some())
6953 .collect();
6954
6955 if !approval_entries.is_empty() {
6956 info!(
6957 "Phase 10c: Building approval network graph ({} entries with approvals)",
6958 approval_entries.len()
6959 );
6960 let config = ApprovalGraphConfig::default();
6961 let mut builder = ApprovalGraphBuilder::new(config);
6962
6963 for je in &approval_entries {
6964 if let Some(ref wf) = je.header.approval_workflow {
6965 for action in &wf.actions {
6966 let record = datasynth_core::models::ApprovalRecord {
6967 approval_id: format!(
6968 "APR-{}-{}",
6969 je.header.document_id, action.approval_level
6970 ),
6971 document_number: je.header.document_id.to_string(),
6972 document_type: "JE".to_string(),
6973 company_code: je.company_code().to_string(),
6974 requester_id: wf.preparer_id.clone(),
6975 requester_name: Some(wf.preparer_name.clone()),
6976 approver_id: action.actor_id.clone(),
6977 approver_name: action.actor_name.clone(),
6978 approval_date: je.posting_date(),
6979 action: format!("{:?}", action.action),
6980 amount: wf.amount,
6981 approval_limit: None,
6982 comments: action.comments.clone(),
6983 delegation_from: None,
6984 is_auto_approved: false,
6985 };
6986 builder.add_approval(&record);
6987 }
6988 }
6989 }
6990
6991 let graph = builder.build();
6992 let node_count = graph.node_count();
6993 let edge_count = graph.edge_count();
6994 stats.graph_node_count += node_count;
6995 stats.graph_edge_count += edge_count;
6996
6997 for format in &self.config.graph_export.formats {
6999 if matches!(
7000 format,
7001 datasynth_config::schema::GraphExportFormat::PytorchGeometric
7002 ) {
7003 let format_dir = graph_dir.join("approval_network").join("pytorch_geometric");
7004 if let Err(e) = std::fs::create_dir_all(&format_dir) {
7005 warn!("Failed to create approval graph output dir: {}", e);
7006 continue;
7007 }
7008 let pyg_config = PyGExportConfig::default();
7009 let exporter = PyGExporter::new(pyg_config);
7010 if let Err(e) = exporter.export(&graph, &format_dir) {
7011 warn!("Failed to export approval graph as PyG: {}", e);
7012 } else {
7013 info!(
7014 "Approval network graph exported: {} nodes, {} edges",
7015 node_count, edge_count
7016 );
7017 }
7018 }
7019 }
7020 }
7021
7022 debug!(
7025 "EntityGraphBuilder: skipped (requires Company→CompanyConfig mapping; \
7026 available when intercompany relationships are modeled as IntercompanyRelationship)"
7027 );
7028 }
7029
7030 #[allow(clippy::too_many_arguments)]
7037 fn export_hypergraph(
7038 &self,
7039 coa: &Arc<ChartOfAccounts>,
7040 entries: &[JournalEntry],
7041 document_flows: &DocumentFlowSnapshot,
7042 sourcing: &SourcingSnapshot,
7043 hr: &HrSnapshot,
7044 manufacturing: &ManufacturingSnapshot,
7045 banking: &BankingSnapshot,
7046 audit: &AuditSnapshot,
7047 financial_reporting: &FinancialReportingSnapshot,
7048 ocpm: &OcpmSnapshot,
7049 stats: &mut EnhancedGenerationStatistics,
7050 ) -> SynthResult<HypergraphExportInfo> {
7051 use datasynth_graph::builders::hypergraph::{HypergraphBuilder, HypergraphConfig};
7052 use datasynth_graph::exporters::hypergraph::{HypergraphExportConfig, HypergraphExporter};
7053 use datasynth_graph::exporters::unified::{RustGraphUnifiedExporter, UnifiedExportConfig};
7054 use datasynth_graph::models::hypergraph::AggregationStrategy;
7055
7056 let hg_settings = &self.config.graph_export.hypergraph;
7057
7058 let aggregation_strategy = match hg_settings.aggregation_strategy.as_str() {
7060 "truncate" => AggregationStrategy::Truncate,
7061 "pool_by_counterparty" => AggregationStrategy::PoolByCounterparty,
7062 "pool_by_time_period" => AggregationStrategy::PoolByTimePeriod,
7063 "importance_sample" => AggregationStrategy::ImportanceSample,
7064 _ => AggregationStrategy::PoolByCounterparty,
7065 };
7066
7067 let builder_config = HypergraphConfig {
7068 max_nodes: hg_settings.max_nodes,
7069 aggregation_strategy,
7070 include_coso: hg_settings.governance_layer.include_coso,
7071 include_controls: hg_settings.governance_layer.include_controls,
7072 include_sox: hg_settings.governance_layer.include_sox,
7073 include_vendors: hg_settings.governance_layer.include_vendors,
7074 include_customers: hg_settings.governance_layer.include_customers,
7075 include_employees: hg_settings.governance_layer.include_employees,
7076 include_p2p: hg_settings.process_layer.include_p2p,
7077 include_o2c: hg_settings.process_layer.include_o2c,
7078 include_s2c: hg_settings.process_layer.include_s2c,
7079 include_h2r: hg_settings.process_layer.include_h2r,
7080 include_mfg: hg_settings.process_layer.include_mfg,
7081 include_bank: hg_settings.process_layer.include_bank,
7082 include_audit: hg_settings.process_layer.include_audit,
7083 include_r2r: hg_settings.process_layer.include_r2r,
7084 events_as_hyperedges: hg_settings.process_layer.events_as_hyperedges,
7085 docs_per_counterparty_threshold: hg_settings
7086 .process_layer
7087 .docs_per_counterparty_threshold,
7088 include_accounts: hg_settings.accounting_layer.include_accounts,
7089 je_as_hyperedges: hg_settings.accounting_layer.je_as_hyperedges,
7090 include_cross_layer_edges: hg_settings.cross_layer.enabled,
7091 };
7092
7093 let mut builder = HypergraphBuilder::new(builder_config);
7094
7095 builder.add_coso_framework();
7097
7098 if hg_settings.governance_layer.include_controls && self.config.internal_controls.enabled {
7101 let controls = InternalControl::standard_controls();
7102 builder.add_controls(&controls);
7103 }
7104
7105 builder.add_vendors(&self.master_data.vendors);
7107 builder.add_customers(&self.master_data.customers);
7108 builder.add_employees(&self.master_data.employees);
7109
7110 builder.add_p2p_documents(
7112 &document_flows.purchase_orders,
7113 &document_flows.goods_receipts,
7114 &document_flows.vendor_invoices,
7115 &document_flows.payments,
7116 );
7117 builder.add_o2c_documents(
7118 &document_flows.sales_orders,
7119 &document_flows.deliveries,
7120 &document_flows.customer_invoices,
7121 );
7122 builder.add_s2c_documents(
7123 &sourcing.sourcing_projects,
7124 &sourcing.qualifications,
7125 &sourcing.rfx_events,
7126 &sourcing.bids,
7127 &sourcing.bid_evaluations,
7128 &sourcing.contracts,
7129 );
7130 builder.add_h2r_documents(&hr.payroll_runs, &hr.time_entries, &hr.expense_reports);
7131 builder.add_mfg_documents(
7132 &manufacturing.production_orders,
7133 &manufacturing.quality_inspections,
7134 &manufacturing.cycle_counts,
7135 );
7136 builder.add_bank_documents(&banking.customers, &banking.accounts, &banking.transactions);
7137 builder.add_audit_documents(
7138 &audit.engagements,
7139 &audit.workpapers,
7140 &audit.findings,
7141 &audit.evidence,
7142 &audit.risk_assessments,
7143 &audit.judgments,
7144 );
7145 builder.add_bank_recon_documents(&financial_reporting.bank_reconciliations);
7146
7147 if let Some(ref event_log) = ocpm.event_log {
7149 builder.add_ocpm_events(event_log);
7150 }
7151
7152 builder.add_accounts(coa);
7154 builder.add_journal_entries_as_hyperedges(entries);
7155
7156 let hypergraph = builder.build();
7158
7159 let output_dir = self
7161 .output_path
7162 .clone()
7163 .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
7164 let hg_dir = output_dir
7165 .join(&self.config.graph_export.output_subdirectory)
7166 .join(&hg_settings.output_subdirectory);
7167
7168 let (num_nodes, num_edges, num_hyperedges) = match hg_settings.output_format.as_str() {
7170 "unified" => {
7171 let exporter = RustGraphUnifiedExporter::new(UnifiedExportConfig::default());
7172 let metadata = exporter.export(&hypergraph, &hg_dir).map_err(|e| {
7173 SynthError::generation(format!("Unified hypergraph export failed: {}", e))
7174 })?;
7175 (
7176 metadata.num_nodes,
7177 metadata.num_edges,
7178 metadata.num_hyperedges,
7179 )
7180 }
7181 _ => {
7182 let exporter = HypergraphExporter::new(HypergraphExportConfig::default());
7184 let metadata = exporter.export(&hypergraph, &hg_dir).map_err(|e| {
7185 SynthError::generation(format!("Hypergraph export failed: {}", e))
7186 })?;
7187 (
7188 metadata.num_nodes,
7189 metadata.num_edges,
7190 metadata.num_hyperedges,
7191 )
7192 }
7193 };
7194
7195 #[cfg(feature = "streaming")]
7197 if let Some(ref target_url) = hg_settings.stream_target {
7198 use crate::stream_client::{StreamClient, StreamConfig};
7199 use std::io::Write as _;
7200
7201 let api_key = std::env::var("RUSTGRAPH_API_KEY").ok();
7202 let stream_config = StreamConfig {
7203 target_url: target_url.clone(),
7204 batch_size: hg_settings.stream_batch_size,
7205 api_key,
7206 ..StreamConfig::default()
7207 };
7208
7209 match StreamClient::new(stream_config) {
7210 Ok(mut client) => {
7211 let exporter = RustGraphUnifiedExporter::new(UnifiedExportConfig::default());
7212 match exporter.export_to_writer(&hypergraph, &mut client) {
7213 Ok(_) => {
7214 if let Err(e) = client.flush() {
7215 warn!("Failed to flush stream client: {}", e);
7216 } else {
7217 info!("Streamed {} records to {}", client.total_sent(), target_url);
7218 }
7219 }
7220 Err(e) => {
7221 warn!("Streaming export failed: {}", e);
7222 }
7223 }
7224 }
7225 Err(e) => {
7226 warn!("Failed to create stream client: {}", e);
7227 }
7228 }
7229 }
7230
7231 stats.graph_node_count += num_nodes;
7233 stats.graph_edge_count += num_edges;
7234 stats.graph_export_count += 1;
7235
7236 Ok(HypergraphExportInfo {
7237 node_count: num_nodes,
7238 edge_count: num_edges,
7239 hyperedge_count: num_hyperedges,
7240 output_path: hg_dir,
7241 })
7242 }
7243
7244 fn generate_banking_data(&mut self) -> SynthResult<BankingSnapshot> {
7249 let pb = self.create_progress_bar(100, "Generating Banking Data");
7250
7251 let orchestrator = BankingOrchestratorBuilder::new()
7253 .config(self.config.banking.clone())
7254 .seed(self.seed + 9000)
7255 .country_pack(self.primary_pack().clone())
7256 .build();
7257
7258 if let Some(pb) = &pb {
7259 pb.inc(10);
7260 }
7261
7262 let result = orchestrator.generate();
7264
7265 if let Some(pb) = &pb {
7266 pb.inc(90);
7267 pb.finish_with_message(format!(
7268 "Banking: {} customers, {} transactions",
7269 result.customers.len(),
7270 result.transactions.len()
7271 ));
7272 }
7273
7274 let mut banking_customers = result.customers;
7279 let core_customers = &self.master_data.customers;
7280 if !core_customers.is_empty() {
7281 for (i, bc) in banking_customers.iter_mut().enumerate() {
7282 let core = &core_customers[i % core_customers.len()];
7283 bc.name = CustomerName::business(&core.name);
7284 bc.residence_country = core.country.clone();
7285 bc.enterprise_customer_id = Some(core.customer_id.clone());
7286 }
7287 debug!(
7288 "Cross-referenced {} banking customers with {} core customers",
7289 banking_customers.len(),
7290 core_customers.len()
7291 );
7292 }
7293
7294 Ok(BankingSnapshot {
7295 customers: banking_customers,
7296 accounts: result.accounts,
7297 transactions: result.transactions,
7298 transaction_labels: result.transaction_labels,
7299 customer_labels: result.customer_labels,
7300 account_labels: result.account_labels,
7301 relationship_labels: result.relationship_labels,
7302 narratives: result.narratives,
7303 suspicious_count: result.stats.suspicious_count,
7304 scenario_count: result.scenarios.len(),
7305 })
7306 }
7307
7308 fn calculate_total_transactions(&self) -> u64 {
7310 let months = self.config.global.period_months as f64;
7311 self.config
7312 .companies
7313 .iter()
7314 .map(|c| {
7315 let annual = c.annual_transaction_volume.count() as f64;
7316 let weighted = annual * c.volume_weight;
7317 (weighted * months / 12.0) as u64
7318 })
7319 .sum()
7320 }
7321
7322 fn create_progress_bar(&self, total: u64, message: &str) -> Option<ProgressBar> {
7324 if !self.phase_config.show_progress {
7325 return None;
7326 }
7327
7328 let pb = if let Some(mp) = &self.multi_progress {
7329 mp.add(ProgressBar::new(total))
7330 } else {
7331 ProgressBar::new(total)
7332 };
7333
7334 pb.set_style(
7335 ProgressStyle::default_bar()
7336 .template(&format!(
7337 "{{spinner:.green}} {} [{{elapsed_precise}}] [{{bar:40.cyan/blue}}] {{pos}}/{{len}} ({{per_sec}})",
7338 message
7339 ))
7340 .expect("Progress bar template should be valid - uses only standard indicatif placeholders")
7341 .progress_chars("#>-"),
7342 );
7343
7344 Some(pb)
7345 }
7346
7347 pub fn get_coa(&self) -> Option<Arc<ChartOfAccounts>> {
7349 self.coa.clone()
7350 }
7351
7352 pub fn get_master_data(&self) -> &MasterDataSnapshot {
7354 &self.master_data
7355 }
7356
7357 fn build_lineage_graph(&self) -> super::lineage::LineageGraph {
7359 use super::lineage::LineageGraphBuilder;
7360
7361 let mut builder = LineageGraphBuilder::new();
7362
7363 builder.add_config_section("config:global", "Global Config");
7365 builder.add_config_section("config:chart_of_accounts", "Chart of Accounts Config");
7366 builder.add_config_section("config:transactions", "Transaction Config");
7367
7368 builder.add_generator_phase("phase:coa", "Chart of Accounts Generation");
7370 builder.add_generator_phase("phase:je", "Journal Entry Generation");
7371
7372 builder.configured_by("phase:coa", "config:chart_of_accounts");
7374 builder.configured_by("phase:je", "config:transactions");
7375
7376 builder.add_output_file("output:je", "Journal Entries", "sample_entries.json");
7378 builder.produced_by("output:je", "phase:je");
7379
7380 if self.phase_config.generate_master_data {
7382 builder.add_config_section("config:master_data", "Master Data Config");
7383 builder.add_generator_phase("phase:master_data", "Master Data Generation");
7384 builder.configured_by("phase:master_data", "config:master_data");
7385 builder.input_to("phase:master_data", "phase:je");
7386 }
7387
7388 if self.phase_config.generate_document_flows {
7389 builder.add_config_section("config:document_flows", "Document Flow Config");
7390 builder.add_generator_phase("phase:p2p", "P2P Document Flow");
7391 builder.add_generator_phase("phase:o2c", "O2C Document Flow");
7392 builder.configured_by("phase:p2p", "config:document_flows");
7393 builder.configured_by("phase:o2c", "config:document_flows");
7394
7395 builder.add_output_file("output:po", "Purchase Orders", "purchase_orders.csv");
7396 builder.add_output_file("output:gr", "Goods Receipts", "goods_receipts.csv");
7397 builder.add_output_file("output:vi", "Vendor Invoices", "vendor_invoices.csv");
7398 builder.add_output_file("output:so", "Sales Orders", "sales_orders.csv");
7399 builder.add_output_file("output:ci", "Customer Invoices", "customer_invoices.csv");
7400
7401 builder.produced_by("output:po", "phase:p2p");
7402 builder.produced_by("output:gr", "phase:p2p");
7403 builder.produced_by("output:vi", "phase:p2p");
7404 builder.produced_by("output:so", "phase:o2c");
7405 builder.produced_by("output:ci", "phase:o2c");
7406 }
7407
7408 if self.phase_config.inject_anomalies {
7409 builder.add_config_section("config:fraud", "Fraud/Anomaly Config");
7410 builder.add_generator_phase("phase:anomaly", "Anomaly Injection");
7411 builder.configured_by("phase:anomaly", "config:fraud");
7412 builder.add_output_file(
7413 "output:labels",
7414 "Anomaly Labels",
7415 "labels/anomaly_labels.csv",
7416 );
7417 builder.produced_by("output:labels", "phase:anomaly");
7418 }
7419
7420 if self.phase_config.generate_audit {
7421 builder.add_config_section("config:audit", "Audit Config");
7422 builder.add_generator_phase("phase:audit", "Audit Data Generation");
7423 builder.configured_by("phase:audit", "config:audit");
7424 }
7425
7426 if self.phase_config.generate_banking {
7427 builder.add_config_section("config:banking", "Banking Config");
7428 builder.add_generator_phase("phase:banking", "Banking KYC/AML Generation");
7429 builder.configured_by("phase:banking", "config:banking");
7430 }
7431
7432 if self.config.llm.enabled {
7433 builder.add_config_section("config:llm", "LLM Enrichment Config");
7434 builder.add_generator_phase("phase:llm_enrichment", "LLM Enrichment");
7435 builder.configured_by("phase:llm_enrichment", "config:llm");
7436 }
7437
7438 if self.config.diffusion.enabled {
7439 builder.add_config_section("config:diffusion", "Diffusion Enhancement Config");
7440 builder.add_generator_phase("phase:diffusion", "Diffusion Enhancement");
7441 builder.configured_by("phase:diffusion", "config:diffusion");
7442 }
7443
7444 if self.config.causal.enabled {
7445 builder.add_config_section("config:causal", "Causal Generation Config");
7446 builder.add_generator_phase("phase:causal", "Causal Overlay");
7447 builder.configured_by("phase:causal", "config:causal");
7448 }
7449
7450 builder.build()
7451 }
7452}
7453
7454fn format_name(format: datasynth_config::schema::GraphExportFormat) -> &'static str {
7456 match format {
7457 datasynth_config::schema::GraphExportFormat::PytorchGeometric => "pytorch_geometric",
7458 datasynth_config::schema::GraphExportFormat::Neo4j => "neo4j",
7459 datasynth_config::schema::GraphExportFormat::Dgl => "dgl",
7460 datasynth_config::schema::GraphExportFormat::RustGraph => "rustgraph",
7461 datasynth_config::schema::GraphExportFormat::RustGraphHypergraph => "rustgraph_hypergraph",
7462 }
7463}
7464
7465#[cfg(test)]
7466#[allow(clippy::unwrap_used)]
7467mod tests {
7468 use super::*;
7469 use datasynth_config::schema::*;
7470
7471 fn create_test_config() -> GeneratorConfig {
7472 GeneratorConfig {
7473 global: GlobalConfig {
7474 industry: IndustrySector::Manufacturing,
7475 start_date: "2024-01-01".to_string(),
7476 period_months: 1,
7477 seed: Some(42),
7478 parallel: false,
7479 group_currency: "USD".to_string(),
7480 worker_threads: 0,
7481 memory_limit_mb: 0,
7482 fiscal_year_months: None,
7483 },
7484 companies: vec![CompanyConfig {
7485 code: "1000".to_string(),
7486 name: "Test Company".to_string(),
7487 currency: "USD".to_string(),
7488 country: "US".to_string(),
7489 annual_transaction_volume: TransactionVolume::TenK,
7490 volume_weight: 1.0,
7491 fiscal_year_variant: "K4".to_string(),
7492 }],
7493 chart_of_accounts: ChartOfAccountsConfig {
7494 complexity: CoAComplexity::Small,
7495 industry_specific: true,
7496 custom_accounts: None,
7497 min_hierarchy_depth: 2,
7498 max_hierarchy_depth: 4,
7499 },
7500 transactions: TransactionConfig::default(),
7501 output: OutputConfig::default(),
7502 fraud: FraudConfig::default(),
7503 internal_controls: InternalControlsConfig::default(),
7504 business_processes: BusinessProcessConfig::default(),
7505 user_personas: UserPersonaConfig::default(),
7506 templates: TemplateConfig::default(),
7507 approval: ApprovalConfig::default(),
7508 departments: DepartmentConfig::default(),
7509 master_data: MasterDataConfig::default(),
7510 document_flows: DocumentFlowConfig::default(),
7511 intercompany: IntercompanyConfig::default(),
7512 balance: BalanceConfig::default(),
7513 ocpm: OcpmConfig::default(),
7514 audit: AuditGenerationConfig::default(),
7515 banking: datasynth_banking::BankingConfig::default(),
7516 data_quality: DataQualitySchemaConfig::default(),
7517 scenario: ScenarioConfig::default(),
7518 temporal: TemporalDriftConfig::default(),
7519 graph_export: GraphExportConfig::default(),
7520 streaming: StreamingSchemaConfig::default(),
7521 rate_limit: RateLimitSchemaConfig::default(),
7522 temporal_attributes: TemporalAttributeSchemaConfig::default(),
7523 relationships: RelationshipSchemaConfig::default(),
7524 accounting_standards: AccountingStandardsConfig::default(),
7525 audit_standards: AuditStandardsConfig::default(),
7526 distributions: Default::default(),
7527 temporal_patterns: Default::default(),
7528 vendor_network: VendorNetworkSchemaConfig::default(),
7529 customer_segmentation: CustomerSegmentationSchemaConfig::default(),
7530 relationship_strength: RelationshipStrengthSchemaConfig::default(),
7531 cross_process_links: CrossProcessLinksSchemaConfig::default(),
7532 organizational_events: OrganizationalEventsSchemaConfig::default(),
7533 behavioral_drift: BehavioralDriftSchemaConfig::default(),
7534 market_drift: MarketDriftSchemaConfig::default(),
7535 drift_labeling: DriftLabelingSchemaConfig::default(),
7536 anomaly_injection: Default::default(),
7537 industry_specific: Default::default(),
7538 fingerprint_privacy: Default::default(),
7539 quality_gates: Default::default(),
7540 compliance: Default::default(),
7541 webhooks: Default::default(),
7542 llm: Default::default(),
7543 diffusion: Default::default(),
7544 causal: Default::default(),
7545 source_to_pay: Default::default(),
7546 financial_reporting: Default::default(),
7547 hr: Default::default(),
7548 manufacturing: Default::default(),
7549 sales_quotes: Default::default(),
7550 tax: Default::default(),
7551 treasury: Default::default(),
7552 project_accounting: Default::default(),
7553 esg: Default::default(),
7554 country_packs: None,
7555 scenarios: Default::default(),
7556 session: Default::default(),
7557 }
7558 }
7559
7560 #[test]
7561 fn test_enhanced_orchestrator_creation() {
7562 let config = create_test_config();
7563 let orchestrator = EnhancedOrchestrator::with_defaults(config);
7564 assert!(orchestrator.is_ok());
7565 }
7566
7567 #[test]
7568 fn test_minimal_generation() {
7569 let config = create_test_config();
7570 let phase_config = PhaseConfig {
7571 generate_master_data: false,
7572 generate_document_flows: false,
7573 generate_journal_entries: true,
7574 inject_anomalies: false,
7575 show_progress: false,
7576 ..Default::default()
7577 };
7578
7579 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
7580 let result = orchestrator.generate();
7581
7582 assert!(result.is_ok());
7583 let result = result.unwrap();
7584 assert!(!result.journal_entries.is_empty());
7585 }
7586
7587 #[test]
7588 fn test_master_data_generation() {
7589 let config = create_test_config();
7590 let phase_config = PhaseConfig {
7591 generate_master_data: true,
7592 generate_document_flows: false,
7593 generate_journal_entries: false,
7594 inject_anomalies: false,
7595 show_progress: false,
7596 vendors_per_company: 5,
7597 customers_per_company: 5,
7598 materials_per_company: 10,
7599 assets_per_company: 5,
7600 employees_per_company: 10,
7601 ..Default::default()
7602 };
7603
7604 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
7605 let result = orchestrator.generate().unwrap();
7606
7607 assert!(!result.master_data.vendors.is_empty());
7608 assert!(!result.master_data.customers.is_empty());
7609 assert!(!result.master_data.materials.is_empty());
7610 }
7611
7612 #[test]
7613 fn test_document_flow_generation() {
7614 let config = create_test_config();
7615 let phase_config = PhaseConfig {
7616 generate_master_data: true,
7617 generate_document_flows: true,
7618 generate_journal_entries: false,
7619 inject_anomalies: false,
7620 inject_data_quality: false,
7621 validate_balances: false,
7622 generate_ocpm_events: false,
7623 show_progress: false,
7624 vendors_per_company: 5,
7625 customers_per_company: 5,
7626 materials_per_company: 10,
7627 assets_per_company: 5,
7628 employees_per_company: 10,
7629 p2p_chains: 5,
7630 o2c_chains: 5,
7631 ..Default::default()
7632 };
7633
7634 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
7635 let result = orchestrator.generate().unwrap();
7636
7637 assert!(!result.document_flows.p2p_chains.is_empty());
7639 assert!(!result.document_flows.o2c_chains.is_empty());
7640
7641 assert!(!result.document_flows.purchase_orders.is_empty());
7643 assert!(!result.document_flows.sales_orders.is_empty());
7644 }
7645
7646 #[test]
7647 fn test_anomaly_injection() {
7648 let config = create_test_config();
7649 let phase_config = PhaseConfig {
7650 generate_master_data: false,
7651 generate_document_flows: false,
7652 generate_journal_entries: true,
7653 inject_anomalies: true,
7654 show_progress: false,
7655 ..Default::default()
7656 };
7657
7658 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
7659 let result = orchestrator.generate().unwrap();
7660
7661 assert!(!result.journal_entries.is_empty());
7663
7664 assert!(result.anomaly_labels.summary.is_some());
7667 }
7668
7669 #[test]
7670 fn test_full_generation_pipeline() {
7671 let config = create_test_config();
7672 let phase_config = PhaseConfig {
7673 generate_master_data: true,
7674 generate_document_flows: true,
7675 generate_journal_entries: true,
7676 inject_anomalies: false,
7677 inject_data_quality: false,
7678 validate_balances: true,
7679 generate_ocpm_events: false,
7680 show_progress: false,
7681 vendors_per_company: 3,
7682 customers_per_company: 3,
7683 materials_per_company: 5,
7684 assets_per_company: 3,
7685 employees_per_company: 5,
7686 p2p_chains: 3,
7687 o2c_chains: 3,
7688 ..Default::default()
7689 };
7690
7691 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
7692 let result = orchestrator.generate().unwrap();
7693
7694 assert!(!result.master_data.vendors.is_empty());
7696 assert!(!result.master_data.customers.is_empty());
7697 assert!(!result.document_flows.p2p_chains.is_empty());
7698 assert!(!result.document_flows.o2c_chains.is_empty());
7699 assert!(!result.journal_entries.is_empty());
7700 assert!(result.statistics.accounts_count > 0);
7701
7702 assert!(!result.subledger.ap_invoices.is_empty());
7704 assert!(!result.subledger.ar_invoices.is_empty());
7705
7706 assert!(result.balance_validation.validated);
7708 assert!(result.balance_validation.entries_processed > 0);
7709 }
7710
7711 #[test]
7712 fn test_subledger_linking() {
7713 let config = create_test_config();
7714 let phase_config = PhaseConfig {
7715 generate_master_data: true,
7716 generate_document_flows: true,
7717 generate_journal_entries: false,
7718 inject_anomalies: false,
7719 inject_data_quality: false,
7720 validate_balances: false,
7721 generate_ocpm_events: false,
7722 show_progress: false,
7723 vendors_per_company: 5,
7724 customers_per_company: 5,
7725 materials_per_company: 10,
7726 assets_per_company: 3,
7727 employees_per_company: 5,
7728 p2p_chains: 5,
7729 o2c_chains: 5,
7730 ..Default::default()
7731 };
7732
7733 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
7734 let result = orchestrator.generate().unwrap();
7735
7736 assert!(!result.document_flows.vendor_invoices.is_empty());
7738 assert!(!result.document_flows.customer_invoices.is_empty());
7739
7740 assert!(!result.subledger.ap_invoices.is_empty());
7742 assert!(!result.subledger.ar_invoices.is_empty());
7743
7744 assert_eq!(
7746 result.subledger.ap_invoices.len(),
7747 result.document_flows.vendor_invoices.len()
7748 );
7749
7750 assert_eq!(
7752 result.subledger.ar_invoices.len(),
7753 result.document_flows.customer_invoices.len()
7754 );
7755
7756 assert_eq!(
7758 result.statistics.ap_invoice_count,
7759 result.subledger.ap_invoices.len()
7760 );
7761 assert_eq!(
7762 result.statistics.ar_invoice_count,
7763 result.subledger.ar_invoices.len()
7764 );
7765 }
7766
7767 #[test]
7768 fn test_balance_validation() {
7769 let config = create_test_config();
7770 let phase_config = PhaseConfig {
7771 generate_master_data: false,
7772 generate_document_flows: false,
7773 generate_journal_entries: true,
7774 inject_anomalies: false,
7775 validate_balances: true,
7776 show_progress: false,
7777 ..Default::default()
7778 };
7779
7780 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
7781 let result = orchestrator.generate().unwrap();
7782
7783 assert!(result.balance_validation.validated);
7785 assert!(result.balance_validation.entries_processed > 0);
7786
7787 assert!(!result.balance_validation.has_unbalanced_entries);
7789
7790 assert_eq!(
7792 result.balance_validation.total_debits,
7793 result.balance_validation.total_credits
7794 );
7795 }
7796
7797 #[test]
7798 fn test_statistics_accuracy() {
7799 let config = create_test_config();
7800 let phase_config = PhaseConfig {
7801 generate_master_data: true,
7802 generate_document_flows: false,
7803 generate_journal_entries: true,
7804 inject_anomalies: false,
7805 show_progress: false,
7806 vendors_per_company: 10,
7807 customers_per_company: 20,
7808 materials_per_company: 15,
7809 assets_per_company: 5,
7810 employees_per_company: 8,
7811 ..Default::default()
7812 };
7813
7814 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
7815 let result = orchestrator.generate().unwrap();
7816
7817 assert_eq!(
7819 result.statistics.vendor_count,
7820 result.master_data.vendors.len()
7821 );
7822 assert_eq!(
7823 result.statistics.customer_count,
7824 result.master_data.customers.len()
7825 );
7826 assert_eq!(
7827 result.statistics.material_count,
7828 result.master_data.materials.len()
7829 );
7830 assert_eq!(
7831 result.statistics.total_entries as usize,
7832 result.journal_entries.len()
7833 );
7834 }
7835
7836 #[test]
7837 fn test_phase_config_defaults() {
7838 let config = PhaseConfig::default();
7839 assert!(config.generate_master_data);
7840 assert!(config.generate_document_flows);
7841 assert!(config.generate_journal_entries);
7842 assert!(!config.inject_anomalies);
7843 assert!(config.validate_balances);
7844 assert!(config.show_progress);
7845 assert!(config.vendors_per_company > 0);
7846 assert!(config.customers_per_company > 0);
7847 }
7848
7849 #[test]
7850 fn test_get_coa_before_generation() {
7851 let config = create_test_config();
7852 let orchestrator = EnhancedOrchestrator::with_defaults(config).unwrap();
7853
7854 assert!(orchestrator.get_coa().is_none());
7856 }
7857
7858 #[test]
7859 fn test_get_coa_after_generation() {
7860 let config = create_test_config();
7861 let phase_config = PhaseConfig {
7862 generate_master_data: false,
7863 generate_document_flows: false,
7864 generate_journal_entries: true,
7865 inject_anomalies: false,
7866 show_progress: false,
7867 ..Default::default()
7868 };
7869
7870 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
7871 let _ = orchestrator.generate().unwrap();
7872
7873 assert!(orchestrator.get_coa().is_some());
7875 }
7876
7877 #[test]
7878 fn test_get_master_data() {
7879 let config = create_test_config();
7880 let phase_config = PhaseConfig {
7881 generate_master_data: true,
7882 generate_document_flows: false,
7883 generate_journal_entries: false,
7884 inject_anomalies: false,
7885 show_progress: false,
7886 vendors_per_company: 5,
7887 customers_per_company: 5,
7888 materials_per_company: 5,
7889 assets_per_company: 5,
7890 employees_per_company: 5,
7891 ..Default::default()
7892 };
7893
7894 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
7895 let _ = orchestrator.generate().unwrap();
7896
7897 let master_data = orchestrator.get_master_data();
7898 assert!(!master_data.vendors.is_empty());
7899 }
7900
7901 #[test]
7902 fn test_with_progress_builder() {
7903 let config = create_test_config();
7904 let orchestrator = EnhancedOrchestrator::with_defaults(config)
7905 .unwrap()
7906 .with_progress(false);
7907
7908 assert!(!orchestrator.phase_config.show_progress);
7910 }
7911
/// Adds a second company ("2000", EUR/DE) to the test configuration and
/// verifies that master data is generated for every configured company.
///
/// With five vendors and five customers requested per company, the test
/// expects at least ten of each and exactly two companies in the statistics.
#[test]
fn test_multi_company_generation() {
    let mut config = create_test_config();
    config.companies.push(CompanyConfig {
        code: "2000".to_string(),
        name: "Subsidiary".to_string(),
        currency: "EUR".to_string(),
        country: "DE".to_string(),
        annual_transaction_volume: TransactionVolume::TenK,
        volume_weight: 0.5,
        fiscal_year_variant: "K4".to_string(),
    });

    let phase_config = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        vendors_per_company: 5,
        customers_per_company: 5,
        materials_per_company: 5,
        assets_per_company: 5,
        employees_per_company: 5,
        ..Default::default()
    };

    let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
    let result = orchestrator.generate().unwrap();

    // 5 per company across the 2 companies asserted below => at least 10 each.
    assert!(result.statistics.vendor_count >= 10);
    assert!(result.statistics.customer_count >= 10);
    // `assert_eq!` reports the actual count on failure, unlike `assert!(a == b)`.
    assert_eq!(result.statistics.companies_count, 2);
}
7947
/// Requests document flows while master-data generation is disabled and
/// checks that neither P2P nor O2C chains end up in the result.
#[test]
fn test_empty_master_data_skips_document_flows() {
    let test_config = create_test_config();
    let phases = PhaseConfig {
        // Document flows are requested, but no master data is generated for them.
        generate_master_data: false,
        generate_document_flows: true,
        generate_journal_entries: false,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut orchestrator = EnhancedOrchestrator::new(test_config, phases).unwrap();
    let outcome = orchestrator.generate().unwrap();

    let flows = &outcome.document_flows;
    assert!(flows.p2p_chains.is_empty());
    assert!(flows.o2c_chains.is_empty());
}
7967
/// The `total_line_items` statistic must equal the sum of `line_count()`
/// over all generated journal entries.
#[test]
fn test_journal_entry_line_item_count() {
    let test_config = create_test_config();
    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut orchestrator = EnhancedOrchestrator::new(test_config, phases).unwrap();
    let outcome = orchestrator.generate().unwrap();

    // Recompute the total independently from the entries themselves.
    let summed_lines = outcome
        .journal_entries
        .iter()
        .map(|entry| entry.line_count() as u64)
        .sum::<u64>();
    assert_eq!(outcome.statistics.total_line_items, summed_lines);
}
7991
/// Enables the audit phase with two engagements and checks that every audit
/// collection is populated and that the audit statistics mirror the
/// collection lengths.
#[test]
fn test_audit_generation() {
    let test_config = create_test_config();
    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        generate_audit: true,
        audit_engagements: 2,
        workpapers_per_engagement: 5,
        evidence_per_workpaper: 2,
        risks_per_engagement: 3,
        findings_per_engagement: 2,
        judgments_per_engagement: 2,
        ..Default::default()
    };

    let mut orchestrator = EnhancedOrchestrator::new(test_config, phases).unwrap();
    let outcome = orchestrator.generate().unwrap();
    let audit = &outcome.audit;
    let stats = &outcome.statistics;

    // Exactly the requested number of engagements; the rest only need to exist.
    assert_eq!(audit.engagements.len(), 2);
    assert!(!audit.workpapers.is_empty());
    assert!(!audit.evidence.is_empty());
    assert!(!audit.risk_assessments.is_empty());
    assert!(!audit.findings.is_empty());
    assert!(!audit.judgments.is_empty());

    // Each reported count has to agree with the matching collection.
    assert_eq!(stats.audit_engagement_count, audit.engagements.len());
    assert_eq!(stats.audit_workpaper_count, audit.workpapers.len());
    assert_eq!(stats.audit_evidence_count, audit.evidence.len());
    assert_eq!(stats.audit_risk_count, audit.risk_assessments.len());
    assert_eq!(stats.audit_finding_count, audit.findings.len());
    assert_eq!(stats.audit_judgment_count, audit.judgments.len());
}
8048
/// The test configuration leaves LLM, diffusion and causal phases disabled,
/// and a run with that configuration must report zero/`None` for all of
/// their statistics.
#[test]
fn test_new_phases_disabled_by_default() {
    let test_config = create_test_config();
    // Precondition: none of the three phases is enabled in the test config.
    assert!(!test_config.llm.enabled);
    assert!(!test_config.diffusion.enabled);
    assert!(!test_config.causal.enabled);

    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut orchestrator = EnhancedOrchestrator::new(test_config, phases).unwrap();
    let outcome = orchestrator.generate().unwrap();
    let stats = &outcome.statistics;

    assert_eq!(stats.llm_enrichment_ms, 0);
    assert_eq!(stats.llm_vendors_enriched, 0);
    assert_eq!(stats.diffusion_enhancement_ms, 0);
    assert_eq!(stats.diffusion_samples_generated, 0);
    assert_eq!(stats.causal_generation_ms, 0);
    assert_eq!(stats.causal_samples_generated, 0);
    assert!(stats.causal_validation_passed.is_none());
}
8078
/// With LLM enrichment enabled and capped at three vendors, the run must
/// enrich at least one vendor and no more than the cap.
#[test]
fn test_llm_enrichment_enabled() {
    let mut test_config = create_test_config();
    test_config.llm.enabled = true;
    test_config.llm.max_vendor_enrichments = 3;

    let phases = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: false,
        inject_anomalies: false,
        show_progress: false,
        vendors_per_company: 5,
        customers_per_company: 3,
        materials_per_company: 3,
        assets_per_company: 3,
        employees_per_company: 3,
        ..Default::default()
    };

    let mut orchestrator = EnhancedOrchestrator::new(test_config, phases).unwrap();
    let outcome = orchestrator.generate().unwrap();

    let stats = &outcome.statistics;
    assert!(stats.llm_vendors_enriched > 0);
    // Upper bound is the `max_vendor_enrichments` value set above.
    assert!(stats.llm_vendors_enriched <= 3);
}
8106
/// With diffusion enabled and `sample_size` set to 20, the statistics must
/// report exactly 20 generated diffusion samples.
#[test]
fn test_diffusion_enhancement_enabled() {
    let mut test_config = create_test_config();
    test_config.diffusion.enabled = true;
    test_config.diffusion.n_steps = 50;
    test_config.diffusion.sample_size = 20;

    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut orchestrator = EnhancedOrchestrator::new(test_config, phases).unwrap();
    let outcome = orchestrator.generate().unwrap();

    let stats = &outcome.statistics;
    assert_eq!(stats.diffusion_samples_generated, 20);
}
8129
/// Enables the causal overlay with the "fraud_detection" template, 100
/// samples and validation on; expects 100 samples and a recorded
/// validation outcome.
#[test]
fn test_causal_overlay_enabled() {
    let mut test_config = create_test_config();
    test_config.causal.enabled = true;
    test_config.causal.template = String::from("fraud_detection");
    test_config.causal.sample_size = 100;
    test_config.causal.validate = true;

    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut orchestrator = EnhancedOrchestrator::new(test_config, phases).unwrap();
    let outcome = orchestrator.generate().unwrap();

    let stats = &outcome.statistics;
    assert_eq!(stats.causal_samples_generated, 100);
    // Validation was requested, so some result (pass or fail) must be present.
    assert!(stats.causal_validation_passed.is_some());
}
8155
/// Enables the causal overlay with the "revenue_cycle" template, 50 samples
/// and validation off; expects 50 samples and no validation outcome.
#[test]
fn test_causal_overlay_revenue_cycle_template() {
    let mut test_config = create_test_config();
    test_config.causal.enabled = true;
    test_config.causal.template = String::from("revenue_cycle");
    test_config.causal.sample_size = 50;
    test_config.causal.validate = false;

    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut orchestrator = EnhancedOrchestrator::new(test_config, phases).unwrap();
    let outcome = orchestrator.generate().unwrap();

    let stats = &outcome.statistics;
    assert_eq!(stats.causal_samples_generated, 50);
    // Validation was not requested, so no outcome may be recorded.
    assert!(stats.causal_validation_passed.is_none());
}
8181
/// Turns on LLM enrichment, diffusion and the causal overlay in a single run
/// and checks that each phase reports its expected statistics.
#[test]
fn test_all_new_phases_enabled_together() {
    let mut test_config = create_test_config();
    // LLM enrichment, capped at two vendors.
    test_config.llm.enabled = true;
    test_config.llm.max_vendor_enrichments = 2;
    // Diffusion with ten samples.
    test_config.diffusion.enabled = true;
    test_config.diffusion.n_steps = 20;
    test_config.diffusion.sample_size = 10;
    // Causal overlay with fifty samples and validation on; the template is
    // left at whatever the test config provides.
    test_config.causal.enabled = true;
    test_config.causal.sample_size = 50;
    test_config.causal.validate = true;

    let phases = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        vendors_per_company: 5,
        customers_per_company: 3,
        materials_per_company: 3,
        assets_per_company: 3,
        employees_per_company: 3,
        ..Default::default()
    };

    let mut orchestrator = EnhancedOrchestrator::new(test_config, phases).unwrap();
    let outcome = orchestrator.generate().unwrap();

    let stats = &outcome.statistics;
    assert!(stats.llm_vendors_enriched > 0);
    assert_eq!(stats.diffusion_samples_generated, 10);
    assert_eq!(stats.causal_samples_generated, 50);
    assert!(stats.causal_validation_passed.is_some());
}
8217
/// Serializes statistics carrying the LLM/diffusion/causal fields to JSON and
/// back, and checks that each of those fields survives the round trip.
#[test]
fn test_statistics_serialization_with_new_fields() {
    let original = EnhancedGenerationStatistics {
        total_entries: 100,
        total_line_items: 500,
        llm_enrichment_ms: 42,
        llm_vendors_enriched: 10,
        diffusion_enhancement_ms: 100,
        diffusion_samples_generated: 50,
        causal_generation_ms: 200,
        causal_samples_generated: 100,
        causal_validation_passed: Some(true),
        ..Default::default()
    };

    let encoded = serde_json::to_string(&original).unwrap();
    let round_tripped: EnhancedGenerationStatistics = serde_json::from_str(&encoded).unwrap();

    assert_eq!(round_tripped.llm_enrichment_ms, 42);
    assert_eq!(round_tripped.llm_vendors_enriched, 10);
    assert_eq!(round_tripped.diffusion_enhancement_ms, 100);
    assert_eq!(round_tripped.diffusion_samples_generated, 50);
    assert_eq!(round_tripped.causal_generation_ms, 200);
    assert_eq!(round_tripped.causal_samples_generated, 100);
    assert_eq!(round_tripped.causal_validation_passed, Some(true));
}
8244
/// Deserializes a statistics payload that contains none of the
/// LLM/diffusion/causal keys and checks that those fields come back as
/// zero or `None`.
#[test]
fn test_statistics_backward_compat_deserialization() {
    // Payload without any `llm_*`, `diffusion_*` or `causal_*` keys.
    let legacy_json = r#"{
        "total_entries": 100,
        "total_line_items": 500,
        "accounts_count": 50,
        "companies_count": 1,
        "period_months": 12,
        "vendor_count": 10,
        "customer_count": 20,
        "material_count": 15,
        "asset_count": 5,
        "employee_count": 8,
        "p2p_chain_count": 5,
        "o2c_chain_count": 5,
        "ap_invoice_count": 5,
        "ar_invoice_count": 5,
        "ocpm_event_count": 0,
        "ocpm_object_count": 0,
        "ocpm_case_count": 0,
        "audit_engagement_count": 0,
        "audit_workpaper_count": 0,
        "audit_evidence_count": 0,
        "audit_risk_count": 0,
        "audit_finding_count": 0,
        "audit_judgment_count": 0,
        "anomalies_injected": 0,
        "data_quality_issues": 0,
        "banking_customer_count": 0,
        "banking_account_count": 0,
        "banking_transaction_count": 0,
        "banking_suspicious_count": 0,
        "graph_export_count": 0,
        "graph_node_count": 0,
        "graph_edge_count": 0
    }"#;

    let parsed: EnhancedGenerationStatistics = serde_json::from_str(legacy_json).unwrap();

    // Every field absent from the payload must come back as zero / `None`.
    assert_eq!(parsed.llm_enrichment_ms, 0);
    assert_eq!(parsed.llm_vendors_enriched, 0);
    assert_eq!(parsed.diffusion_enhancement_ms, 0);
    assert_eq!(parsed.diffusion_samples_generated, 0);
    assert_eq!(parsed.causal_generation_ms, 0);
    assert_eq!(parsed.causal_samples_generated, 0);
    assert!(parsed.causal_validation_passed.is_none());
}
8294}