1use std::collections::HashMap;
23use std::path::PathBuf;
24use std::sync::Arc;
25
26use chrono::{Datelike, NaiveDate};
27use indicatif::{MultiProgress, ProgressBar, ProgressStyle};
28use rand::SeedableRng;
29use serde::{Deserialize, Serialize};
30use tracing::{debug, info, warn};
31
32use datasynth_banking::{
33 models::{BankAccount, BankTransaction, BankingCustomer, CustomerName},
34 BankingOrchestratorBuilder,
35};
36use datasynth_config::schema::GeneratorConfig;
37use datasynth_core::error::{SynthError, SynthResult};
38use datasynth_core::models::audit::{
39 AuditEngagement, AuditEvidence, AuditFinding, ProfessionalJudgment, RiskAssessment, Workpaper,
40};
41use datasynth_core::models::sourcing::{
42 BidEvaluation, CatalogItem, ProcurementContract, RfxEvent, SourcingProject, SpendAnalysis,
43 SupplierBid, SupplierQualification, SupplierScorecard,
44};
45use datasynth_core::models::subledger::ap::APInvoice;
46use datasynth_core::models::subledger::ar::ARInvoice;
47use datasynth_core::models::*;
48use datasynth_core::traits::Generator;
49use datasynth_core::{DegradationActions, DegradationLevel, ResourceGuard, ResourceGuardBuilder};
50use datasynth_fingerprint::{
51 io::FingerprintReader,
52 models::Fingerprint,
53 synthesis::{ConfigSynthesizer, CopulaGeneratorSpec, SynthesisOptions},
54};
55use datasynth_generators::{
56 AnomalyInjector,
58 AnomalyInjectorConfig,
59 AssetGenerator,
60 AuditEngagementGenerator,
62 BalanceTrackerConfig,
63 BankReconciliationGenerator,
65 BidEvaluationGenerator,
67 BidGenerator,
68 CatalogGenerator,
69 ChartOfAccountsGenerator,
71 ContractGenerator,
72 ControlGenerator,
74 ControlGeneratorConfig,
75 CustomerGenerator,
76 DataQualityConfig,
77 DataQualityInjector,
79 DataQualityStats,
80 DocumentFlowJeConfig,
82 DocumentFlowJeGenerator,
83 DocumentFlowLinker,
85 EmployeeGenerator,
86 EsgAnomalyLabel,
88 EvidenceGenerator,
89 FinancialStatementGenerator,
91 FindingGenerator,
92 JournalEntryGenerator,
93 JudgmentGenerator,
94 LatePaymentDistribution,
95 MaterialGenerator,
96 O2CDocumentChain,
97 O2CGenerator,
98 O2CGeneratorConfig,
99 O2CPaymentBehavior,
100 P2PDocumentChain,
101 P2PGenerator,
103 P2PGeneratorConfig,
104 P2PPaymentBehavior,
105 PaymentReference,
106 QualificationGenerator,
107 RfxGenerator,
108 RiskAssessmentGenerator,
109 RunningBalanceTracker,
111 ScorecardGenerator,
112 SourcingProjectGenerator,
113 SpendAnalysisGenerator,
114 ValidationError,
115 VendorGenerator,
117 WorkpaperGenerator,
118};
119use datasynth_graph::{
120 ApprovalGraphBuilder, ApprovalGraphConfig, BankingGraphBuilder, BankingGraphConfig,
121 EntityGraphBuilder, EntityGraphConfig, PyGExportConfig, PyGExporter, TransactionGraphBuilder,
122 TransactionGraphConfig,
123};
124use datasynth_ocpm::{
125 AuditDocuments, BankDocuments, BankReconDocuments, EventLogMetadata, H2rDocuments,
126 MfgDocuments, O2cDocuments, OcpmEventGenerator, OcpmEventLog, OcpmGeneratorConfig,
127 OcpmUuidFactory, P2pDocuments, S2cDocuments,
128};
129
130use datasynth_config::schema::{O2CFlowConfig, P2PFlowConfig};
131use datasynth_core::causal::{CausalGraph, CausalValidator, StructuralCausalModel};
132use datasynth_core::diffusion::{DiffusionBackend, DiffusionConfig, StatisticalDiffusionBackend};
133use datasynth_core::llm::MockLlmProvider;
134use datasynth_core::models::balance::{GeneratedOpeningBalance, IndustryType, OpeningBalanceSpec};
135use datasynth_core::models::documents::PaymentMethod;
136use datasynth_core::models::IndustrySector;
137use datasynth_generators::coa_generator::CoAFramework;
138use datasynth_generators::llm_enrichment::VendorLlmEnricher;
139use rayon::prelude::*;
140
141fn convert_p2p_config(schema_config: &P2PFlowConfig) -> P2PGeneratorConfig {
147 let payment_behavior = &schema_config.payment_behavior;
148 let late_dist = &payment_behavior.late_payment_days_distribution;
149
150 P2PGeneratorConfig {
151 three_way_match_rate: schema_config.three_way_match_rate,
152 partial_delivery_rate: schema_config.partial_delivery_rate,
153 over_delivery_rate: schema_config.over_delivery_rate.unwrap_or(0.02),
154 price_variance_rate: schema_config.price_variance_rate,
155 max_price_variance_percent: schema_config.max_price_variance_percent,
156 avg_days_po_to_gr: schema_config.average_po_to_gr_days,
157 avg_days_gr_to_invoice: schema_config.average_gr_to_invoice_days,
158 avg_days_invoice_to_payment: schema_config.average_invoice_to_payment_days,
159 payment_method_distribution: vec![
160 (PaymentMethod::BankTransfer, 0.60),
161 (PaymentMethod::Check, 0.25),
162 (PaymentMethod::Wire, 0.10),
163 (PaymentMethod::CreditCard, 0.05),
164 ],
165 early_payment_discount_rate: schema_config.early_payment_discount_rate.unwrap_or(0.30),
166 payment_behavior: P2PPaymentBehavior {
167 late_payment_rate: payment_behavior.late_payment_rate,
168 late_payment_distribution: LatePaymentDistribution {
169 slightly_late_1_to_7: late_dist.slightly_late_1_to_7,
170 late_8_to_14: late_dist.late_8_to_14,
171 very_late_15_to_30: late_dist.very_late_15_to_30,
172 severely_late_31_to_60: late_dist.severely_late_31_to_60,
173 extremely_late_over_60: late_dist.extremely_late_over_60,
174 },
175 partial_payment_rate: payment_behavior.partial_payment_rate,
176 payment_correction_rate: payment_behavior.payment_correction_rate,
177 avg_days_until_remainder: payment_behavior.avg_days_until_remainder,
178 },
179 }
180}
181
182fn convert_o2c_config(schema_config: &O2CFlowConfig) -> O2CGeneratorConfig {
184 let payment_behavior = &schema_config.payment_behavior;
185
186 O2CGeneratorConfig {
187 credit_check_failure_rate: schema_config.credit_check_failure_rate,
188 partial_shipment_rate: schema_config.partial_shipment_rate,
189 avg_days_so_to_delivery: schema_config.average_so_to_delivery_days,
190 avg_days_delivery_to_invoice: schema_config.average_delivery_to_invoice_days,
191 avg_days_invoice_to_payment: schema_config.average_invoice_to_receipt_days,
192 late_payment_rate: schema_config.late_payment_rate.unwrap_or(0.15),
193 bad_debt_rate: schema_config.bad_debt_rate,
194 returns_rate: schema_config.return_rate,
195 cash_discount_take_rate: schema_config.cash_discount.taken_rate,
196 payment_method_distribution: vec![
197 (PaymentMethod::BankTransfer, 0.50),
198 (PaymentMethod::Check, 0.30),
199 (PaymentMethod::Wire, 0.15),
200 (PaymentMethod::CreditCard, 0.05),
201 ],
202 payment_behavior: O2CPaymentBehavior {
203 partial_payment_rate: payment_behavior.partial_payments.rate,
204 short_payment_rate: payment_behavior.short_payments.rate,
205 max_short_percent: payment_behavior.short_payments.max_short_percent,
206 on_account_rate: payment_behavior.on_account_payments.rate,
207 payment_correction_rate: payment_behavior.payment_corrections.rate,
208 avg_days_until_remainder: payment_behavior.partial_payments.avg_days_until_remainder,
209 },
210 }
211}
212
/// Switches and volume settings controlling which generation phases run.
///
/// `bool` fields turn an individual phase or feature on or off; `usize` fields
/// carry the volumes used by the phases that read them. Baseline values are
/// provided by the [`Default`] implementation below.
#[derive(Debug, Clone)]
pub struct PhaseConfig {
    /// Enable the master-data phase.
    pub generate_master_data: bool,
    /// Enable the document-flow phase.
    pub generate_document_flows: bool,
    /// Enable OCPM event generation.
    pub generate_ocpm_events: bool,
    /// Enable journal-entry generation.
    pub generate_journal_entries: bool,
    /// Enable anomaly injection.
    pub inject_anomalies: bool,
    /// Enable data-quality injection.
    pub inject_data_quality: bool,
    /// Enable balance validation.
    pub validate_balances: bool,
    /// Whether progress should be shown.
    pub show_progress: bool,
    /// Vendor volume per company.
    pub vendors_per_company: usize,
    /// Customer volume per company.
    pub customers_per_company: usize,
    /// Material volume per company.
    pub materials_per_company: usize,
    /// Asset volume per company.
    pub assets_per_company: usize,
    /// Employee volume per company.
    pub employees_per_company: usize,
    /// Number of P2P chains.
    pub p2p_chains: usize,
    /// Number of O2C chains.
    pub o2c_chains: usize,
    /// Enable the audit phase.
    pub generate_audit: bool,
    /// Number of audit engagements.
    pub audit_engagements: usize,
    /// Workpaper volume per engagement.
    pub workpapers_per_engagement: usize,
    /// Evidence volume per workpaper.
    pub evidence_per_workpaper: usize,
    /// Risk volume per engagement.
    pub risks_per_engagement: usize,
    /// Finding volume per engagement.
    pub findings_per_engagement: usize,
    /// Judgment volume per engagement.
    pub judgments_per_engagement: usize,
    /// Enable the banking phase.
    pub generate_banking: bool,
    /// Enable graph export.
    pub generate_graph_export: bool,
    /// Enable the sourcing phase.
    pub generate_sourcing: bool,
    /// Enable bank reconciliation generation.
    pub generate_bank_reconciliation: bool,
    /// Enable financial statement generation.
    pub generate_financial_statements: bool,
    /// Enable accounting-standards generation.
    pub generate_accounting_standards: bool,
    /// Enable the manufacturing phase.
    pub generate_manufacturing: bool,
    /// Enable sales quote / KPI / budget generation.
    pub generate_sales_kpi_budgets: bool,
    /// Enable the tax phase.
    pub generate_tax: bool,
    /// Enable the ESG phase.
    pub generate_esg: bool,
    /// Enable intercompany generation.
    pub generate_intercompany: bool,
    /// Enable evolution-event generation.
    pub generate_evolution_events: bool,
    /// Enable counterfactual generation.
    pub generate_counterfactuals: bool,
    /// Enable compliance-regulation generation.
    pub generate_compliance_regulations: bool,
}

impl Default for PhaseConfig {
    /// Baseline configuration: the master-data, document-flow, journal-entry,
    /// balance-validation and evolution-event switches are on, progress
    /// display is on, and every other switch is off.
    fn default() -> Self {
        Self {
            // Phases that are on by default.
            generate_master_data: true,
            generate_document_flows: true,
            generate_journal_entries: true,
            validate_balances: true,
            generate_evolution_events: true,
            show_progress: true,

            // Switches that are off by default.
            generate_ocpm_events: false,
            inject_anomalies: false,
            inject_data_quality: false,
            generate_audit: false,
            generate_banking: false,
            generate_graph_export: false,
            generate_sourcing: false,
            generate_bank_reconciliation: false,
            generate_financial_statements: false,
            generate_accounting_standards: false,
            generate_manufacturing: false,
            generate_sales_kpi_budgets: false,
            generate_tax: false,
            generate_esg: false,
            generate_intercompany: false,
            generate_counterfactuals: false,
            generate_compliance_regulations: false,

            // Master-data volumes.
            vendors_per_company: 50,
            customers_per_company: 100,
            materials_per_company: 200,
            assets_per_company: 50,
            employees_per_company: 100,

            // Document-flow volumes.
            p2p_chains: 100,
            o2c_chains: 100,

            // Audit volumes (only relevant when `generate_audit` is set).
            audit_engagements: 5,
            workpapers_per_engagement: 20,
            evidence_per_workpaper: 5,
            risks_per_engagement: 15,
            findings_per_engagement: 8,
            judgments_per_engagement: 10,
        }
    }
}
332
/// Master-data records grouped by entity kind.
///
/// Defaults to all-empty collections.
#[derive(Debug, Clone, Default)]
pub struct MasterDataSnapshot {
    /// Vendor records.
    pub vendors: Vec<Vendor>,
    /// Customer records.
    pub customers: Vec<Customer>,
    /// Material records.
    pub materials: Vec<Material>,
    /// Fixed-asset records.
    pub assets: Vec<FixedAsset>,
    /// Employee records.
    pub employees: Vec<Employee>,
}
347
/// Size figures and output location describing a hypergraph export.
///
/// Unlike most snapshot types in this file, this one does not derive `Default`.
#[derive(Debug, Clone)]
pub struct HypergraphExportInfo {
    /// Number of nodes.
    pub node_count: usize,
    /// Number of edges.
    pub edge_count: usize,
    /// Number of hyperedges.
    pub hyperedge_count: usize,
    /// Path associated with the export output.
    pub output_path: PathBuf,
}
360
/// Document-flow output: P2P/O2C chains plus per-document-type lists.
///
/// NOTE(review): the flat document lists presumably hold the documents that
/// make up the chains — confirm against the code that fills this snapshot.
#[derive(Debug, Clone, Default)]
pub struct DocumentFlowSnapshot {
    /// P2P document chains.
    pub p2p_chains: Vec<P2PDocumentChain>,
    /// O2C document chains.
    pub o2c_chains: Vec<O2CDocumentChain>,
    /// Purchase orders.
    pub purchase_orders: Vec<documents::PurchaseOrder>,
    /// Goods receipts.
    pub goods_receipts: Vec<documents::GoodsReceipt>,
    /// Vendor invoices.
    pub vendor_invoices: Vec<documents::VendorInvoice>,
    /// Sales orders.
    pub sales_orders: Vec<documents::SalesOrder>,
    /// Deliveries.
    pub deliveries: Vec<documents::Delivery>,
    /// Customer invoices.
    pub customer_invoices: Vec<documents::CustomerInvoice>,
    /// Payments.
    pub payments: Vec<documents::Payment>,
}
383
/// Subledger records: AP, AR, fixed assets and inventory.
#[derive(Debug, Clone, Default)]
pub struct SubledgerSnapshot {
    /// AP subledger invoices.
    pub ap_invoices: Vec<APInvoice>,
    /// AR subledger invoices.
    pub ar_invoices: Vec<ARInvoice>,
    /// Fixed-asset subledger records.
    pub fa_records: Vec<datasynth_core::models::subledger::fa::FixedAssetRecord>,
    /// Inventory positions.
    pub inventory_positions: Vec<datasynth_core::models::subledger::inventory::InventoryPosition>,
    /// Inventory movements.
    pub inventory_movements: Vec<datasynth_core::models::subledger::inventory::InventoryMovement>,
}
398
/// OCPM output: an optional event log plus summary counts.
///
/// NOTE(review): the counts are stored independently of `event_log`; whether
/// they are always derived from it is not visible here.
#[derive(Debug, Clone, Default)]
pub struct OcpmSnapshot {
    /// The event log, when one is present.
    pub event_log: Option<OcpmEventLog>,
    /// Number of events.
    pub event_count: usize,
    /// Number of objects.
    pub object_count: usize,
    /// Number of cases.
    pub case_count: usize,
}
411
/// Audit records grouped by type.
#[derive(Debug, Clone, Default)]
pub struct AuditSnapshot {
    /// Audit engagements.
    pub engagements: Vec<AuditEngagement>,
    /// Workpapers.
    pub workpapers: Vec<Workpaper>,
    /// Audit evidence items.
    pub evidence: Vec<AuditEvidence>,
    /// Risk assessments.
    pub risk_assessments: Vec<RiskAssessment>,
    /// Audit findings.
    pub findings: Vec<AuditFinding>,
    /// Professional judgments.
    pub judgments: Vec<ProfessionalJudgment>,
}
428
/// Banking records together with their label sets and summary counts.
#[derive(Debug, Clone, Default)]
pub struct BankingSnapshot {
    /// Banking customers.
    pub customers: Vec<BankingCustomer>,
    /// Bank accounts.
    pub accounts: Vec<BankAccount>,
    /// Bank transactions.
    pub transactions: Vec<BankTransaction>,
    /// Transaction-level labels.
    pub transaction_labels: Vec<datasynth_banking::labels::TransactionLabel>,
    /// Customer-level labels.
    pub customer_labels: Vec<datasynth_banking::labels::CustomerLabel>,
    /// Account-level labels.
    pub account_labels: Vec<datasynth_banking::labels::AccountLabel>,
    /// Relationship-level labels.
    pub relationship_labels: Vec<datasynth_banking::labels::RelationshipLabel>,
    /// Exported narratives.
    pub narratives: Vec<datasynth_banking::labels::ExportedNarrative>,
    /// Suspicious-item count (exact counting rule is not visible here).
    pub suspicious_count: usize,
    /// Scenario count (exact counting rule is not visible here).
    pub scenario_count: usize,
}
453
/// Serializable summary of graph exports.
#[derive(Debug, Clone, Default, Serialize)]
pub struct GraphExportSnapshot {
    /// Whether an export took place.
    pub exported: bool,
    /// Number of graphs.
    pub graph_count: usize,
    /// Per-export details keyed by a string (the key scheme is not visible here).
    pub exports: HashMap<String, GraphExportInfo>,
}
464
/// Serializable details for a single graph export.
#[derive(Debug, Clone, Serialize)]
pub struct GraphExportInfo {
    /// Name of the export.
    pub name: String,
    /// Format identifier (free-form string).
    pub format: String,
    /// Path associated with the export output.
    pub output_path: PathBuf,
    /// Number of nodes.
    pub node_count: usize,
    /// Number of edges.
    pub edge_count: usize,
}
479
/// Sourcing records grouped by type.
#[derive(Debug, Clone, Default)]
pub struct SourcingSnapshot {
    /// Spend analyses.
    pub spend_analyses: Vec<SpendAnalysis>,
    /// Sourcing projects.
    pub sourcing_projects: Vec<SourcingProject>,
    /// Supplier qualifications.
    pub qualifications: Vec<SupplierQualification>,
    /// RFx events.
    pub rfx_events: Vec<RfxEvent>,
    /// Supplier bids.
    pub bids: Vec<SupplierBid>,
    /// Bid evaluations.
    pub bid_evaluations: Vec<BidEvaluation>,
    /// Procurement contracts.
    pub contracts: Vec<ProcurementContract>,
    /// Catalog items.
    pub catalog_items: Vec<CatalogItem>,
    /// Supplier scorecards.
    pub scorecards: Vec<SupplierScorecard>,
}
502
/// Trial-balance entries for one fiscal period.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PeriodTrialBalance {
    /// Fiscal year.
    pub fiscal_year: u16,
    /// Fiscal period number within the year (numbering convention not visible here).
    pub fiscal_period: u8,
    /// Start date of the period.
    pub period_start: NaiveDate,
    /// End date of the period.
    pub period_end: NaiveDate,
    /// Trial-balance entries for the period.
    pub entries: Vec<datasynth_generators::TrialBalanceEntry>,
}
517
/// Financial reporting output.
#[derive(Debug, Clone, Default)]
pub struct FinancialReportingSnapshot {
    /// Financial statements.
    pub financial_statements: Vec<FinancialStatement>,
    /// Bank reconciliations.
    pub bank_reconciliations: Vec<BankReconciliation>,
    /// Per-period trial balances.
    pub trial_balances: Vec<PeriodTrialBalance>,
}
528
/// HR records together with separately stored counts.
///
/// NOTE(review): each `*_count` presumably mirrors the length of the matching
/// vector — confirm against the code that fills this snapshot.
#[derive(Debug, Clone, Default)]
pub struct HrSnapshot {
    /// Payroll runs.
    pub payroll_runs: Vec<PayrollRun>,
    /// Payroll line items.
    pub payroll_line_items: Vec<PayrollLineItem>,
    /// Time entries.
    pub time_entries: Vec<TimeEntry>,
    /// Expense reports.
    pub expense_reports: Vec<ExpenseReport>,
    /// Benefit enrollments.
    pub benefit_enrollments: Vec<BenefitEnrollment>,
    /// Count of payroll runs.
    pub payroll_run_count: usize,
    /// Count of payroll line items.
    pub payroll_line_item_count: usize,
    /// Count of time entries.
    pub time_entry_count: usize,
    /// Count of expense reports.
    pub expense_report_count: usize,
    /// Count of benefit enrollments.
    pub benefit_enrollment_count: usize,
}
553
/// Accounting-standards output: revenue contracts and impairment tests.
///
/// NOTE(review): the counts presumably mirror the vector lengths — confirm
/// against the code that fills this snapshot.
#[derive(Debug, Clone, Default)]
pub struct AccountingStandardsSnapshot {
    /// Customer contracts from the revenue module.
    pub contracts: Vec<datasynth_standards::accounting::revenue::CustomerContract>,
    /// Impairment tests.
    pub impairment_tests: Vec<datasynth_standards::accounting::impairment::ImpairmentTest>,
    /// Count of revenue contracts.
    pub revenue_contract_count: usize,
    /// Count of impairment tests.
    pub impairment_test_count: usize,
}
566
/// Compliance and regulation records, plus an optional compliance graph.
#[derive(Debug, Clone, Default)]
pub struct ComplianceRegulationsSnapshot {
    /// Compliance standard records.
    pub standard_records: Vec<datasynth_generators::compliance::ComplianceStandardRecord>,
    /// Cross-reference records.
    pub cross_reference_records: Vec<datasynth_generators::compliance::CrossReferenceRecord>,
    /// Jurisdiction records.
    pub jurisdiction_records: Vec<datasynth_generators::compliance::JurisdictionRecord>,
    /// Audit procedure records.
    pub audit_procedures: Vec<datasynth_generators::compliance::AuditProcedureRecord>,
    /// Compliance findings.
    pub findings: Vec<datasynth_core::models::compliance::ComplianceFinding>,
    /// Regulatory filings.
    pub filings: Vec<datasynth_core::models::compliance::RegulatoryFiling>,
    /// Compliance graph, when one is present.
    pub compliance_graph: Option<datasynth_graph::Graph>,
}
585
/// Manufacturing records together with separately stored counts.
///
/// NOTE(review): each `*_count` presumably mirrors the length of the matching
/// vector — confirm against the code that fills this snapshot.
#[derive(Debug, Clone, Default)]
pub struct ManufacturingSnapshot {
    /// Production orders.
    pub production_orders: Vec<ProductionOrder>,
    /// Quality inspections.
    pub quality_inspections: Vec<QualityInspection>,
    /// Cycle counts.
    pub cycle_counts: Vec<CycleCount>,
    /// BOM components.
    pub bom_components: Vec<BomComponent>,
    /// Inventory movements.
    pub inventory_movements: Vec<InventoryMovement>,
    /// Count of production orders.
    pub production_order_count: usize,
    /// Count of quality inspections.
    pub quality_inspection_count: usize,
    /// Count of cycle counts.
    pub cycle_count_count: usize,
    /// Count of BOM components.
    pub bom_component_count: usize,
    /// Count of inventory movements.
    pub inventory_movement_count: usize,
}
610
/// Sales quotes, management KPIs and budgets, with separately stored counts.
#[derive(Debug, Clone, Default)]
pub struct SalesKpiBudgetsSnapshot {
    /// Sales quotes.
    pub sales_quotes: Vec<SalesQuote>,
    /// Management KPIs.
    pub kpis: Vec<ManagementKpi>,
    /// Budgets.
    pub budgets: Vec<Budget>,
    /// Count of sales quotes.
    pub sales_quote_count: usize,
    /// Count of KPIs.
    pub kpi_count: usize,
    /// Count of budget lines.
    /// NOTE(review): named for *lines*, so this may differ from `budgets.len()` — confirm.
    pub budget_line_count: usize,
}
627
/// Anomaly labels with an optional summary and a per-type tally.
#[derive(Debug, Clone, Default)]
pub struct AnomalyLabels {
    /// Individual anomaly labels.
    pub labels: Vec<LabeledAnomaly>,
    /// Summary, when one is present.
    pub summary: Option<AnomalySummary>,
    /// Tally keyed by a type string (the key format is not visible here).
    pub by_type: HashMap<String, usize>,
}
638
/// Outcome of balance validation.
///
/// The derived `Default` yields `validated == false` with zeroed totals.
#[derive(Debug, Clone, Default)]
pub struct BalanceValidationResult {
    /// Whether validation was performed.
    pub validated: bool,
    /// Whether the result is balanced.
    pub is_balanced: bool,
    /// Number of entries processed.
    pub entries_processed: u64,
    /// Total debits.
    pub total_debits: rust_decimal::Decimal,
    /// Total credits.
    pub total_credits: rust_decimal::Decimal,
    /// Number of accounts tracked.
    pub accounts_tracked: usize,
    /// Number of companies tracked.
    pub companies_tracked: usize,
    /// Validation errors collected.
    pub validation_errors: Vec<ValidationError>,
    /// Whether any unbalanced entries were seen.
    pub has_unbalanced_entries: bool,
}
661
/// Tax records together with separately stored counts.
///
/// NOTE(review): the counts presumably mirror `jurisdictions.len()` and
/// `codes.len()` — confirm against the code that fills this snapshot.
#[derive(Debug, Clone, Default)]
pub struct TaxSnapshot {
    /// Tax jurisdictions.
    pub jurisdictions: Vec<TaxJurisdiction>,
    /// Tax codes.
    pub codes: Vec<TaxCode>,
    /// Tax lines.
    pub tax_lines: Vec<TaxLine>,
    /// Tax returns.
    pub tax_returns: Vec<TaxReturn>,
    /// Tax provisions.
    pub tax_provisions: Vec<TaxProvision>,
    /// Withholding tax records.
    pub withholding_records: Vec<WithholdingTaxRecord>,
    /// Tax anomaly labels.
    pub tax_anomaly_labels: Vec<datasynth_generators::TaxAnomalyLabel>,
    /// Count of jurisdictions.
    pub jurisdiction_count: usize,
    /// Count of tax codes.
    pub code_count: usize,
}
684
/// Intercompany output: matched pairs, both sides' journal entries and
/// elimination entries. Serializable and deserializable.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct IntercompanySnapshot {
    /// Matched intercompany pairs.
    pub matched_pairs: Vec<datasynth_core::models::intercompany::ICMatchedPair>,
    /// Journal entries on the seller side.
    pub seller_journal_entries: Vec<JournalEntry>,
    /// Journal entries on the buyer side.
    pub buyer_journal_entries: Vec<JournalEntry>,
    /// Elimination entries.
    pub elimination_entries: Vec<datasynth_core::models::intercompany::EliminationEntry>,
    /// Count of matched pairs.
    pub matched_pair_count: usize,
    /// Count of elimination entries.
    pub elimination_entry_count: usize,
    /// Match rate (scale — fraction vs. percent — is not visible here; TODO confirm).
    pub match_rate: f64,
}
703
/// ESG records grouped by type, with two separately stored counts.
#[derive(Debug, Clone, Default)]
pub struct EsgSnapshot {
    /// Emission records.
    pub emissions: Vec<EmissionRecord>,
    /// Energy consumption records.
    pub energy: Vec<EnergyConsumption>,
    /// Water usage records.
    pub water: Vec<WaterUsage>,
    /// Waste records.
    pub waste: Vec<WasteRecord>,
    /// Workforce diversity metrics.
    pub diversity: Vec<WorkforceDiversityMetric>,
    /// Pay equity metrics.
    pub pay_equity: Vec<PayEquityMetric>,
    /// Safety incidents.
    pub safety_incidents: Vec<SafetyIncident>,
    /// Safety metrics.
    pub safety_metrics: Vec<SafetyMetric>,
    /// Governance metrics.
    pub governance: Vec<GovernanceMetric>,
    /// Supplier ESG assessments.
    pub supplier_assessments: Vec<SupplierEsgAssessment>,
    /// Materiality assessments.
    pub materiality: Vec<MaterialityAssessment>,
    /// ESG disclosures.
    pub disclosures: Vec<EsgDisclosure>,
    /// Climate scenarios.
    pub climate_scenarios: Vec<ClimateScenario>,
    /// ESG anomaly labels.
    pub anomaly_labels: Vec<EsgAnomalyLabel>,
    /// Count of emission records (presumably `emissions.len()` — confirm).
    pub emission_count: usize,
    /// Count of disclosures (presumably `disclosures.len()` — confirm).
    pub disclosure_count: usize,
}
740
/// Treasury records grouped by type.
#[derive(Debug, Clone, Default)]
pub struct TreasurySnapshot {
    /// Cash positions.
    pub cash_positions: Vec<CashPosition>,
    /// Cash forecasts.
    pub cash_forecasts: Vec<CashForecast>,
    /// Cash pools.
    pub cash_pools: Vec<CashPool>,
    /// Cash pool sweeps.
    pub cash_pool_sweeps: Vec<CashPoolSweep>,
    /// Hedging instruments.
    pub hedging_instruments: Vec<HedgingInstrument>,
    /// Hedge relationships.
    pub hedge_relationships: Vec<HedgeRelationship>,
    /// Debt instruments.
    pub debt_instruments: Vec<DebtInstrument>,
    /// Bank guarantees.
    pub bank_guarantees: Vec<BankGuarantee>,
    /// Netting runs.
    pub netting_runs: Vec<NettingRun>,
    /// Treasury anomaly labels.
    pub treasury_anomaly_labels: Vec<datasynth_generators::treasury::TreasuryAnomalyLabel>,
}
765
/// Project accounting records grouped by type.
#[derive(Debug, Clone, Default)]
pub struct ProjectAccountingSnapshot {
    /// Projects.
    pub projects: Vec<Project>,
    /// Project cost lines.
    pub cost_lines: Vec<ProjectCostLine>,
    /// Project revenue records.
    pub revenue_records: Vec<ProjectRevenue>,
    /// Earned value metrics.
    pub earned_value_metrics: Vec<EarnedValueMetric>,
    /// Change orders.
    pub change_orders: Vec<ChangeOrder>,
    /// Project milestones.
    pub milestones: Vec<ProjectMilestone>,
}
782
/// Aggregate of everything a generation run can produce: the chart of
/// accounts, one snapshot per domain, journal entries, labels, validation
/// results and run statistics.
///
/// Derives only `Debug` (no `Clone` or `Default`).
#[derive(Debug)]
pub struct EnhancedGenerationResult {
    /// Chart of accounts.
    pub chart_of_accounts: ChartOfAccounts,
    /// Master-data snapshot.
    pub master_data: MasterDataSnapshot,
    /// Document-flow snapshot.
    pub document_flows: DocumentFlowSnapshot,
    /// Subledger snapshot.
    pub subledger: SubledgerSnapshot,
    /// OCPM snapshot.
    pub ocpm: OcpmSnapshot,
    /// Audit snapshot.
    pub audit: AuditSnapshot,
    /// Banking snapshot.
    pub banking: BankingSnapshot,
    /// Graph export summary.
    pub graph_export: GraphExportSnapshot,
    /// Sourcing snapshot.
    pub sourcing: SourcingSnapshot,
    /// Financial reporting snapshot.
    pub financial_reporting: FinancialReportingSnapshot,
    /// HR snapshot.
    pub hr: HrSnapshot,
    /// Accounting-standards snapshot.
    pub accounting_standards: AccountingStandardsSnapshot,
    /// Manufacturing snapshot.
    pub manufacturing: ManufacturingSnapshot,
    /// Sales quote / KPI / budget snapshot.
    pub sales_kpi_budgets: SalesKpiBudgetsSnapshot,
    /// Tax snapshot.
    pub tax: TaxSnapshot,
    /// ESG snapshot.
    pub esg: EsgSnapshot,
    /// Treasury snapshot.
    pub treasury: TreasurySnapshot,
    /// Project accounting snapshot.
    pub project_accounting: ProjectAccountingSnapshot,
    /// Process evolution events.
    pub process_evolution: Vec<ProcessEvolutionEvent>,
    /// Organizational events.
    pub organizational_events: Vec<OrganizationalEvent>,
    /// Disruption events.
    pub disruption_events: Vec<datasynth_generators::disruption::DisruptionEvent>,
    /// Intercompany snapshot.
    pub intercompany: IntercompanySnapshot,
    /// Journal entries.
    pub journal_entries: Vec<JournalEntry>,
    /// Anomaly labels.
    pub anomaly_labels: AnomalyLabels,
    /// Balance validation outcome.
    pub balance_validation: BalanceValidationResult,
    /// Data-quality statistics.
    pub data_quality_stats: DataQualityStats,
    /// Run statistics.
    pub statistics: EnhancedGenerationStatistics,
    /// Lineage graph, when one is present.
    pub lineage: Option<super::lineage::LineageGraph>,
    /// Gate result from `datasynth_eval`, when one is present.
    pub gate_result: Option<datasynth_eval::gates::GateResult>,
    /// Internal controls.
    pub internal_controls: Vec<InternalControl>,
    /// Generated opening balances.
    pub opening_balances: Vec<GeneratedOpeningBalance>,
    /// Subledger reconciliation results.
    pub subledger_reconciliation: Vec<datasynth_generators::ReconciliationResult>,
    /// Counterfactual pairs.
    pub counterfactual_pairs: Vec<datasynth_generators::counterfactual::CounterfactualPair>,
    /// Fraud red flags.
    pub red_flags: Vec<datasynth_generators::fraud::RedFlag>,
    /// Collusion rings.
    pub collusion_rings: Vec<datasynth_generators::fraud::CollusionRing>,
    /// Temporal version chains over vendors.
    pub temporal_vendor_chains:
        Vec<datasynth_core::models::TemporalVersionChain<datasynth_core::models::Vendor>>,
    /// Entity relationship graph, when one is present.
    pub entity_relationship_graph: Option<datasynth_core::models::EntityGraph>,
    /// Cross-process links.
    pub cross_process_links: Vec<datasynth_core::models::CrossProcessLink>,
    /// Industry-specific output, when one is present.
    pub industry_output: Option<datasynth_generators::industry::factory::IndustryOutput>,
    /// Compliance-regulations snapshot.
    pub compliance_regulations: ComplianceRegulationsSnapshot,
}
868
/// Serializable run statistics: mostly per-domain record counts, plus a few
/// timing fields.
///
/// Fields up to and including `graph_edge_count` carry no serde attribute, so
/// they must be present when deserializing. Every field from
/// `llm_enrichment_ms` onward is marked `#[serde(default)]` and falls back to
/// its type's default when absent from the input.
///
/// NOTE(review): the `*_ms` fields are presumably durations in milliseconds —
/// confirm against the code that records them.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct EnhancedGenerationStatistics {
    // --- Core totals ---
    pub total_entries: u64,
    pub total_line_items: u64,
    pub accounts_count: usize,
    pub companies_count: usize,
    pub period_months: u32,

    // --- Master data ---
    pub vendor_count: usize,
    pub customer_count: usize,
    pub material_count: usize,
    pub asset_count: usize,
    pub employee_count: usize,

    // --- Document flows ---
    pub p2p_chain_count: usize,
    pub o2c_chain_count: usize,

    // --- AP/AR subledger ---
    pub ap_invoice_count: usize,
    pub ar_invoice_count: usize,

    // --- OCPM ---
    pub ocpm_event_count: usize,
    pub ocpm_object_count: usize,
    pub ocpm_case_count: usize,

    // --- Audit ---
    pub audit_engagement_count: usize,
    pub audit_workpaper_count: usize,
    pub audit_evidence_count: usize,
    pub audit_risk_count: usize,
    pub audit_finding_count: usize,
    pub audit_judgment_count: usize,

    // --- Injection ---
    pub anomalies_injected: usize,
    pub data_quality_issues: usize,

    // --- Banking ---
    pub banking_customer_count: usize,
    pub banking_account_count: usize,
    pub banking_transaction_count: usize,
    pub banking_suspicious_count: usize,

    // --- Graph export ---
    pub graph_export_count: usize,
    pub graph_node_count: usize,
    pub graph_edge_count: usize,

    // --- LLM enrichment (optional on deserialize from here on) ---
    #[serde(default)]
    pub llm_enrichment_ms: u64,
    #[serde(default)]
    pub llm_vendors_enriched: usize,

    // --- Diffusion enhancement ---
    #[serde(default)]
    pub diffusion_enhancement_ms: u64,
    #[serde(default)]
    pub diffusion_samples_generated: usize,

    // --- Causal generation ---
    #[serde(default)]
    pub causal_generation_ms: u64,
    #[serde(default)]
    pub causal_samples_generated: usize,
    /// `None` when no validation outcome was recorded.
    #[serde(default)]
    pub causal_validation_passed: Option<bool>,

    // --- Sourcing ---
    #[serde(default)]
    pub sourcing_project_count: usize,
    #[serde(default)]
    pub rfx_event_count: usize,
    #[serde(default)]
    pub bid_count: usize,
    #[serde(default)]
    pub contract_count: usize,
    #[serde(default)]
    pub catalog_item_count: usize,
    #[serde(default)]
    pub scorecard_count: usize,

    // --- Financial reporting ---
    #[serde(default)]
    pub financial_statement_count: usize,
    #[serde(default)]
    pub bank_reconciliation_count: usize,

    // --- HR ---
    #[serde(default)]
    pub payroll_run_count: usize,
    #[serde(default)]
    pub time_entry_count: usize,
    #[serde(default)]
    pub expense_report_count: usize,
    #[serde(default)]
    pub benefit_enrollment_count: usize,

    // --- Accounting standards ---
    #[serde(default)]
    pub revenue_contract_count: usize,
    #[serde(default)]
    pub impairment_test_count: usize,

    // --- Manufacturing ---
    #[serde(default)]
    pub production_order_count: usize,
    #[serde(default)]
    pub quality_inspection_count: usize,
    #[serde(default)]
    pub cycle_count_count: usize,
    #[serde(default)]
    pub bom_component_count: usize,
    #[serde(default)]
    pub inventory_movement_count: usize,

    // --- Sales quotes / KPIs / budgets ---
    #[serde(default)]
    pub sales_quote_count: usize,
    #[serde(default)]
    pub kpi_count: usize,
    #[serde(default)]
    pub budget_line_count: usize,

    // --- Tax ---
    #[serde(default)]
    pub tax_jurisdiction_count: usize,
    #[serde(default)]
    pub tax_code_count: usize,

    // --- ESG ---
    #[serde(default)]
    pub esg_emission_count: usize,
    #[serde(default)]
    pub esg_disclosure_count: usize,

    // --- Intercompany ---
    #[serde(default)]
    pub ic_matched_pair_count: usize,
    #[serde(default)]
    pub ic_elimination_count: usize,
    #[serde(default)]
    pub ic_transaction_count: usize,

    // --- Fixed-asset and inventory subledgers ---
    #[serde(default)]
    pub fa_subledger_count: usize,
    #[serde(default)]
    pub inventory_subledger_count: usize,

    // --- Treasury instruments ---
    #[serde(default)]
    pub treasury_debt_instrument_count: usize,
    #[serde(default)]
    pub treasury_hedging_instrument_count: usize,

    // --- Project accounting ---
    #[serde(default)]
    pub project_count: usize,
    #[serde(default)]
    pub project_change_order_count: usize,

    // --- Miscellaneous counts ---
    #[serde(default)]
    pub tax_provision_count: usize,
    #[serde(default)]
    pub opening_balance_count: usize,
    #[serde(default)]
    pub subledger_reconciliation_count: usize,
    #[serde(default)]
    pub tax_line_count: usize,
    #[serde(default)]
    pub project_cost_line_count: usize,

    // --- Cash management ---
    #[serde(default)]
    pub cash_position_count: usize,
    #[serde(default)]
    pub cash_forecast_count: usize,
    #[serde(default)]
    pub cash_pool_count: usize,

    // --- Evolution and organizational events ---
    #[serde(default)]
    pub process_evolution_event_count: usize,
    #[serde(default)]
    pub organizational_event_count: usize,

    // --- Counterfactuals and fraud ---
    #[serde(default)]
    pub counterfactual_pair_count: usize,
    #[serde(default)]
    pub red_flag_count: usize,
    #[serde(default)]
    pub collusion_ring_count: usize,

    // --- Temporal / relationship structures ---
    #[serde(default)]
    pub temporal_version_chain_count: usize,
    #[serde(default)]
    pub entity_relationship_node_count: usize,
    #[serde(default)]
    pub entity_relationship_edge_count: usize,
    #[serde(default)]
    pub cross_process_link_count: usize,

    // --- Disruption and industry-specific ---
    #[serde(default)]
    pub disruption_event_count: usize,
    #[serde(default)]
    pub industry_gl_account_count: usize,
}
1083
/// Orchestrator state: configuration, phase switches, and the supporting
/// objects (resource guard, country packs, optional progress display and
/// streaming sink) used while generating.
///
/// All fields are private; instances are created through the constructors on
/// the `impl` block and adjusted with the `with_*` builder methods.
pub struct EnhancedOrchestrator {
    /// Generator configuration (validated in `new`).
    config: GeneratorConfig,
    /// Phase switches and volumes.
    phase_config: PhaseConfig,
    /// Chart of accounts; `None` on construction.
    coa: Option<Arc<ChartOfAccounts>>,
    /// Master data held by the orchestrator; empty on construction.
    master_data: MasterDataSnapshot,
    /// Seed: `config.global.seed` when set, otherwise a random value.
    seed: u64,
    /// Progress display; created by `with_progress(true)`.
    multi_progress: Option<MultiProgress>,
    /// Resource guard built from the config (and the output path, once set).
    resource_guard: ResourceGuard,
    /// Output path; set by `with_output_path`.
    output_path: Option<PathBuf>,
    /// Copula generator specs; populated when built from a fingerprint.
    copula_generators: Vec<CopulaGeneratorSpec>,
    /// Country pack registry built from `config.country_packs`.
    country_pack_registry: datasynth_core::CountryPackRegistry,
    /// Optional streaming sink; set by `with_phase_sink`.
    phase_sink: Option<Box<dyn crate::stream_pipeline::PhaseSink>>,
}
1103
1104impl EnhancedOrchestrator {
1105 pub fn new(config: GeneratorConfig, phase_config: PhaseConfig) -> SynthResult<Self> {
1107 datasynth_config::validate_config(&config)?;
1108
1109 let seed = config.global.seed.unwrap_or_else(rand::random);
1110
1111 let resource_guard = Self::build_resource_guard(&config, None);
1113
1114 let country_pack_registry = match &config.country_packs {
1116 Some(cp) => {
1117 datasynth_core::CountryPackRegistry::new(cp.external_dir.as_deref(), &cp.overrides)
1118 .map_err(|e| SynthError::config(e.to_string()))?
1119 }
1120 None => datasynth_core::CountryPackRegistry::builtin_only()
1121 .map_err(|e| SynthError::config(e.to_string()))?,
1122 };
1123
1124 Ok(Self {
1125 config,
1126 phase_config,
1127 coa: None,
1128 master_data: MasterDataSnapshot::default(),
1129 seed,
1130 multi_progress: None,
1131 resource_guard,
1132 output_path: None,
1133 copula_generators: Vec::new(),
1134 country_pack_registry,
1135 phase_sink: None,
1136 })
1137 }
1138
1139 pub fn with_defaults(config: GeneratorConfig) -> SynthResult<Self> {
1141 Self::new(config, PhaseConfig::default())
1142 }
1143
1144 pub fn with_phase_sink(mut self, sink: Box<dyn crate::stream_pipeline::PhaseSink>) -> Self {
1146 self.phase_sink = Some(sink);
1147 self
1148 }
1149
1150 fn emit_phase_items<T: serde::Serialize>(&self, phase: &str, type_name: &str, items: &[T]) {
1152 if let Some(ref sink) = self.phase_sink {
1153 for item in items {
1154 if let Ok(value) = serde_json::to_value(item) {
1155 if let Err(e) = sink.emit(phase, type_name, &value) {
1156 warn!(
1157 "Stream sink emit failed for phase '{phase}', type '{type_name}': {e}"
1158 );
1159 }
1160 }
1161 }
1162 if let Err(e) = sink.phase_complete(phase) {
1163 warn!("Stream sink phase_complete failed for phase '{phase}': {e}");
1164 }
1165 }
1166 }
1167
1168 pub fn with_progress(mut self, show: bool) -> Self {
1170 self.phase_config.show_progress = show;
1171 if show {
1172 self.multi_progress = Some(MultiProgress::new());
1173 }
1174 self
1175 }
1176
1177 pub fn with_output_path<P: Into<PathBuf>>(mut self, path: P) -> Self {
1179 let path = path.into();
1180 self.output_path = Some(path.clone());
1181 self.resource_guard = Self::build_resource_guard(&self.config, Some(path));
1183 self
1184 }
1185
1186 pub fn country_pack_registry(&self) -> &datasynth_core::CountryPackRegistry {
1188 &self.country_pack_registry
1189 }
1190
1191 pub fn country_pack_for(&self, country: &str) -> &datasynth_core::CountryPack {
1193 self.country_pack_registry.get_by_str(country)
1194 }
1195
1196 fn primary_country_code(&self) -> &str {
1199 self.config
1200 .companies
1201 .first()
1202 .map(|c| c.country.as_str())
1203 .unwrap_or("US")
1204 }
1205
1206 fn primary_pack(&self) -> &datasynth_core::CountryPack {
1208 self.country_pack_for(self.primary_country_code())
1209 }
1210
1211 fn resolve_coa_framework(&self) -> CoAFramework {
1213 if self.config.accounting_standards.enabled {
1214 match self.config.accounting_standards.framework {
1215 Some(datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap) => {
1216 return CoAFramework::FrenchPcg;
1217 }
1218 Some(datasynth_config::schema::AccountingFrameworkConfig::GermanGaap) => {
1219 return CoAFramework::GermanSkr04;
1220 }
1221 _ => {}
1222 }
1223 }
1224 let pack = self.primary_pack();
1226 match pack.accounting.framework.as_str() {
1227 "french_gaap" => CoAFramework::FrenchPcg,
1228 "german_gaap" | "hgb" => CoAFramework::GermanSkr04,
1229 _ => CoAFramework::UsGaap,
1230 }
1231 }
1232
1233 pub fn has_copulas(&self) -> bool {
1238 !self.copula_generators.is_empty()
1239 }
1240
1241 pub fn copulas(&self) -> &[CopulaGeneratorSpec] {
1247 &self.copula_generators
1248 }
1249
1250 pub fn copulas_mut(&mut self) -> &mut [CopulaGeneratorSpec] {
1254 &mut self.copula_generators
1255 }
1256
1257 pub fn sample_from_copula(&mut self, copula_name: &str) -> Option<Vec<f64>> {
1261 self.copula_generators
1262 .iter_mut()
1263 .find(|c| c.name == copula_name)
1264 .map(|c| c.generator.sample())
1265 }
1266
1267 pub fn from_fingerprint(
1290 fingerprint_path: &std::path::Path,
1291 phase_config: PhaseConfig,
1292 scale: f64,
1293 ) -> SynthResult<Self> {
1294 info!("Loading fingerprint from: {}", fingerprint_path.display());
1295
1296 let reader = FingerprintReader::new();
1298 let fingerprint = reader
1299 .read_from_file(fingerprint_path)
1300 .map_err(|e| SynthError::config(format!("Failed to read fingerprint: {e}")))?;
1301
1302 Self::from_fingerprint_data(fingerprint, phase_config, scale)
1303 }
1304
1305 pub fn from_fingerprint_data(
1312 fingerprint: Fingerprint,
1313 phase_config: PhaseConfig,
1314 scale: f64,
1315 ) -> SynthResult<Self> {
1316 info!(
1317 "Synthesizing config from fingerprint (version: {}, tables: {})",
1318 fingerprint.manifest.version,
1319 fingerprint.schema.tables.len()
1320 );
1321
1322 let seed: u64 = rand::random();
1324
1325 let options = SynthesisOptions {
1327 scale,
1328 seed: Some(seed),
1329 preserve_correlations: true,
1330 inject_anomalies: true,
1331 };
1332 let synthesizer = ConfigSynthesizer::with_options(options);
1333
1334 let synthesis_result = synthesizer
1336 .synthesize_full(&fingerprint, seed)
1337 .map_err(|e| {
1338 SynthError::config(format!("Failed to synthesize config from fingerprint: {e}"))
1339 })?;
1340
1341 let mut config = if let Some(ref industry) = fingerprint.manifest.source.industry {
1343 Self::base_config_for_industry(industry)
1344 } else {
1345 Self::base_config_for_industry("manufacturing")
1346 };
1347
1348 config = Self::apply_config_patch(config, &synthesis_result.config_patch);
1350
1351 info!(
1353 "Config synthesized: {} tables, scale={:.2}, copula generators: {}",
1354 fingerprint.schema.tables.len(),
1355 scale,
1356 synthesis_result.copula_generators.len()
1357 );
1358
1359 if !synthesis_result.copula_generators.is_empty() {
1360 for spec in &synthesis_result.copula_generators {
1361 info!(
1362 " Copula '{}' for table '{}': {} columns",
1363 spec.name,
1364 spec.table,
1365 spec.columns.len()
1366 );
1367 }
1368 }
1369
1370 let mut orchestrator = Self::new(config, phase_config)?;
1372
1373 orchestrator.copula_generators = synthesis_result.copula_generators;
1375
1376 Ok(orchestrator)
1377 }
1378
1379 fn base_config_for_industry(industry: &str) -> GeneratorConfig {
1381 use datasynth_config::presets::create_preset;
1382 use datasynth_config::TransactionVolume;
1383 use datasynth_core::models::{CoAComplexity, IndustrySector};
1384
1385 let sector = match industry.to_lowercase().as_str() {
1386 "manufacturing" => IndustrySector::Manufacturing,
1387 "retail" => IndustrySector::Retail,
1388 "financial" | "financial_services" => IndustrySector::FinancialServices,
1389 "healthcare" => IndustrySector::Healthcare,
1390 "technology" | "tech" => IndustrySector::Technology,
1391 _ => IndustrySector::Manufacturing,
1392 };
1393
1394 create_preset(
1396 sector,
1397 1, 12, CoAComplexity::Medium,
1400 TransactionVolume::TenK,
1401 )
1402 }
1403
1404 fn apply_config_patch(
1406 mut config: GeneratorConfig,
1407 patch: &datasynth_fingerprint::synthesis::ConfigPatch,
1408 ) -> GeneratorConfig {
1409 use datasynth_fingerprint::synthesis::ConfigValue;
1410
1411 for (key, value) in patch.values() {
1412 match (key.as_str(), value) {
1413 ("transactions.count", ConfigValue::Integer(n)) => {
1416 info!(
1417 "Fingerprint suggests {} transactions (apply via company volumes)",
1418 n
1419 );
1420 }
1421 ("global.period_months", ConfigValue::Integer(n)) => {
1422 config.global.period_months = (*n).clamp(1, 120) as u32;
1423 }
1424 ("global.start_date", ConfigValue::String(s)) => {
1425 config.global.start_date = s.clone();
1426 }
1427 ("global.seed", ConfigValue::Integer(n)) => {
1428 config.global.seed = Some(*n as u64);
1429 }
1430 ("fraud.enabled", ConfigValue::Bool(b)) => {
1431 config.fraud.enabled = *b;
1432 }
1433 ("fraud.fraud_rate", ConfigValue::Float(f)) => {
1434 config.fraud.fraud_rate = *f;
1435 }
1436 ("data_quality.enabled", ConfigValue::Bool(b)) => {
1437 config.data_quality.enabled = *b;
1438 }
1439 ("anomaly_injection.enabled", ConfigValue::Bool(b)) => {
1441 config.fraud.enabled = *b;
1442 }
1443 ("anomaly_injection.overall_rate", ConfigValue::Float(f)) => {
1444 config.fraud.fraud_rate = *f;
1445 }
1446 _ => {
1447 debug!("Ignoring unknown config patch key: {}", key);
1448 }
1449 }
1450 }
1451
1452 config
1453 }
1454
1455 fn build_resource_guard(
1457 config: &GeneratorConfig,
1458 output_path: Option<PathBuf>,
1459 ) -> ResourceGuard {
1460 let mut builder = ResourceGuardBuilder::new();
1461
1462 if config.global.memory_limit_mb > 0 {
1464 builder = builder.memory_limit(config.global.memory_limit_mb);
1465 }
1466
1467 if let Some(path) = output_path {
1469 builder = builder.output_path(path).min_free_disk(100); }
1471
1472 builder = builder.conservative();
1474
1475 builder.build()
1476 }
1477
1478 fn check_resources(&self) -> SynthResult<DegradationLevel> {
1483 self.resource_guard.check()
1484 }
1485
1486 fn check_resources_with_log(&self, phase: &str) -> SynthResult<DegradationLevel> {
1488 let level = self.resource_guard.check()?;
1489
1490 if level != DegradationLevel::Normal {
1491 warn!(
1492 "Resource degradation at {}: level={}, memory={}MB, disk={}MB",
1493 phase,
1494 level,
1495 self.resource_guard.current_memory_mb(),
1496 self.resource_guard.available_disk_mb()
1497 );
1498 }
1499
1500 Ok(level)
1501 }
1502
1503 fn get_degradation_actions(&self) -> DegradationActions {
1505 self.resource_guard.get_actions()
1506 }
1507
1508 fn check_memory_limit(&self) -> SynthResult<()> {
1510 self.check_resources()?;
1511 Ok(())
1512 }
1513
1514 pub fn generate(&mut self) -> SynthResult<EnhancedGenerationResult> {
1516 info!("Starting enhanced generation workflow");
1517 info!(
1518 "Config: industry={:?}, period_months={}, companies={}",
1519 self.config.global.industry,
1520 self.config.global.period_months,
1521 self.config.companies.len()
1522 );
1523
1524 let initial_level = self.check_resources_with_log("initial")?;
1526 if initial_level == DegradationLevel::Emergency {
1527 return Err(SynthError::resource(
1528 "Insufficient resources to start generation",
1529 ));
1530 }
1531
1532 let mut stats = EnhancedGenerationStatistics {
1533 companies_count: self.config.companies.len(),
1534 period_months: self.config.global.period_months,
1535 ..Default::default()
1536 };
1537
1538 let coa = self.phase_chart_of_accounts(&mut stats)?;
1540
1541 self.phase_master_data(&mut stats)?;
1543
1544 self.emit_phase_items("master_data", "Vendor", &self.master_data.vendors);
1546 self.emit_phase_items("master_data", "Customer", &self.master_data.customers);
1547 self.emit_phase_items("master_data", "Material", &self.master_data.materials);
1548
1549 let (mut document_flows, subledger, fa_journal_entries) =
1551 self.phase_document_flows(&mut stats)?;
1552
1553 self.emit_phase_items(
1555 "document_flows",
1556 "PurchaseOrder",
1557 &document_flows.purchase_orders,
1558 );
1559 self.emit_phase_items(
1560 "document_flows",
1561 "GoodsReceipt",
1562 &document_flows.goods_receipts,
1563 );
1564 self.emit_phase_items(
1565 "document_flows",
1566 "VendorInvoice",
1567 &document_flows.vendor_invoices,
1568 );
1569 self.emit_phase_items("document_flows", "SalesOrder", &document_flows.sales_orders);
1570 self.emit_phase_items("document_flows", "Delivery", &document_flows.deliveries);
1571
1572 let opening_balances = self.phase_opening_balances(&coa, &mut stats)?;
1574
1575 let mut entries = self.phase_journal_entries(&coa, &document_flows, &mut stats)?;
1583
1584 if !fa_journal_entries.is_empty() {
1586 debug!(
1587 "Appending {} FA acquisition JEs to main entries",
1588 fa_journal_entries.len()
1589 );
1590 entries.extend(fa_journal_entries);
1591 }
1592
1593 let counterfactual_pairs = self.phase_counterfactuals(&entries, &mut stats)?;
1595
1596 let actions = self.get_degradation_actions();
1598
1599 let sourcing = self.phase_sourcing_data(&mut stats)?;
1601
1602 if !sourcing.contracts.is_empty() {
1604 let mut linked_count = 0usize;
1605 for chain in &mut document_flows.p2p_chains {
1606 if chain.purchase_order.contract_id.is_none() {
1607 if let Some(contract) = sourcing
1608 .contracts
1609 .iter()
1610 .find(|c| c.vendor_id == chain.purchase_order.vendor_id)
1611 {
1612 chain.purchase_order.contract_id = Some(contract.contract_id.clone());
1613 linked_count += 1;
1614 }
1615 }
1616 }
1617 if linked_count > 0 {
1618 debug!(
1619 "Linked {} purchase orders to S2C contracts by vendor match",
1620 linked_count
1621 );
1622 }
1623 }
1624
1625 let intercompany = self.phase_intercompany(&mut stats)?;
1627
1628 if !intercompany.seller_journal_entries.is_empty()
1630 || !intercompany.buyer_journal_entries.is_empty()
1631 {
1632 let ic_je_count = intercompany.seller_journal_entries.len()
1633 + intercompany.buyer_journal_entries.len();
1634 entries.extend(intercompany.seller_journal_entries.iter().cloned());
1635 entries.extend(intercompany.buyer_journal_entries.iter().cloned());
1636 debug!(
1637 "Appended {} IC journal entries to main entries",
1638 ic_je_count
1639 );
1640 }
1641
1642 let hr = self.phase_hr_data(&mut stats)?;
1644
1645 if !hr.payroll_runs.is_empty() {
1647 let payroll_jes = Self::generate_payroll_jes(&hr.payroll_runs);
1648 debug!("Generated {} JEs from payroll runs", payroll_jes.len());
1649 entries.extend(payroll_jes);
1650 }
1651
1652 let manufacturing_snap = self.phase_manufacturing(&mut stats)?;
1654
1655 if !manufacturing_snap.production_orders.is_empty() {
1657 let mfg_jes = Self::generate_manufacturing_jes(&manufacturing_snap.production_orders);
1658 debug!("Generated {} JEs from production orders", mfg_jes.len());
1659 entries.extend(mfg_jes);
1660 }
1661
1662 if !entries.is_empty() {
1665 stats.total_entries = entries.len() as u64;
1666 stats.total_line_items = entries.iter().map(|e| e.line_count() as u64).sum();
1667 debug!(
1668 "Final entry count: {}, line items: {} (after all JE-generating phases)",
1669 stats.total_entries, stats.total_line_items
1670 );
1671 }
1672
1673 if self.config.internal_controls.enabled && !entries.is_empty() {
1675 info!("Phase 7b: Applying internal controls to journal entries");
1676 let control_config = ControlGeneratorConfig {
1677 exception_rate: self.config.internal_controls.exception_rate,
1678 sod_violation_rate: self.config.internal_controls.sod_violation_rate,
1679 enable_sox_marking: true,
1680 sox_materiality_threshold: rust_decimal::Decimal::from_f64_retain(
1681 self.config.internal_controls.sox_materiality_threshold,
1682 )
1683 .unwrap_or_else(|| rust_decimal::Decimal::from(10000)),
1684 };
1685 let mut control_gen = ControlGenerator::with_config(self.seed + 99, control_config);
1686 for entry in &mut entries {
1687 control_gen.apply_controls(entry, &coa);
1688 }
1689 let with_controls = entries
1690 .iter()
1691 .filter(|e| !e.header.control_ids.is_empty())
1692 .count();
1693 info!(
1694 "Applied controls to {} entries ({} with control IDs assigned)",
1695 entries.len(),
1696 with_controls
1697 );
1698 }
1699
1700 self.emit_phase_items("journal_entries", "JournalEntry", &entries);
1702
1703 let anomaly_labels = self.phase_anomaly_injection(&mut entries, &actions, &mut stats)?;
1705
1706 self.emit_phase_items(
1708 "anomaly_injection",
1709 "LabeledAnomaly",
1710 &anomaly_labels.labels,
1711 );
1712
1713 let red_flags = self.phase_red_flags(&anomaly_labels, &document_flows, &mut stats)?;
1715
1716 self.emit_phase_items("red_flags", "RedFlag", &red_flags);
1718
1719 let collusion_rings = self.phase_collusion_rings(&mut stats)?;
1721
1722 self.emit_phase_items("collusion_rings", "CollusionRing", &collusion_rings);
1724
1725 let balance_validation = self.phase_balance_validation(&entries)?;
1727
1728 let subledger_reconciliation =
1730 self.phase_subledger_reconciliation(&subledger, &entries, &mut stats)?;
1731
1732 let data_quality_stats =
1734 self.phase_data_quality_injection(&mut entries, &actions, &mut stats)?;
1735
1736 let audit = self.phase_audit_data(&entries, &mut stats)?;
1738
1739 let banking = self.phase_banking_data(&mut stats)?;
1741
1742 let graph_export = self.phase_graph_export(&entries, &coa, &mut stats)?;
1744
1745 self.phase_llm_enrichment(&mut stats);
1747
1748 self.phase_diffusion_enhancement(&mut stats);
1750
1751 self.phase_causal_overlay(&mut stats);
1753
1754 let financial_reporting =
1756 self.phase_financial_reporting(&document_flows, &entries, &coa, &mut stats)?;
1757
1758 let accounting_standards = self.phase_accounting_standards(&mut stats)?;
1760
1761 let ocpm = self.phase_ocpm_events(
1763 &document_flows,
1764 &sourcing,
1765 &hr,
1766 &manufacturing_snap,
1767 &banking,
1768 &audit,
1769 &financial_reporting,
1770 &mut stats,
1771 )?;
1772
1773 if let Some(ref event_log) = ocpm.event_log {
1775 self.emit_phase_items("ocpm", "OcpmEvent", &event_log.events);
1776 }
1777
1778 let sales_kpi_budgets =
1780 self.phase_sales_kpi_budgets(&coa, &financial_reporting, &mut stats)?;
1781
1782 let tax = self.phase_tax_generation(&document_flows, &mut stats)?;
1784
1785 let esg_snap = self.phase_esg_generation(&document_flows, &mut stats)?;
1787
1788 let treasury =
1790 self.phase_treasury_data(&document_flows, &subledger, &intercompany, &mut stats)?;
1791
1792 let project_accounting = self.phase_project_accounting(&document_flows, &hr, &mut stats)?;
1794
1795 let (process_evolution, organizational_events) = self.phase_evolution_events(&mut stats)?;
1797
1798 let disruption_events = self.phase_disruption_events(&mut stats)?;
1800
1801 let temporal_vendor_chains = self.phase_temporal_attributes(&mut stats)?;
1803
1804 let (entity_relationship_graph, cross_process_links) =
1806 self.phase_entity_relationships(&entries, &document_flows, &mut stats)?;
1807
1808 let industry_output = self.phase_industry_data(&mut stats);
1810
1811 let compliance_regulations = self.phase_compliance_regulations(&mut stats)?;
1813
1814 self.phase_hypergraph_export(
1816 &coa,
1817 &entries,
1818 &document_flows,
1819 &sourcing,
1820 &hr,
1821 &manufacturing_snap,
1822 &banking,
1823 &audit,
1824 &financial_reporting,
1825 &ocpm,
1826 &compliance_regulations,
1827 &mut stats,
1828 )?;
1829
1830 if self.phase_config.generate_graph_export || self.config.graph_export.enabled {
1833 self.build_additional_graphs(&banking, &intercompany, &entries, &mut stats);
1834 }
1835
1836 if self.config.streaming.enabled {
1838 info!("Note: streaming config is enabled but batch mode does not use it");
1839 }
1840 if self.config.vendor_network.enabled {
1841 debug!("Vendor network config available; relationship graph generation is partial");
1842 }
1843 if self.config.customer_segmentation.enabled {
1844 debug!("Customer segmentation config available; segment-aware generation is partial");
1845 }
1846
1847 let resource_stats = self.resource_guard.stats();
1849 info!(
1850 "Generation workflow complete. Resource stats: memory_peak={}MB, disk_written={}bytes, degradation_level={}",
1851 resource_stats.memory.peak_resident_bytes / (1024 * 1024),
1852 resource_stats.disk.estimated_bytes_written,
1853 resource_stats.degradation_level
1854 );
1855
1856 if let Some(ref sink) = self.phase_sink {
1858 if let Err(e) = sink.flush() {
1859 warn!("Stream sink flush failed: {e}");
1860 }
1861 }
1862
1863 let lineage = self.build_lineage_graph();
1865
1866 let gate_result = if self.config.quality_gates.enabled {
1868 let profile_name = &self.config.quality_gates.profile;
1869 match datasynth_eval::gates::get_profile(profile_name) {
1870 Some(profile) => {
1871 let mut eval = datasynth_eval::ComprehensiveEvaluation::new();
1873
1874 if balance_validation.validated {
1876 eval.coherence.balance =
1877 Some(datasynth_eval::coherence::BalanceSheetEvaluation {
1878 equation_balanced: balance_validation.is_balanced,
1879 max_imbalance: (balance_validation.total_debits
1880 - balance_validation.total_credits)
1881 .abs(),
1882 periods_evaluated: 1,
1883 periods_imbalanced: if balance_validation.is_balanced {
1884 0
1885 } else {
1886 1
1887 },
1888 period_results: Vec::new(),
1889 companies_evaluated: self.config.companies.len(),
1890 });
1891 }
1892
1893 eval.coherence.passes = balance_validation.is_balanced;
1895 if !balance_validation.is_balanced {
1896 eval.coherence
1897 .failures
1898 .push("Balance sheet equation not satisfied".to_string());
1899 }
1900
1901 eval.statistical.overall_score = if entries.len() > 10 { 0.9 } else { 0.5 };
1903 eval.statistical.passes = !entries.is_empty();
1904
1905 eval.quality.overall_score = 0.9; eval.quality.passes = true;
1908
1909 let result = datasynth_eval::gates::GateEngine::evaluate(&eval, &profile);
1910 info!(
1911 "Quality gates evaluated (profile '{}'): {}/{} passed — {}",
1912 profile_name, result.gates_passed, result.gates_total, result.summary
1913 );
1914 Some(result)
1915 }
1916 None => {
1917 warn!(
1918 "Quality gates enabled but profile '{}' not found; skipping gate evaluation",
1919 profile_name
1920 );
1921 None
1922 }
1923 }
1924 } else {
1925 None
1926 };
1927
1928 let internal_controls = if self.config.internal_controls.enabled {
1930 InternalControl::standard_controls()
1931 } else {
1932 Vec::new()
1933 };
1934
1935 Ok(EnhancedGenerationResult {
1936 chart_of_accounts: Arc::try_unwrap(coa).unwrap_or_else(|arc| (*arc).clone()),
1937 master_data: std::mem::take(&mut self.master_data),
1938 document_flows,
1939 subledger,
1940 ocpm,
1941 audit,
1942 banking,
1943 graph_export,
1944 sourcing,
1945 financial_reporting,
1946 hr,
1947 accounting_standards,
1948 manufacturing: manufacturing_snap,
1949 sales_kpi_budgets,
1950 tax,
1951 esg: esg_snap,
1952 treasury,
1953 project_accounting,
1954 process_evolution,
1955 organizational_events,
1956 disruption_events,
1957 intercompany,
1958 journal_entries: entries,
1959 anomaly_labels,
1960 balance_validation,
1961 data_quality_stats,
1962 statistics: stats,
1963 lineage: Some(lineage),
1964 gate_result,
1965 internal_controls,
1966 opening_balances,
1967 subledger_reconciliation,
1968 counterfactual_pairs,
1969 red_flags,
1970 collusion_rings,
1971 temporal_vendor_chains,
1972 entity_relationship_graph,
1973 cross_process_links,
1974 industry_output,
1975 compliance_regulations,
1976 })
1977 }
1978
1979 fn phase_chart_of_accounts(
1985 &mut self,
1986 stats: &mut EnhancedGenerationStatistics,
1987 ) -> SynthResult<Arc<ChartOfAccounts>> {
1988 info!("Phase 1: Generating Chart of Accounts");
1989 let coa = self.generate_coa()?;
1990 stats.accounts_count = coa.account_count();
1991 info!(
1992 "Chart of Accounts generated: {} accounts",
1993 stats.accounts_count
1994 );
1995 self.check_resources_with_log("post-coa")?;
1996 Ok(coa)
1997 }
1998
1999 fn phase_master_data(&mut self, stats: &mut EnhancedGenerationStatistics) -> SynthResult<()> {
2001 if self.phase_config.generate_master_data {
2002 info!("Phase 2: Generating Master Data");
2003 self.generate_master_data()?;
2004 stats.vendor_count = self.master_data.vendors.len();
2005 stats.customer_count = self.master_data.customers.len();
2006 stats.material_count = self.master_data.materials.len();
2007 stats.asset_count = self.master_data.assets.len();
2008 stats.employee_count = self.master_data.employees.len();
2009 info!(
2010 "Master data generated: {} vendors, {} customers, {} materials, {} assets, {} employees",
2011 stats.vendor_count, stats.customer_count, stats.material_count,
2012 stats.asset_count, stats.employee_count
2013 );
2014 self.check_resources_with_log("post-master-data")?;
2015 } else {
2016 debug!("Phase 2: Skipped (master data generation disabled)");
2017 }
2018 Ok(())
2019 }
2020
2021 fn phase_document_flows(
2023 &mut self,
2024 stats: &mut EnhancedGenerationStatistics,
2025 ) -> SynthResult<(DocumentFlowSnapshot, SubledgerSnapshot, Vec<JournalEntry>)> {
2026 let mut document_flows = DocumentFlowSnapshot::default();
2027 let mut subledger = SubledgerSnapshot::default();
2028
2029 if self.phase_config.generate_document_flows && !self.master_data.vendors.is_empty() {
2030 info!("Phase 3: Generating Document Flows");
2031 self.generate_document_flows(&mut document_flows)?;
2032 stats.p2p_chain_count = document_flows.p2p_chains.len();
2033 stats.o2c_chain_count = document_flows.o2c_chains.len();
2034 info!(
2035 "Document flows generated: {} P2P chains, {} O2C chains",
2036 stats.p2p_chain_count, stats.o2c_chain_count
2037 );
2038
2039 debug!("Phase 3b: Linking document flows to subledgers");
2041 subledger = self.link_document_flows_to_subledgers(&document_flows)?;
2042 stats.ap_invoice_count = subledger.ap_invoices.len();
2043 stats.ar_invoice_count = subledger.ar_invoices.len();
2044 debug!(
2045 "Subledgers linked: {} AP invoices, {} AR invoices",
2046 stats.ap_invoice_count, stats.ar_invoice_count
2047 );
2048
2049 self.check_resources_with_log("post-document-flows")?;
2050 } else {
2051 debug!("Phase 3: Skipped (document flow generation disabled or no master data)");
2052 }
2053
2054 let mut fa_journal_entries = Vec::new();
2056 if !self.master_data.assets.is_empty() {
2057 debug!("Generating FA subledger records");
2058 let company_code = self
2059 .config
2060 .companies
2061 .first()
2062 .map(|c| c.code.as_str())
2063 .unwrap_or("1000");
2064 let currency = self
2065 .config
2066 .companies
2067 .first()
2068 .map(|c| c.currency.as_str())
2069 .unwrap_or("USD");
2070
2071 let mut fa_gen = datasynth_generators::FAGenerator::new(
2072 datasynth_generators::FAGeneratorConfig::default(),
2073 rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 70),
2074 );
2075
2076 for asset in &self.master_data.assets {
2077 let (record, je) = fa_gen.generate_asset_acquisition(
2078 company_code,
2079 &format!("{:?}", asset.asset_class),
2080 &asset.description,
2081 asset.acquisition_date,
2082 currency,
2083 asset.cost_center.as_deref(),
2084 );
2085 subledger.fa_records.push(record);
2086 fa_journal_entries.push(je);
2087 }
2088
2089 stats.fa_subledger_count = subledger.fa_records.len();
2090 debug!(
2091 "FA subledger records generated: {} (with {} acquisition JEs)",
2092 stats.fa_subledger_count,
2093 fa_journal_entries.len()
2094 );
2095 }
2096
2097 if !self.master_data.materials.is_empty() {
2099 debug!("Generating Inventory subledger records");
2100 let first_company = self.config.companies.first();
2101 let company_code = first_company.map(|c| c.code.as_str()).unwrap_or("1000");
2102 let inv_currency = first_company
2103 .map(|c| c.currency.clone())
2104 .unwrap_or_else(|| "USD".to_string());
2105
2106 let mut inv_gen = datasynth_generators::InventoryGenerator::new_with_currency(
2107 datasynth_generators::InventoryGeneratorConfig::default(),
2108 rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 71),
2109 inv_currency.clone(),
2110 );
2111
2112 for (i, material) in self.master_data.materials.iter().enumerate() {
2113 let plant = format!("PLANT{:02}", (i % 3) + 1);
2114 let storage_loc = format!("SL-{:03}", (i % 10) + 1);
2115 let initial_qty = rust_decimal::Decimal::from(
2116 material
2117 .safety_stock
2118 .to_string()
2119 .parse::<i64>()
2120 .unwrap_or(100),
2121 );
2122
2123 let position = inv_gen.generate_position(
2124 company_code,
2125 &plant,
2126 &storage_loc,
2127 &material.material_id,
2128 &material.description,
2129 initial_qty,
2130 Some(material.standard_cost),
2131 &inv_currency,
2132 );
2133 subledger.inventory_positions.push(position);
2134 }
2135
2136 stats.inventory_subledger_count = subledger.inventory_positions.len();
2137 debug!(
2138 "Inventory subledger records generated: {}",
2139 stats.inventory_subledger_count
2140 );
2141 }
2142
2143 Ok((document_flows, subledger, fa_journal_entries))
2144 }
2145
2146 #[allow(clippy::too_many_arguments)]
2148 fn phase_ocpm_events(
2149 &mut self,
2150 document_flows: &DocumentFlowSnapshot,
2151 sourcing: &SourcingSnapshot,
2152 hr: &HrSnapshot,
2153 manufacturing: &ManufacturingSnapshot,
2154 banking: &BankingSnapshot,
2155 audit: &AuditSnapshot,
2156 financial_reporting: &FinancialReportingSnapshot,
2157 stats: &mut EnhancedGenerationStatistics,
2158 ) -> SynthResult<OcpmSnapshot> {
2159 if self.phase_config.generate_ocpm_events {
2160 info!("Phase 3c: Generating OCPM Events");
2161 let ocpm_snapshot = self.generate_ocpm_events(
2162 document_flows,
2163 sourcing,
2164 hr,
2165 manufacturing,
2166 banking,
2167 audit,
2168 financial_reporting,
2169 )?;
2170 stats.ocpm_event_count = ocpm_snapshot.event_count;
2171 stats.ocpm_object_count = ocpm_snapshot.object_count;
2172 stats.ocpm_case_count = ocpm_snapshot.case_count;
2173 info!(
2174 "OCPM events generated: {} events, {} objects, {} cases",
2175 stats.ocpm_event_count, stats.ocpm_object_count, stats.ocpm_case_count
2176 );
2177 self.check_resources_with_log("post-ocpm")?;
2178 Ok(ocpm_snapshot)
2179 } else {
2180 debug!("Phase 3c: Skipped (OCPM generation disabled or no document flows)");
2181 Ok(OcpmSnapshot::default())
2182 }
2183 }
2184
2185 fn phase_journal_entries(
2187 &mut self,
2188 coa: &Arc<ChartOfAccounts>,
2189 document_flows: &DocumentFlowSnapshot,
2190 _stats: &mut EnhancedGenerationStatistics,
2191 ) -> SynthResult<Vec<JournalEntry>> {
2192 let mut entries = Vec::new();
2193
2194 if self.phase_config.generate_document_flows && !document_flows.p2p_chains.is_empty() {
2196 debug!("Phase 4a: Generating JEs from document flows");
2197 let flow_entries = self.generate_jes_from_document_flows(document_flows)?;
2198 debug!("Generated {} JEs from document flows", flow_entries.len());
2199 entries.extend(flow_entries);
2200 }
2201
2202 if self.phase_config.generate_journal_entries {
2204 info!("Phase 4: Generating Journal Entries");
2205 let je_entries = self.generate_journal_entries(coa)?;
2206 info!("Generated {} standalone journal entries", je_entries.len());
2207 entries.extend(je_entries);
2208 } else {
2209 debug!("Phase 4: Skipped (journal entry generation disabled)");
2210 }
2211
2212 if !entries.is_empty() {
2213 self.check_resources_with_log("post-journal-entries")?;
2216 }
2217
2218 Ok(entries)
2219 }
2220
2221 fn phase_anomaly_injection(
2223 &mut self,
2224 entries: &mut [JournalEntry],
2225 actions: &DegradationActions,
2226 stats: &mut EnhancedGenerationStatistics,
2227 ) -> SynthResult<AnomalyLabels> {
2228 if self.phase_config.inject_anomalies
2229 && !entries.is_empty()
2230 && !actions.skip_anomaly_injection
2231 {
2232 info!("Phase 5: Injecting Anomalies");
2233 let result = self.inject_anomalies(entries)?;
2234 stats.anomalies_injected = result.labels.len();
2235 info!("Injected {} anomalies", stats.anomalies_injected);
2236 self.check_resources_with_log("post-anomaly-injection")?;
2237 Ok(result)
2238 } else if actions.skip_anomaly_injection {
2239 warn!("Phase 5: Skipped due to resource degradation");
2240 Ok(AnomalyLabels::default())
2241 } else {
2242 debug!("Phase 5: Skipped (anomaly injection disabled or no entries)");
2243 Ok(AnomalyLabels::default())
2244 }
2245 }
2246
2247 fn phase_balance_validation(
2249 &mut self,
2250 entries: &[JournalEntry],
2251 ) -> SynthResult<BalanceValidationResult> {
2252 if self.phase_config.validate_balances && !entries.is_empty() {
2253 debug!("Phase 6: Validating Balances");
2254 let balance_validation = self.validate_journal_entries(entries)?;
2255 if balance_validation.is_balanced {
2256 debug!("Balance validation passed");
2257 } else {
2258 warn!(
2259 "Balance validation found {} errors",
2260 balance_validation.validation_errors.len()
2261 );
2262 }
2263 Ok(balance_validation)
2264 } else {
2265 Ok(BalanceValidationResult::default())
2266 }
2267 }
2268
2269 fn phase_data_quality_injection(
2271 &mut self,
2272 entries: &mut [JournalEntry],
2273 actions: &DegradationActions,
2274 stats: &mut EnhancedGenerationStatistics,
2275 ) -> SynthResult<DataQualityStats> {
2276 if self.phase_config.inject_data_quality
2277 && !entries.is_empty()
2278 && !actions.skip_data_quality
2279 {
2280 info!("Phase 7: Injecting Data Quality Variations");
2281 let dq_stats = self.inject_data_quality(entries)?;
2282 stats.data_quality_issues = dq_stats.records_with_issues;
2283 info!("Injected {} data quality issues", stats.data_quality_issues);
2284 self.check_resources_with_log("post-data-quality")?;
2285 Ok(dq_stats)
2286 } else if actions.skip_data_quality {
2287 warn!("Phase 7: Skipped due to resource degradation");
2288 Ok(DataQualityStats::default())
2289 } else {
2290 debug!("Phase 7: Skipped (data quality injection disabled or no entries)");
2291 Ok(DataQualityStats::default())
2292 }
2293 }
2294
2295 fn phase_audit_data(
2297 &mut self,
2298 entries: &[JournalEntry],
2299 stats: &mut EnhancedGenerationStatistics,
2300 ) -> SynthResult<AuditSnapshot> {
2301 if self.phase_config.generate_audit {
2302 info!("Phase 8: Generating Audit Data");
2303 let audit_snapshot = self.generate_audit_data(entries)?;
2304 stats.audit_engagement_count = audit_snapshot.engagements.len();
2305 stats.audit_workpaper_count = audit_snapshot.workpapers.len();
2306 stats.audit_evidence_count = audit_snapshot.evidence.len();
2307 stats.audit_risk_count = audit_snapshot.risk_assessments.len();
2308 stats.audit_finding_count = audit_snapshot.findings.len();
2309 stats.audit_judgment_count = audit_snapshot.judgments.len();
2310 info!(
2311 "Audit data generated: {} engagements, {} workpapers, {} evidence, {} risks, {} findings, {} judgments",
2312 stats.audit_engagement_count, stats.audit_workpaper_count,
2313 stats.audit_evidence_count, stats.audit_risk_count,
2314 stats.audit_finding_count, stats.audit_judgment_count
2315 );
2316 self.check_resources_with_log("post-audit")?;
2317 Ok(audit_snapshot)
2318 } else {
2319 debug!("Phase 8: Skipped (audit generation disabled)");
2320 Ok(AuditSnapshot::default())
2321 }
2322 }
2323
2324 fn phase_banking_data(
2326 &mut self,
2327 stats: &mut EnhancedGenerationStatistics,
2328 ) -> SynthResult<BankingSnapshot> {
2329 if self.phase_config.generate_banking && self.config.banking.enabled {
2330 info!("Phase 9: Generating Banking KYC/AML Data");
2331 let banking_snapshot = self.generate_banking_data()?;
2332 stats.banking_customer_count = banking_snapshot.customers.len();
2333 stats.banking_account_count = banking_snapshot.accounts.len();
2334 stats.banking_transaction_count = banking_snapshot.transactions.len();
2335 stats.banking_suspicious_count = banking_snapshot.suspicious_count;
2336 info!(
2337 "Banking data generated: {} customers, {} accounts, {} transactions ({} suspicious)",
2338 stats.banking_customer_count, stats.banking_account_count,
2339 stats.banking_transaction_count, stats.banking_suspicious_count
2340 );
2341 self.check_resources_with_log("post-banking")?;
2342 Ok(banking_snapshot)
2343 } else {
2344 debug!("Phase 9: Skipped (banking generation disabled)");
2345 Ok(BankingSnapshot::default())
2346 }
2347 }
2348
2349 fn phase_graph_export(
2351 &mut self,
2352 entries: &[JournalEntry],
2353 coa: &Arc<ChartOfAccounts>,
2354 stats: &mut EnhancedGenerationStatistics,
2355 ) -> SynthResult<GraphExportSnapshot> {
2356 if (self.phase_config.generate_graph_export || self.config.graph_export.enabled)
2357 && !entries.is_empty()
2358 {
2359 info!("Phase 10: Exporting Accounting Network Graphs");
2360 match self.export_graphs(entries, coa, stats) {
2361 Ok(snapshot) => {
2362 info!(
2363 "Graph export complete: {} graphs ({} nodes, {} edges)",
2364 snapshot.graph_count, stats.graph_node_count, stats.graph_edge_count
2365 );
2366 Ok(snapshot)
2367 }
2368 Err(e) => {
2369 warn!("Phase 10: Graph export failed: {}", e);
2370 Ok(GraphExportSnapshot::default())
2371 }
2372 }
2373 } else {
2374 debug!("Phase 10: Skipped (graph export disabled or no entries)");
2375 Ok(GraphExportSnapshot::default())
2376 }
2377 }
2378
2379 #[allow(clippy::too_many_arguments)]
2381 fn phase_hypergraph_export(
2382 &self,
2383 coa: &Arc<ChartOfAccounts>,
2384 entries: &[JournalEntry],
2385 document_flows: &DocumentFlowSnapshot,
2386 sourcing: &SourcingSnapshot,
2387 hr: &HrSnapshot,
2388 manufacturing: &ManufacturingSnapshot,
2389 banking: &BankingSnapshot,
2390 audit: &AuditSnapshot,
2391 financial_reporting: &FinancialReportingSnapshot,
2392 ocpm: &OcpmSnapshot,
2393 compliance: &ComplianceRegulationsSnapshot,
2394 stats: &mut EnhancedGenerationStatistics,
2395 ) -> SynthResult<()> {
2396 if self.config.graph_export.hypergraph.enabled && !entries.is_empty() {
2397 info!("Phase 19b: Exporting Multi-Layer Hypergraph");
2398 match self.export_hypergraph(
2399 coa,
2400 entries,
2401 document_flows,
2402 sourcing,
2403 hr,
2404 manufacturing,
2405 banking,
2406 audit,
2407 financial_reporting,
2408 ocpm,
2409 compliance,
2410 stats,
2411 ) {
2412 Ok(info) => {
2413 info!(
2414 "Hypergraph export complete: {} nodes, {} edges, {} hyperedges",
2415 info.node_count, info.edge_count, info.hyperedge_count
2416 );
2417 }
2418 Err(e) => {
2419 warn!("Phase 10b: Hypergraph export failed: {}", e);
2420 }
2421 }
2422 } else {
2423 debug!("Phase 10b: Skipped (hypergraph export disabled or no entries)");
2424 }
2425 Ok(())
2426 }
2427
2428 fn phase_llm_enrichment(&mut self, stats: &mut EnhancedGenerationStatistics) {
2434 if !self.config.llm.enabled {
2435 debug!("Phase 11: Skipped (LLM enrichment disabled)");
2436 return;
2437 }
2438
2439 info!("Phase 11: Starting LLM Enrichment");
2440 let start = std::time::Instant::now();
2441
2442 let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
2443 let provider = Arc::new(MockLlmProvider::new(self.seed));
2444 let enricher = VendorLlmEnricher::new(provider);
2445
2446 let industry = format!("{:?}", self.config.global.industry);
2447 let max_enrichments = self
2448 .config
2449 .llm
2450 .max_vendor_enrichments
2451 .min(self.master_data.vendors.len());
2452
2453 let mut enriched_count = 0usize;
2454 for vendor in self.master_data.vendors.iter_mut().take(max_enrichments) {
2455 match enricher.enrich_vendor_name(&industry, "general", &vendor.country) {
2456 Ok(name) => {
2457 vendor.name = name;
2458 enriched_count += 1;
2459 }
2460 Err(e) => {
2461 warn!(
2462 "LLM vendor enrichment failed for {}: {}",
2463 vendor.vendor_id, e
2464 );
2465 }
2466 }
2467 }
2468
2469 enriched_count
2470 }));
2471
2472 match result {
2473 Ok(enriched_count) => {
2474 stats.llm_vendors_enriched = enriched_count;
2475 let elapsed = start.elapsed();
2476 stats.llm_enrichment_ms = elapsed.as_millis() as u64;
2477 info!(
2478 "Phase 11 complete: {} vendors enriched in {}ms",
2479 enriched_count, stats.llm_enrichment_ms
2480 );
2481 }
2482 Err(_) => {
2483 let elapsed = start.elapsed();
2484 stats.llm_enrichment_ms = elapsed.as_millis() as u64;
2485 warn!("Phase 11: LLM enrichment failed (panic caught), continuing");
2486 }
2487 }
2488 }
2489
2490 fn phase_diffusion_enhancement(&self, stats: &mut EnhancedGenerationStatistics) {
2496 if !self.config.diffusion.enabled {
2497 debug!("Phase 12: Skipped (diffusion enhancement disabled)");
2498 return;
2499 }
2500
2501 info!("Phase 12: Starting Diffusion Enhancement");
2502 let start = std::time::Instant::now();
2503
2504 let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
2505 let means = vec![5000.0, 3.0, 2.0]; let stds = vec![2000.0, 1.5, 1.0];
2508
2509 let diffusion_config = DiffusionConfig {
2510 n_steps: self.config.diffusion.n_steps,
2511 seed: self.seed,
2512 ..Default::default()
2513 };
2514
2515 let backend = StatisticalDiffusionBackend::new(means, stds, diffusion_config);
2516
2517 let n_samples = self.config.diffusion.sample_size;
2518 let n_features = 3; let samples = backend.generate(n_samples, n_features, self.seed);
2520
2521 samples.len()
2522 }));
2523
2524 match result {
2525 Ok(sample_count) => {
2526 stats.diffusion_samples_generated = sample_count;
2527 let elapsed = start.elapsed();
2528 stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
2529 info!(
2530 "Phase 12 complete: {} diffusion samples generated in {}ms",
2531 sample_count, stats.diffusion_enhancement_ms
2532 );
2533 }
2534 Err(_) => {
2535 let elapsed = start.elapsed();
2536 stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
2537 warn!("Phase 12: Diffusion enhancement failed (panic caught), continuing");
2538 }
2539 }
2540 }
2541
2542 fn phase_causal_overlay(&self, stats: &mut EnhancedGenerationStatistics) {
2549 if !self.config.causal.enabled {
2550 debug!("Phase 13: Skipped (causal generation disabled)");
2551 return;
2552 }
2553
2554 info!("Phase 13: Starting Causal Overlay");
2555 let start = std::time::Instant::now();
2556
2557 let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
2558 let graph = match self.config.causal.template.as_str() {
2560 "revenue_cycle" => CausalGraph::revenue_cycle_template(),
2561 _ => CausalGraph::fraud_detection_template(),
2562 };
2563
2564 let scm = StructuralCausalModel::new(graph.clone())
2565 .map_err(|e| SynthError::generation(format!("Failed to build SCM: {e}")))?;
2566
2567 let n_samples = self.config.causal.sample_size;
2568 let samples = scm
2569 .generate(n_samples, self.seed)
2570 .map_err(|e| SynthError::generation(format!("SCM generation failed: {e}")))?;
2571
2572 let validation_passed = if self.config.causal.validate {
2574 let report = CausalValidator::validate_causal_structure(&samples, &graph);
2575 if report.valid {
2576 info!(
2577 "Causal validation passed: all {} checks OK",
2578 report.checks.len()
2579 );
2580 } else {
2581 warn!(
2582 "Causal validation: {} violations detected: {:?}",
2583 report.violations.len(),
2584 report.violations
2585 );
2586 }
2587 Some(report.valid)
2588 } else {
2589 None
2590 };
2591
2592 Ok::<(usize, Option<bool>), SynthError>((samples.len(), validation_passed))
2593 }));
2594
2595 match result {
2596 Ok(Ok((sample_count, validation_passed))) => {
2597 stats.causal_samples_generated = sample_count;
2598 stats.causal_validation_passed = validation_passed;
2599 let elapsed = start.elapsed();
2600 stats.causal_generation_ms = elapsed.as_millis() as u64;
2601 info!(
2602 "Phase 13 complete: {} causal samples generated in {}ms (validation: {:?})",
2603 sample_count, stats.causal_generation_ms, validation_passed,
2604 );
2605 }
2606 Ok(Err(e)) => {
2607 let elapsed = start.elapsed();
2608 stats.causal_generation_ms = elapsed.as_millis() as u64;
2609 warn!("Phase 13: Causal generation failed: {}", e);
2610 }
2611 Err(_) => {
2612 let elapsed = start.elapsed();
2613 stats.causal_generation_ms = elapsed.as_millis() as u64;
2614 warn!("Phase 13: Causal generation failed (panic caught), continuing");
2615 }
2616 }
2617 }
2618
2619 fn phase_sourcing_data(
2621 &mut self,
2622 stats: &mut EnhancedGenerationStatistics,
2623 ) -> SynthResult<SourcingSnapshot> {
2624 if !self.phase_config.generate_sourcing && !self.config.source_to_pay.enabled {
2625 debug!("Phase 14: Skipped (sourcing generation disabled)");
2626 return Ok(SourcingSnapshot::default());
2627 }
2628
2629 info!("Phase 14: Generating S2C Sourcing Data");
2630 let seed = self.seed;
2631
2632 let vendor_ids: Vec<String> = self
2634 .master_data
2635 .vendors
2636 .iter()
2637 .map(|v| v.vendor_id.clone())
2638 .collect();
2639 if vendor_ids.is_empty() {
2640 debug!("Phase 14: Skipped (no vendors available)");
2641 return Ok(SourcingSnapshot::default());
2642 }
2643
2644 let categories: Vec<(String, String)> = vec![
2645 ("CAT-RAW".to_string(), "Raw Materials".to_string()),
2646 ("CAT-OFF".to_string(), "Office Supplies".to_string()),
2647 ("CAT-IT".to_string(), "IT Equipment".to_string()),
2648 ("CAT-SVC".to_string(), "Professional Services".to_string()),
2649 ("CAT-LOG".to_string(), "Logistics".to_string()),
2650 ];
2651 let categories_with_spend: Vec<(String, String, rust_decimal::Decimal)> = categories
2652 .iter()
2653 .map(|(id, name)| {
2654 (
2655 id.clone(),
2656 name.clone(),
2657 rust_decimal::Decimal::from(100_000),
2658 )
2659 })
2660 .collect();
2661
2662 let company_code = self
2663 .config
2664 .companies
2665 .first()
2666 .map(|c| c.code.as_str())
2667 .unwrap_or("1000");
2668 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
2669 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
2670 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
2671 let fiscal_year = start_date.year() as u16;
2672 let owner_ids: Vec<String> = self
2673 .master_data
2674 .employees
2675 .iter()
2676 .take(5)
2677 .map(|e| e.employee_id.clone())
2678 .collect();
2679 let owner_id = owner_ids
2680 .first()
2681 .map(std::string::String::as_str)
2682 .unwrap_or("BUYER-001");
2683
2684 let mut spend_gen = SpendAnalysisGenerator::new(seed);
2686 let spend_analyses =
2687 spend_gen.generate(company_code, &vendor_ids, &categories, fiscal_year);
2688
2689 let mut project_gen = SourcingProjectGenerator::new(seed + 1);
2691 let sourcing_projects = if owner_ids.is_empty() {
2692 Vec::new()
2693 } else {
2694 project_gen.generate(
2695 company_code,
2696 &categories_with_spend,
2697 &owner_ids,
2698 start_date,
2699 self.config.global.period_months,
2700 )
2701 };
2702 stats.sourcing_project_count = sourcing_projects.len();
2703
2704 let qual_vendor_ids: Vec<String> = vendor_ids.iter().take(20).cloned().collect();
2706 let mut qual_gen = QualificationGenerator::new(seed + 2);
2707 let qualifications = qual_gen.generate(
2708 company_code,
2709 &qual_vendor_ids,
2710 sourcing_projects.first().map(|p| p.project_id.as_str()),
2711 owner_id,
2712 start_date,
2713 );
2714
2715 let mut rfx_gen = RfxGenerator::new(seed + 3);
2717 let rfx_events: Vec<RfxEvent> = sourcing_projects
2718 .iter()
2719 .map(|proj| {
2720 let qualified_vids: Vec<String> = vendor_ids.iter().take(5).cloned().collect();
2721 rfx_gen.generate(
2722 company_code,
2723 &proj.project_id,
2724 &proj.category_id,
2725 &qualified_vids,
2726 owner_id,
2727 start_date,
2728 50000.0,
2729 )
2730 })
2731 .collect();
2732 stats.rfx_event_count = rfx_events.len();
2733
2734 let mut bid_gen = BidGenerator::new(seed + 4);
2736 let mut all_bids = Vec::new();
2737 for rfx in &rfx_events {
2738 let bidder_count = vendor_ids.len().clamp(2, 5);
2739 let responding: Vec<String> = vendor_ids.iter().take(bidder_count).cloned().collect();
2740 let bids = bid_gen.generate(rfx, &responding, start_date);
2741 all_bids.extend(bids);
2742 }
2743 stats.bid_count = all_bids.len();
2744
2745 let mut eval_gen = BidEvaluationGenerator::new(seed + 5);
2747 let bid_evaluations: Vec<BidEvaluation> = rfx_events
2748 .iter()
2749 .map(|rfx| {
2750 let rfx_bids: Vec<SupplierBid> = all_bids
2751 .iter()
2752 .filter(|b| b.rfx_id == rfx.rfx_id)
2753 .cloned()
2754 .collect();
2755 eval_gen.evaluate(rfx, &rfx_bids, owner_id)
2756 })
2757 .collect();
2758
2759 let mut contract_gen = ContractGenerator::new(seed + 6);
2761 let contracts: Vec<ProcurementContract> = bid_evaluations
2762 .iter()
2763 .zip(rfx_events.iter())
2764 .filter_map(|(eval, rfx)| {
2765 eval.ranked_bids.first().and_then(|winner| {
2766 all_bids
2767 .iter()
2768 .find(|b| b.bid_id == winner.bid_id)
2769 .map(|winning_bid| {
2770 contract_gen.generate_from_bid(
2771 winning_bid,
2772 Some(&rfx.sourcing_project_id),
2773 &rfx.category_id,
2774 owner_id,
2775 start_date,
2776 )
2777 })
2778 })
2779 })
2780 .collect();
2781 stats.contract_count = contracts.len();
2782
2783 let mut catalog_gen = CatalogGenerator::new(seed + 7);
2785 let catalog_items = catalog_gen.generate(&contracts);
2786 stats.catalog_item_count = catalog_items.len();
2787
2788 let mut scorecard_gen = ScorecardGenerator::new(seed + 8);
2790 let vendor_contracts: Vec<(String, Vec<&ProcurementContract>)> = contracts
2791 .iter()
2792 .fold(
2793 std::collections::HashMap::<String, Vec<&ProcurementContract>>::new(),
2794 |mut acc, c| {
2795 acc.entry(c.vendor_id.clone()).or_default().push(c);
2796 acc
2797 },
2798 )
2799 .into_iter()
2800 .collect();
2801 let scorecards = scorecard_gen.generate(
2802 company_code,
2803 &vendor_contracts,
2804 start_date,
2805 end_date,
2806 owner_id,
2807 );
2808 stats.scorecard_count = scorecards.len();
2809
2810 let mut sourcing_projects = sourcing_projects;
2813 for project in &mut sourcing_projects {
2814 project.rfx_ids = rfx_events
2816 .iter()
2817 .filter(|rfx| rfx.sourcing_project_id == project.project_id)
2818 .map(|rfx| rfx.rfx_id.clone())
2819 .collect();
2820
2821 project.contract_id = contracts
2823 .iter()
2824 .find(|c| {
2825 c.sourcing_project_id
2826 .as_deref()
2827 .is_some_and(|sp| sp == project.project_id)
2828 })
2829 .map(|c| c.contract_id.clone());
2830
2831 project.spend_analysis_id = spend_analyses
2833 .iter()
2834 .find(|sa| sa.category_id == project.category_id)
2835 .map(|sa| sa.category_id.clone());
2836 }
2837
2838 info!(
2839 "S2C sourcing generated: {} projects, {} RFx, {} bids, {} contracts, {} catalog items, {} scorecards",
2840 stats.sourcing_project_count, stats.rfx_event_count, stats.bid_count,
2841 stats.contract_count, stats.catalog_item_count, stats.scorecard_count
2842 );
2843 self.check_resources_with_log("post-sourcing")?;
2844
2845 Ok(SourcingSnapshot {
2846 spend_analyses,
2847 sourcing_projects,
2848 qualifications,
2849 rfx_events,
2850 bids: all_bids,
2851 bid_evaluations,
2852 contracts,
2853 catalog_items,
2854 scorecards,
2855 })
2856 }
2857
2858 fn phase_intercompany(
2860 &mut self,
2861 stats: &mut EnhancedGenerationStatistics,
2862 ) -> SynthResult<IntercompanySnapshot> {
2863 if !self.phase_config.generate_intercompany && !self.config.intercompany.enabled {
2865 debug!("Phase 14b: Skipped (intercompany generation disabled)");
2866 return Ok(IntercompanySnapshot::default());
2867 }
2868
2869 if self.config.companies.len() < 2 {
2871 debug!(
2872 "Phase 14b: Skipped (intercompany requires 2+ companies, found {})",
2873 self.config.companies.len()
2874 );
2875 return Ok(IntercompanySnapshot::default());
2876 }
2877
2878 info!("Phase 14b: Generating Intercompany Transactions");
2879
2880 let seed = self.seed;
2881 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
2882 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
2883 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
2884
2885 let parent_code = self.config.companies[0].code.clone();
2888 let mut ownership_structure =
2889 datasynth_core::models::intercompany::OwnershipStructure::new(parent_code.clone());
2890
2891 for (i, company) in self.config.companies.iter().skip(1).enumerate() {
2892 let relationship = datasynth_core::models::intercompany::IntercompanyRelationship::new(
2893 format!("REL{:03}", i + 1),
2894 parent_code.clone(),
2895 company.code.clone(),
2896 rust_decimal::Decimal::from(100), start_date,
2898 );
2899 ownership_structure.add_relationship(relationship);
2900 }
2901
2902 let tp_method = match self.config.intercompany.transfer_pricing_method {
2904 datasynth_config::schema::TransferPricingMethod::CostPlus => {
2905 datasynth_core::models::intercompany::TransferPricingMethod::CostPlus
2906 }
2907 datasynth_config::schema::TransferPricingMethod::ComparableUncontrolled => {
2908 datasynth_core::models::intercompany::TransferPricingMethod::ComparableUncontrolled
2909 }
2910 datasynth_config::schema::TransferPricingMethod::ResalePrice => {
2911 datasynth_core::models::intercompany::TransferPricingMethod::ResalePrice
2912 }
2913 datasynth_config::schema::TransferPricingMethod::TransactionalNetMargin => {
2914 datasynth_core::models::intercompany::TransferPricingMethod::TransactionalNetMargin
2915 }
2916 datasynth_config::schema::TransferPricingMethod::ProfitSplit => {
2917 datasynth_core::models::intercompany::TransferPricingMethod::ProfitSplit
2918 }
2919 };
2920
2921 let ic_currency = self
2923 .config
2924 .companies
2925 .first()
2926 .map(|c| c.currency.clone())
2927 .unwrap_or_else(|| "USD".to_string());
2928 let ic_gen_config = datasynth_generators::ICGeneratorConfig {
2929 ic_transaction_rate: self.config.intercompany.ic_transaction_rate,
2930 transfer_pricing_method: tp_method,
2931 markup_percent: rust_decimal::Decimal::from_f64_retain(
2932 self.config.intercompany.markup_percent,
2933 )
2934 .unwrap_or(rust_decimal::Decimal::from(5)),
2935 generate_matched_pairs: self.config.intercompany.generate_matched_pairs,
2936 default_currency: ic_currency,
2937 ..Default::default()
2938 };
2939
2940 let mut ic_generator = datasynth_generators::ICGenerator::new(
2942 ic_gen_config,
2943 ownership_structure.clone(),
2944 seed + 50,
2945 );
2946
2947 let transactions_per_day = 3;
2950 let matched_pairs = ic_generator.generate_transactions_for_period(
2951 start_date,
2952 end_date,
2953 transactions_per_day,
2954 );
2955
2956 let mut seller_entries = Vec::new();
2958 let mut buyer_entries = Vec::new();
2959 let fiscal_year = start_date.year();
2960
2961 for pair in &matched_pairs {
2962 let fiscal_period = pair.posting_date.month();
2963 let (seller_je, buyer_je) =
2964 ic_generator.generate_journal_entries(pair, fiscal_year, fiscal_period);
2965 seller_entries.push(seller_je);
2966 buyer_entries.push(buyer_je);
2967 }
2968
2969 let matching_config = datasynth_generators::ICMatchingConfig {
2971 base_currency: self
2972 .config
2973 .companies
2974 .first()
2975 .map(|c| c.currency.clone())
2976 .unwrap_or_else(|| "USD".to_string()),
2977 ..Default::default()
2978 };
2979 let mut matching_engine = datasynth_generators::ICMatchingEngine::new(matching_config);
2980 matching_engine.load_matched_pairs(&matched_pairs);
2981 let matching_result = matching_engine.run_matching(end_date);
2982
2983 let mut elimination_entries = Vec::new();
2985 if self.config.intercompany.generate_eliminations {
2986 let elim_config = datasynth_generators::EliminationConfig {
2987 consolidation_entity: "GROUP".to_string(),
2988 base_currency: self
2989 .config
2990 .companies
2991 .first()
2992 .map(|c| c.currency.clone())
2993 .unwrap_or_else(|| "USD".to_string()),
2994 ..Default::default()
2995 };
2996
2997 let mut elim_generator =
2998 datasynth_generators::EliminationGenerator::new(elim_config, ownership_structure);
2999
3000 let fiscal_period = format!("{}{:02}", fiscal_year, end_date.month());
3001 let all_balances: Vec<datasynth_core::models::intercompany::ICAggregatedBalance> =
3002 matching_result
3003 .matched_balances
3004 .iter()
3005 .chain(matching_result.unmatched_balances.iter())
3006 .cloned()
3007 .collect();
3008
3009 let journal = elim_generator.generate_eliminations(
3010 &fiscal_period,
3011 end_date,
3012 &all_balances,
3013 &matched_pairs,
3014 &std::collections::HashMap::new(), &std::collections::HashMap::new(), );
3017
3018 elimination_entries = journal.entries.clone();
3019 }
3020
3021 let matched_pair_count = matched_pairs.len();
3022 let elimination_entry_count = elimination_entries.len();
3023 let match_rate = matching_result.match_rate;
3024
3025 stats.ic_matched_pair_count = matched_pair_count;
3026 stats.ic_elimination_count = elimination_entry_count;
3027 stats.ic_transaction_count = seller_entries.len() + buyer_entries.len();
3028
3029 info!(
3030 "Intercompany data generated: {} matched pairs, {} JEs ({} seller + {} buyer), {} elimination entries, {:.1}% match rate",
3031 matched_pair_count,
3032 stats.ic_transaction_count,
3033 seller_entries.len(),
3034 buyer_entries.len(),
3035 elimination_entry_count,
3036 match_rate * 100.0
3037 );
3038 self.check_resources_with_log("post-intercompany")?;
3039
3040 Ok(IntercompanySnapshot {
3041 matched_pairs,
3042 seller_journal_entries: seller_entries,
3043 buyer_journal_entries: buyer_entries,
3044 elimination_entries,
3045 matched_pair_count,
3046 elimination_entry_count,
3047 match_rate,
3048 })
3049 }
3050
3051 fn phase_financial_reporting(
3053 &mut self,
3054 document_flows: &DocumentFlowSnapshot,
3055 journal_entries: &[JournalEntry],
3056 coa: &Arc<ChartOfAccounts>,
3057 stats: &mut EnhancedGenerationStatistics,
3058 ) -> SynthResult<FinancialReportingSnapshot> {
3059 let fs_enabled = self.phase_config.generate_financial_statements
3060 || self.config.financial_reporting.enabled;
3061 let br_enabled = self.phase_config.generate_bank_reconciliation;
3062
3063 if !fs_enabled && !br_enabled {
3064 debug!("Phase 15: Skipped (financial reporting disabled)");
3065 return Ok(FinancialReportingSnapshot::default());
3066 }
3067
3068 info!("Phase 15: Generating Financial Reporting Data");
3069
3070 let seed = self.seed;
3071 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3072 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
3073
3074 let mut financial_statements = Vec::new();
3075 let mut bank_reconciliations = Vec::new();
3076 let mut trial_balances = Vec::new();
3077
3078 if fs_enabled {
3086 let company_code = self
3087 .config
3088 .companies
3089 .first()
3090 .map(|c| c.code.as_str())
3091 .unwrap_or("1000");
3092 let currency = self
3093 .config
3094 .companies
3095 .first()
3096 .map(|c| c.currency.as_str())
3097 .unwrap_or("USD");
3098 let has_journal_entries = !journal_entries.is_empty();
3099
3100 let mut fs_gen = FinancialStatementGenerator::new(seed + 20);
3103
3104 let mut prior_cumulative_tb: Option<Vec<datasynth_generators::TrialBalanceEntry>> =
3106 None;
3107
3108 for period in 0..self.config.global.period_months {
3110 let period_start = start_date + chrono::Months::new(period);
3111 let period_end =
3112 start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
3113 let fiscal_year = period_end.year() as u16;
3114 let fiscal_period = period_end.month() as u8;
3115
3116 if has_journal_entries {
3117 let tb_entries = Self::build_cumulative_trial_balance(
3120 journal_entries,
3121 coa,
3122 company_code,
3123 start_date,
3124 period_end,
3125 fiscal_year,
3126 fiscal_period,
3127 );
3128
3129 let prior_ref = prior_cumulative_tb.as_deref();
3132 let stmts = fs_gen.generate(
3133 company_code,
3134 currency,
3135 &tb_entries,
3136 period_start,
3137 period_end,
3138 fiscal_year,
3139 fiscal_period,
3140 prior_ref,
3141 "SYS-AUTOCLOSE",
3142 );
3143
3144 for stmt in stmts {
3146 if stmt.statement_type == StatementType::CashFlowStatement {
3147 let net_income = Self::calculate_net_income_from_tb(&tb_entries);
3149 let cf_items = Self::build_cash_flow_from_trial_balances(
3150 &tb_entries,
3151 prior_ref,
3152 net_income,
3153 );
3154 financial_statements.push(FinancialStatement {
3155 cash_flow_items: cf_items,
3156 ..stmt
3157 });
3158 } else {
3159 financial_statements.push(stmt);
3160 }
3161 }
3162
3163 trial_balances.push(PeriodTrialBalance {
3165 fiscal_year,
3166 fiscal_period,
3167 period_start,
3168 period_end,
3169 entries: tb_entries.clone(),
3170 });
3171
3172 prior_cumulative_tb = Some(tb_entries);
3174 } else {
3175 let tb_entries = Self::build_trial_balance_from_entries(
3178 journal_entries,
3179 coa,
3180 company_code,
3181 fiscal_year,
3182 fiscal_period,
3183 );
3184
3185 let stmts = fs_gen.generate(
3186 company_code,
3187 currency,
3188 &tb_entries,
3189 period_start,
3190 period_end,
3191 fiscal_year,
3192 fiscal_period,
3193 None,
3194 "SYS-AUTOCLOSE",
3195 );
3196 financial_statements.extend(stmts);
3197
3198 if !tb_entries.is_empty() {
3200 trial_balances.push(PeriodTrialBalance {
3201 fiscal_year,
3202 fiscal_period,
3203 period_start,
3204 period_end,
3205 entries: tb_entries,
3206 });
3207 }
3208 }
3209 }
3210 stats.financial_statement_count = financial_statements.len();
3211 info!(
3212 "Financial statements generated: {} statements (JE-derived: {})",
3213 stats.financial_statement_count, has_journal_entries
3214 );
3215 }
3216
3217 if br_enabled && !document_flows.payments.is_empty() {
3219 let employee_ids: Vec<String> = self
3220 .master_data
3221 .employees
3222 .iter()
3223 .map(|e| e.employee_id.clone())
3224 .collect();
3225 let mut br_gen =
3226 BankReconciliationGenerator::new(seed + 25).with_employee_pool(employee_ids);
3227
3228 for company in &self.config.companies {
3230 let company_payments: Vec<PaymentReference> = document_flows
3231 .payments
3232 .iter()
3233 .filter(|p| p.header.company_code == company.code)
3234 .map(|p| PaymentReference {
3235 id: p.header.document_id.clone(),
3236 amount: if p.is_vendor { p.amount } else { -p.amount },
3237 date: p.header.document_date,
3238 reference: p
3239 .check_number
3240 .clone()
3241 .or_else(|| p.wire_reference.clone())
3242 .unwrap_or_else(|| p.header.document_id.clone()),
3243 })
3244 .collect();
3245
3246 if company_payments.is_empty() {
3247 continue;
3248 }
3249
3250 let bank_account_id = format!("{}-MAIN", company.code);
3251
3252 for period in 0..self.config.global.period_months {
3254 let period_start = start_date + chrono::Months::new(period);
3255 let period_end =
3256 start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
3257
3258 let period_payments: Vec<PaymentReference> = company_payments
3259 .iter()
3260 .filter(|p| p.date >= period_start && p.date <= period_end)
3261 .cloned()
3262 .collect();
3263
3264 let recon = br_gen.generate(
3265 &company.code,
3266 &bank_account_id,
3267 period_start,
3268 period_end,
3269 &company.currency,
3270 &period_payments,
3271 );
3272 bank_reconciliations.push(recon);
3273 }
3274 }
3275 info!(
3276 "Bank reconciliations generated: {} reconciliations",
3277 bank_reconciliations.len()
3278 );
3279 }
3280
3281 stats.bank_reconciliation_count = bank_reconciliations.len();
3282 self.check_resources_with_log("post-financial-reporting")?;
3283
3284 if !trial_balances.is_empty() {
3285 info!(
3286 "Period-close trial balances captured: {} periods",
3287 trial_balances.len()
3288 );
3289 }
3290
3291 Ok(FinancialReportingSnapshot {
3292 financial_statements,
3293 bank_reconciliations,
3294 trial_balances,
3295 })
3296 }
3297
3298 fn build_trial_balance_from_entries(
3304 journal_entries: &[JournalEntry],
3305 coa: &ChartOfAccounts,
3306 company_code: &str,
3307 fiscal_year: u16,
3308 fiscal_period: u8,
3309 ) -> Vec<datasynth_generators::TrialBalanceEntry> {
3310 use rust_decimal::Decimal;
3311
3312 let mut account_debits: HashMap<String, Decimal> = HashMap::new();
3314 let mut account_credits: HashMap<String, Decimal> = HashMap::new();
3315
3316 for je in journal_entries {
3317 if je.header.company_code != company_code
3319 || je.header.fiscal_year != fiscal_year
3320 || je.header.fiscal_period != fiscal_period
3321 {
3322 continue;
3323 }
3324
3325 for line in &je.lines {
3326 let acct = &line.gl_account;
3327 *account_debits.entry(acct.clone()).or_insert(Decimal::ZERO) += line.debit_amount;
3328 *account_credits.entry(acct.clone()).or_insert(Decimal::ZERO) += line.credit_amount;
3329 }
3330 }
3331
3332 let mut all_accounts: Vec<&String> = account_debits
3334 .keys()
3335 .chain(account_credits.keys())
3336 .collect::<std::collections::HashSet<_>>()
3337 .into_iter()
3338 .collect();
3339 all_accounts.sort();
3340
3341 let mut entries = Vec::new();
3342
3343 for acct_number in all_accounts {
3344 let debit = account_debits
3345 .get(acct_number)
3346 .copied()
3347 .unwrap_or(Decimal::ZERO);
3348 let credit = account_credits
3349 .get(acct_number)
3350 .copied()
3351 .unwrap_or(Decimal::ZERO);
3352
3353 if debit.is_zero() && credit.is_zero() {
3354 continue;
3355 }
3356
3357 let account_name = coa
3359 .get_account(acct_number)
3360 .map(|gl| gl.short_description.clone())
3361 .unwrap_or_else(|| format!("Account {acct_number}"));
3362
3363 let category = Self::category_from_account_code(acct_number);
3368
3369 entries.push(datasynth_generators::TrialBalanceEntry {
3370 account_code: acct_number.clone(),
3371 account_name,
3372 category,
3373 debit_balance: debit,
3374 credit_balance: credit,
3375 });
3376 }
3377
3378 entries
3379 }
3380
3381 fn build_cumulative_trial_balance(
3388 journal_entries: &[JournalEntry],
3389 coa: &ChartOfAccounts,
3390 company_code: &str,
3391 start_date: NaiveDate,
3392 period_end: NaiveDate,
3393 fiscal_year: u16,
3394 fiscal_period: u8,
3395 ) -> Vec<datasynth_generators::TrialBalanceEntry> {
3396 use rust_decimal::Decimal;
3397
3398 let mut bs_debits: HashMap<String, Decimal> = HashMap::new();
3400 let mut bs_credits: HashMap<String, Decimal> = HashMap::new();
3401
3402 let mut is_debits: HashMap<String, Decimal> = HashMap::new();
3404 let mut is_credits: HashMap<String, Decimal> = HashMap::new();
3405
3406 for je in journal_entries {
3407 if je.header.company_code != company_code {
3408 continue;
3409 }
3410
3411 for line in &je.lines {
3412 let acct = &line.gl_account;
3413 let category = Self::category_from_account_code(acct);
3414 let is_bs_account = matches!(
3415 category.as_str(),
3416 "Cash"
3417 | "Receivables"
3418 | "Inventory"
3419 | "FixedAssets"
3420 | "Payables"
3421 | "AccruedLiabilities"
3422 | "LongTermDebt"
3423 | "Equity"
3424 );
3425
3426 if is_bs_account {
3427 if je.header.document_date <= period_end
3429 && je.header.document_date >= start_date
3430 {
3431 *bs_debits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
3432 line.debit_amount;
3433 *bs_credits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
3434 line.credit_amount;
3435 }
3436 } else {
3437 if je.header.fiscal_year == fiscal_year
3439 && je.header.fiscal_period == fiscal_period
3440 {
3441 *is_debits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
3442 line.debit_amount;
3443 *is_credits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
3444 line.credit_amount;
3445 }
3446 }
3447 }
3448 }
3449
3450 let mut all_accounts: std::collections::HashSet<String> = std::collections::HashSet::new();
3452 all_accounts.extend(bs_debits.keys().cloned());
3453 all_accounts.extend(bs_credits.keys().cloned());
3454 all_accounts.extend(is_debits.keys().cloned());
3455 all_accounts.extend(is_credits.keys().cloned());
3456
3457 let mut sorted_accounts: Vec<String> = all_accounts.into_iter().collect();
3458 sorted_accounts.sort();
3459
3460 let mut entries = Vec::new();
3461
3462 for acct_number in &sorted_accounts {
3463 let category = Self::category_from_account_code(acct_number);
3464 let is_bs_account = matches!(
3465 category.as_str(),
3466 "Cash"
3467 | "Receivables"
3468 | "Inventory"
3469 | "FixedAssets"
3470 | "Payables"
3471 | "AccruedLiabilities"
3472 | "LongTermDebt"
3473 | "Equity"
3474 );
3475
3476 let (debit, credit) = if is_bs_account {
3477 (
3478 bs_debits.get(acct_number).copied().unwrap_or(Decimal::ZERO),
3479 bs_credits
3480 .get(acct_number)
3481 .copied()
3482 .unwrap_or(Decimal::ZERO),
3483 )
3484 } else {
3485 (
3486 is_debits.get(acct_number).copied().unwrap_or(Decimal::ZERO),
3487 is_credits
3488 .get(acct_number)
3489 .copied()
3490 .unwrap_or(Decimal::ZERO),
3491 )
3492 };
3493
3494 if debit.is_zero() && credit.is_zero() {
3495 continue;
3496 }
3497
3498 let account_name = coa
3499 .get_account(acct_number)
3500 .map(|gl| gl.short_description.clone())
3501 .unwrap_or_else(|| format!("Account {acct_number}"));
3502
3503 entries.push(datasynth_generators::TrialBalanceEntry {
3504 account_code: acct_number.clone(),
3505 account_name,
3506 category,
3507 debit_balance: debit,
3508 credit_balance: credit,
3509 });
3510 }
3511
3512 entries
3513 }
3514
3515 fn build_cash_flow_from_trial_balances(
3520 current_tb: &[datasynth_generators::TrialBalanceEntry],
3521 prior_tb: Option<&[datasynth_generators::TrialBalanceEntry]>,
3522 net_income: rust_decimal::Decimal,
3523 ) -> Vec<CashFlowItem> {
3524 use rust_decimal::Decimal;
3525
3526 let aggregate =
3528 |tb: &[datasynth_generators::TrialBalanceEntry]| -> HashMap<String, Decimal> {
3529 let mut map: HashMap<String, Decimal> = HashMap::new();
3530 for entry in tb {
3531 let net = entry.debit_balance - entry.credit_balance;
3532 *map.entry(entry.category.clone()).or_default() += net;
3533 }
3534 map
3535 };
3536
3537 let current = aggregate(current_tb);
3538 let prior = prior_tb.map(aggregate);
3539
3540 let get = |map: &HashMap<String, Decimal>, key: &str| -> Decimal {
3542 *map.get(key).unwrap_or(&Decimal::ZERO)
3543 };
3544
3545 let change = |key: &str| -> Decimal {
3547 let curr = get(¤t, key);
3548 match &prior {
3549 Some(p) => curr - get(p, key),
3550 None => curr,
3551 }
3552 };
3553
3554 let fixed_asset_change = change("FixedAssets");
3557 let depreciation_addback = if fixed_asset_change < Decimal::ZERO {
3558 -fixed_asset_change
3559 } else {
3560 Decimal::ZERO
3561 };
3562
3563 let ar_change = change("Receivables");
3565 let inventory_change = change("Inventory");
3566 let ap_change = change("Payables");
3568 let accrued_change = change("AccruedLiabilities");
3569
3570 let operating_cf = net_income + depreciation_addback - ar_change - inventory_change
3571 + (-ap_change)
3572 + (-accrued_change);
3573
3574 let capex = if fixed_asset_change > Decimal::ZERO {
3576 -fixed_asset_change
3577 } else {
3578 Decimal::ZERO
3579 };
3580 let investing_cf = capex;
3581
3582 let debt_change = -change("LongTermDebt");
3584 let equity_change = -change("Equity");
3585 let financing_cf = debt_change + equity_change;
3586
3587 let net_change = operating_cf + investing_cf + financing_cf;
3588
3589 vec![
3590 CashFlowItem {
3591 item_code: "CF-NI".to_string(),
3592 label: "Net Income".to_string(),
3593 category: CashFlowCategory::Operating,
3594 amount: net_income,
3595 amount_prior: None,
3596 sort_order: 1,
3597 is_total: false,
3598 },
3599 CashFlowItem {
3600 item_code: "CF-DEP".to_string(),
3601 label: "Depreciation & Amortization".to_string(),
3602 category: CashFlowCategory::Operating,
3603 amount: depreciation_addback,
3604 amount_prior: None,
3605 sort_order: 2,
3606 is_total: false,
3607 },
3608 CashFlowItem {
3609 item_code: "CF-AR".to_string(),
3610 label: "Change in Accounts Receivable".to_string(),
3611 category: CashFlowCategory::Operating,
3612 amount: -ar_change,
3613 amount_prior: None,
3614 sort_order: 3,
3615 is_total: false,
3616 },
3617 CashFlowItem {
3618 item_code: "CF-AP".to_string(),
3619 label: "Change in Accounts Payable".to_string(),
3620 category: CashFlowCategory::Operating,
3621 amount: -ap_change,
3622 amount_prior: None,
3623 sort_order: 4,
3624 is_total: false,
3625 },
3626 CashFlowItem {
3627 item_code: "CF-INV".to_string(),
3628 label: "Change in Inventory".to_string(),
3629 category: CashFlowCategory::Operating,
3630 amount: -inventory_change,
3631 amount_prior: None,
3632 sort_order: 5,
3633 is_total: false,
3634 },
3635 CashFlowItem {
3636 item_code: "CF-OP".to_string(),
3637 label: "Net Cash from Operating Activities".to_string(),
3638 category: CashFlowCategory::Operating,
3639 amount: operating_cf,
3640 amount_prior: None,
3641 sort_order: 6,
3642 is_total: true,
3643 },
3644 CashFlowItem {
3645 item_code: "CF-CAPEX".to_string(),
3646 label: "Capital Expenditures".to_string(),
3647 category: CashFlowCategory::Investing,
3648 amount: capex,
3649 amount_prior: None,
3650 sort_order: 7,
3651 is_total: false,
3652 },
3653 CashFlowItem {
3654 item_code: "CF-INV-T".to_string(),
3655 label: "Net Cash from Investing Activities".to_string(),
3656 category: CashFlowCategory::Investing,
3657 amount: investing_cf,
3658 amount_prior: None,
3659 sort_order: 8,
3660 is_total: true,
3661 },
3662 CashFlowItem {
3663 item_code: "CF-DEBT".to_string(),
3664 label: "Net Borrowings / (Repayments)".to_string(),
3665 category: CashFlowCategory::Financing,
3666 amount: debt_change,
3667 amount_prior: None,
3668 sort_order: 9,
3669 is_total: false,
3670 },
3671 CashFlowItem {
3672 item_code: "CF-EQ".to_string(),
3673 label: "Equity Changes".to_string(),
3674 category: CashFlowCategory::Financing,
3675 amount: equity_change,
3676 amount_prior: None,
3677 sort_order: 10,
3678 is_total: false,
3679 },
3680 CashFlowItem {
3681 item_code: "CF-FIN-T".to_string(),
3682 label: "Net Cash from Financing Activities".to_string(),
3683 category: CashFlowCategory::Financing,
3684 amount: financing_cf,
3685 amount_prior: None,
3686 sort_order: 11,
3687 is_total: true,
3688 },
3689 CashFlowItem {
3690 item_code: "CF-NET".to_string(),
3691 label: "Net Change in Cash".to_string(),
3692 category: CashFlowCategory::Operating,
3693 amount: net_change,
3694 amount_prior: None,
3695 sort_order: 12,
3696 is_total: true,
3697 },
3698 ]
3699 }
3700
3701 fn calculate_net_income_from_tb(
3705 tb: &[datasynth_generators::TrialBalanceEntry],
3706 ) -> rust_decimal::Decimal {
3707 use rust_decimal::Decimal;
3708
3709 let mut aggregated: HashMap<String, Decimal> = HashMap::new();
3710 for entry in tb {
3711 let net = entry.debit_balance - entry.credit_balance;
3712 *aggregated.entry(entry.category.clone()).or_default() += net;
3713 }
3714
3715 let revenue = *aggregated.get("Revenue").unwrap_or(&Decimal::ZERO);
3716 let cogs = *aggregated.get("CostOfSales").unwrap_or(&Decimal::ZERO);
3717 let opex = *aggregated
3718 .get("OperatingExpenses")
3719 .unwrap_or(&Decimal::ZERO);
3720 let other_income = *aggregated.get("OtherIncome").unwrap_or(&Decimal::ZERO);
3721 let other_expenses = *aggregated.get("OtherExpenses").unwrap_or(&Decimal::ZERO);
3722
3723 let operating_income = revenue - cogs - opex - other_expenses - other_income;
3726 let tax_rate = Decimal::new(25, 2); let tax = operating_income * tax_rate;
3728 operating_income - tax
3729 }
3730
/// Maps an account code to a reporting category using its first two characters.
///
/// | Prefix  | Category             |
/// |---------|----------------------|
/// | `10`    | `Cash`               |
/// | `11`    | `Receivables`        |
/// | `12-14` | `Inventory`          |
/// | `15-19` | `FixedAssets`        |
/// | `20`    | `Payables`           |
/// | `21-24` | `AccruedLiabilities` |
/// | `25-29` | `LongTermDebt`       |
/// | `30-39` | `Equity`             |
/// | `40-44` | `Revenue`            |
/// | `50-52` | `CostOfSales`        |
/// | `60-69` | `OperatingExpenses`  |
/// | `70-74` | `OtherIncome`        |
/// | `80-89` | `OtherExpenses`      |
///
/// Every other input — including codes shorter than two characters and prefixes
/// not listed above — falls back to `OperatingExpenses`.
fn category_from_account_code(code: &str) -> String {
    let mut leading = code.chars();
    let category = match (leading.next(), leading.next()) {
        (Some('1'), Some('0')) => "Cash",
        (Some('1'), Some('1')) => "Receivables",
        (Some('1'), Some('2'..='4')) => "Inventory",
        (Some('1'), Some('5'..='9')) => "FixedAssets",
        (Some('2'), Some('0')) => "Payables",
        (Some('2'), Some('1'..='4')) => "AccruedLiabilities",
        (Some('2'), Some('5'..='9')) => "LongTermDebt",
        (Some('3'), Some('0'..='9')) => "Equity",
        (Some('4'), Some('0'..='4')) => "Revenue",
        (Some('5'), Some('0'..='2')) => "CostOfSales",
        (Some('6'), Some('0'..='9')) => "OperatingExpenses",
        (Some('7'), Some('0'..='4')) => "OtherIncome",
        (Some('8'), Some('0'..='9')) => "OtherExpenses",
        // Unmapped or malformed prefixes share the operating-expense bucket.
        _ => "OperatingExpenses",
    };
    category.to_owned()
}
3759
3760 fn phase_hr_data(
3762 &mut self,
3763 stats: &mut EnhancedGenerationStatistics,
3764 ) -> SynthResult<HrSnapshot> {
3765 if !self.config.hr.enabled {
3766 debug!("Phase 16: Skipped (HR generation disabled)");
3767 return Ok(HrSnapshot::default());
3768 }
3769
3770 info!("Phase 16: Generating HR Data (Payroll, Time Entries, Expenses)");
3771
3772 let seed = self.seed;
3773 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3774 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
3775 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
3776 let company_code = self
3777 .config
3778 .companies
3779 .first()
3780 .map(|c| c.code.as_str())
3781 .unwrap_or("1000");
3782 let currency = self
3783 .config
3784 .companies
3785 .first()
3786 .map(|c| c.currency.as_str())
3787 .unwrap_or("USD");
3788
3789 let employee_ids: Vec<String> = self
3790 .master_data
3791 .employees
3792 .iter()
3793 .map(|e| e.employee_id.clone())
3794 .collect();
3795
3796 if employee_ids.is_empty() {
3797 debug!("Phase 16: Skipped (no employees available)");
3798 return Ok(HrSnapshot::default());
3799 }
3800
3801 let cost_center_ids: Vec<String> = self
3804 .master_data
3805 .employees
3806 .iter()
3807 .filter_map(|e| e.cost_center.clone())
3808 .collect::<std::collections::HashSet<_>>()
3809 .into_iter()
3810 .collect();
3811
3812 let mut snapshot = HrSnapshot::default();
3813
3814 if self.config.hr.payroll.enabled {
3816 let mut payroll_gen = datasynth_generators::PayrollGenerator::new(seed + 30)
3817 .with_pools(employee_ids.clone(), cost_center_ids.clone());
3818
3819 let payroll_pack = self.primary_pack();
3821
3822 payroll_gen.set_country_pack(payroll_pack.clone());
3825
3826 let employees_with_salary: Vec<(
3827 String,
3828 rust_decimal::Decimal,
3829 Option<String>,
3830 Option<String>,
3831 )> = self
3832 .master_data
3833 .employees
3834 .iter()
3835 .map(|e| {
3836 (
3837 e.employee_id.clone(),
3838 rust_decimal::Decimal::from(5000), e.cost_center.clone(),
3840 e.department_id.clone(),
3841 )
3842 })
3843 .collect();
3844
3845 for month in 0..self.config.global.period_months {
3846 let period_start = start_date + chrono::Months::new(month);
3847 let period_end = start_date + chrono::Months::new(month + 1) - chrono::Days::new(1);
3848 let (run, items) = payroll_gen.generate(
3849 company_code,
3850 &employees_with_salary,
3851 period_start,
3852 period_end,
3853 currency,
3854 );
3855 snapshot.payroll_runs.push(run);
3856 snapshot.payroll_run_count += 1;
3857 snapshot.payroll_line_item_count += items.len();
3858 snapshot.payroll_line_items.extend(items);
3859 }
3860 }
3861
3862 if self.config.hr.time_attendance.enabled {
3864 let mut time_gen = datasynth_generators::TimeEntryGenerator::new(seed + 31)
3865 .with_pools(employee_ids.clone(), cost_center_ids.clone());
3866 let entries = time_gen.generate(
3867 &employee_ids,
3868 start_date,
3869 end_date,
3870 &self.config.hr.time_attendance,
3871 );
3872 snapshot.time_entry_count = entries.len();
3873 snapshot.time_entries = entries;
3874 }
3875
3876 if self.config.hr.expenses.enabled {
3878 let mut expense_gen = datasynth_generators::ExpenseReportGenerator::new(seed + 32)
3879 .with_pools(employee_ids.clone(), cost_center_ids.clone());
3880 expense_gen.set_country_pack(self.primary_pack().clone());
3881 let company_currency = self
3882 .config
3883 .companies
3884 .first()
3885 .map(|c| c.currency.as_str())
3886 .unwrap_or("USD");
3887 let reports = expense_gen.generate_with_currency(
3888 &employee_ids,
3889 start_date,
3890 end_date,
3891 &self.config.hr.expenses,
3892 company_currency,
3893 );
3894 snapshot.expense_report_count = reports.len();
3895 snapshot.expense_reports = reports;
3896 }
3897
3898 if self.config.hr.payroll.enabled {
3900 let mut benefit_gen = datasynth_generators::BenefitEnrollmentGenerator::new(seed + 33);
3901 let employee_pairs: Vec<(String, String)> = self
3902 .master_data
3903 .employees
3904 .iter()
3905 .map(|e| (e.employee_id.clone(), e.display_name.clone()))
3906 .collect();
3907 let enrollments =
3908 benefit_gen.generate(company_code, &employee_pairs, start_date, currency);
3909 snapshot.benefit_enrollment_count = enrollments.len();
3910 snapshot.benefit_enrollments = enrollments;
3911 }
3912
3913 stats.payroll_run_count = snapshot.payroll_run_count;
3914 stats.time_entry_count = snapshot.time_entry_count;
3915 stats.expense_report_count = snapshot.expense_report_count;
3916 stats.benefit_enrollment_count = snapshot.benefit_enrollment_count;
3917
3918 info!(
3919 "HR data generated: {} payroll runs ({} line items), {} time entries, {} expense reports, {} benefit enrollments",
3920 snapshot.payroll_run_count, snapshot.payroll_line_item_count,
3921 snapshot.time_entry_count, snapshot.expense_report_count,
3922 snapshot.benefit_enrollment_count
3923 );
3924 self.check_resources_with_log("post-hr")?;
3925
3926 Ok(snapshot)
3927 }
3928
3929 fn phase_accounting_standards(
3931 &mut self,
3932 stats: &mut EnhancedGenerationStatistics,
3933 ) -> SynthResult<AccountingStandardsSnapshot> {
3934 if !self.phase_config.generate_accounting_standards
3935 || !self.config.accounting_standards.enabled
3936 {
3937 debug!("Phase 17: Skipped (accounting standards generation disabled)");
3938 return Ok(AccountingStandardsSnapshot::default());
3939 }
3940 info!("Phase 17: Generating Accounting Standards Data");
3941
3942 let seed = self.seed;
3943 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3944 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
3945 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
3946 let company_code = self
3947 .config
3948 .companies
3949 .first()
3950 .map(|c| c.code.as_str())
3951 .unwrap_or("1000");
3952 let currency = self
3953 .config
3954 .companies
3955 .first()
3956 .map(|c| c.currency.as_str())
3957 .unwrap_or("USD");
3958
3959 let framework = match self.config.accounting_standards.framework {
3964 Some(datasynth_config::schema::AccountingFrameworkConfig::UsGaap) => {
3965 datasynth_standards::framework::AccountingFramework::UsGaap
3966 }
3967 Some(datasynth_config::schema::AccountingFrameworkConfig::Ifrs) => {
3968 datasynth_standards::framework::AccountingFramework::Ifrs
3969 }
3970 Some(datasynth_config::schema::AccountingFrameworkConfig::DualReporting) => {
3971 datasynth_standards::framework::AccountingFramework::DualReporting
3972 }
3973 Some(datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap) => {
3974 datasynth_standards::framework::AccountingFramework::FrenchGaap
3975 }
3976 Some(datasynth_config::schema::AccountingFrameworkConfig::GermanGaap) => {
3977 datasynth_standards::framework::AccountingFramework::GermanGaap
3978 }
3979 None => {
3980 let pack = self.primary_pack();
3982 let pack_fw = pack.accounting.framework.as_str();
3983 match pack_fw {
3984 "ifrs" => datasynth_standards::framework::AccountingFramework::Ifrs,
3985 "dual_reporting" => {
3986 datasynth_standards::framework::AccountingFramework::DualReporting
3987 }
3988 "french_gaap" => {
3989 datasynth_standards::framework::AccountingFramework::FrenchGaap
3990 }
3991 "german_gaap" | "hgb" => {
3992 datasynth_standards::framework::AccountingFramework::GermanGaap
3993 }
3994 _ => datasynth_standards::framework::AccountingFramework::UsGaap,
3996 }
3997 }
3998 };
3999
4000 let mut snapshot = AccountingStandardsSnapshot::default();
4001
4002 if self.config.accounting_standards.revenue_recognition.enabled {
4004 let customer_ids: Vec<String> = self
4005 .master_data
4006 .customers
4007 .iter()
4008 .map(|c| c.customer_id.clone())
4009 .collect();
4010
4011 if !customer_ids.is_empty() {
4012 let mut rev_gen = datasynth_generators::RevenueRecognitionGenerator::new(seed + 40);
4013 let contracts = rev_gen.generate(
4014 company_code,
4015 &customer_ids,
4016 start_date,
4017 end_date,
4018 currency,
4019 &self.config.accounting_standards.revenue_recognition,
4020 framework,
4021 );
4022 snapshot.revenue_contract_count = contracts.len();
4023 snapshot.contracts = contracts;
4024 }
4025 }
4026
4027 if self.config.accounting_standards.impairment.enabled {
4029 let asset_data: Vec<(String, String, rust_decimal::Decimal)> = self
4030 .master_data
4031 .assets
4032 .iter()
4033 .map(|a| {
4034 (
4035 a.asset_id.clone(),
4036 a.description.clone(),
4037 a.acquisition_cost,
4038 )
4039 })
4040 .collect();
4041
4042 if !asset_data.is_empty() {
4043 let mut imp_gen = datasynth_generators::ImpairmentGenerator::new(seed + 41);
4044 let tests = imp_gen.generate(
4045 company_code,
4046 &asset_data,
4047 end_date,
4048 &self.config.accounting_standards.impairment,
4049 framework,
4050 );
4051 snapshot.impairment_test_count = tests.len();
4052 snapshot.impairment_tests = tests;
4053 }
4054 }
4055
4056 stats.revenue_contract_count = snapshot.revenue_contract_count;
4057 stats.impairment_test_count = snapshot.impairment_test_count;
4058
4059 info!(
4060 "Accounting standards data generated: {} revenue contracts, {} impairment tests",
4061 snapshot.revenue_contract_count, snapshot.impairment_test_count
4062 );
4063 self.check_resources_with_log("post-accounting-standards")?;
4064
4065 Ok(snapshot)
4066 }
4067
4068 fn phase_manufacturing(
4070 &mut self,
4071 stats: &mut EnhancedGenerationStatistics,
4072 ) -> SynthResult<ManufacturingSnapshot> {
4073 if !self.phase_config.generate_manufacturing || !self.config.manufacturing.enabled {
4074 debug!("Phase 18: Skipped (manufacturing generation disabled)");
4075 return Ok(ManufacturingSnapshot::default());
4076 }
4077 info!("Phase 18: Generating Manufacturing Data");
4078
4079 let seed = self.seed;
4080 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4081 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
4082 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
4083 let company_code = self
4084 .config
4085 .companies
4086 .first()
4087 .map(|c| c.code.as_str())
4088 .unwrap_or("1000");
4089
4090 let material_data: Vec<(String, String)> = self
4091 .master_data
4092 .materials
4093 .iter()
4094 .map(|m| (m.material_id.clone(), m.description.clone()))
4095 .collect();
4096
4097 if material_data.is_empty() {
4098 debug!("Phase 18: Skipped (no materials available)");
4099 return Ok(ManufacturingSnapshot::default());
4100 }
4101
4102 let mut snapshot = ManufacturingSnapshot::default();
4103
4104 let mut prod_gen = datasynth_generators::ProductionOrderGenerator::new(seed + 50);
4106 let production_orders = prod_gen.generate(
4107 company_code,
4108 &material_data,
4109 start_date,
4110 end_date,
4111 &self.config.manufacturing.production_orders,
4112 &self.config.manufacturing.costing,
4113 &self.config.manufacturing.routing,
4114 );
4115 snapshot.production_order_count = production_orders.len();
4116
4117 let inspection_data: Vec<(String, String, String)> = production_orders
4119 .iter()
4120 .map(|po| {
4121 (
4122 po.order_id.clone(),
4123 po.material_id.clone(),
4124 po.material_description.clone(),
4125 )
4126 })
4127 .collect();
4128
4129 snapshot.production_orders = production_orders;
4130
4131 if !inspection_data.is_empty() {
4132 let mut qi_gen = datasynth_generators::QualityInspectionGenerator::new(seed + 51);
4133 let inspections = qi_gen.generate(company_code, &inspection_data, end_date);
4134 snapshot.quality_inspection_count = inspections.len();
4135 snapshot.quality_inspections = inspections;
4136 }
4137
4138 let storage_locations: Vec<(String, String)> = material_data
4140 .iter()
4141 .enumerate()
4142 .map(|(i, (mid, _))| (mid.clone(), format!("SL-{:03}", (i % 10) + 1)))
4143 .collect();
4144
4145 let employee_ids: Vec<String> = self
4146 .master_data
4147 .employees
4148 .iter()
4149 .map(|e| e.employee_id.clone())
4150 .collect();
4151 let mut cc_gen = datasynth_generators::CycleCountGenerator::new(seed + 52)
4152 .with_employee_pool(employee_ids);
4153 let mut cycle_count_total = 0usize;
4154 for month in 0..self.config.global.period_months {
4155 let count_date = start_date + chrono::Months::new(month);
4156 let items_per_count = storage_locations.len().clamp(10, 50);
4157 let cc = cc_gen.generate(
4158 company_code,
4159 &storage_locations,
4160 count_date,
4161 items_per_count,
4162 );
4163 snapshot.cycle_counts.push(cc);
4164 cycle_count_total += 1;
4165 }
4166 snapshot.cycle_count_count = cycle_count_total;
4167
4168 let mut bom_gen = datasynth_generators::BomGenerator::new(seed + 53);
4170 let bom_components = bom_gen.generate(company_code, &material_data);
4171 snapshot.bom_component_count = bom_components.len();
4172 snapshot.bom_components = bom_components;
4173
4174 let currency = self
4176 .config
4177 .companies
4178 .first()
4179 .map(|c| c.currency.as_str())
4180 .unwrap_or("USD");
4181 let mut inv_mov_gen = datasynth_generators::InventoryMovementGenerator::new(seed + 54);
4182 let inventory_movements = inv_mov_gen.generate(
4183 company_code,
4184 &material_data,
4185 start_date,
4186 end_date,
4187 2,
4188 currency,
4189 );
4190 snapshot.inventory_movement_count = inventory_movements.len();
4191 snapshot.inventory_movements = inventory_movements;
4192
4193 stats.production_order_count = snapshot.production_order_count;
4194 stats.quality_inspection_count = snapshot.quality_inspection_count;
4195 stats.cycle_count_count = snapshot.cycle_count_count;
4196 stats.bom_component_count = snapshot.bom_component_count;
4197 stats.inventory_movement_count = snapshot.inventory_movement_count;
4198
4199 info!(
4200 "Manufacturing data generated: {} production orders, {} quality inspections, {} cycle counts, {} BOM components, {} inventory movements",
4201 snapshot.production_order_count, snapshot.quality_inspection_count, snapshot.cycle_count_count,
4202 snapshot.bom_component_count, snapshot.inventory_movement_count
4203 );
4204 self.check_resources_with_log("post-manufacturing")?;
4205
4206 Ok(snapshot)
4207 }
4208
4209 fn phase_sales_kpi_budgets(
4211 &mut self,
4212 coa: &Arc<ChartOfAccounts>,
4213 financial_reporting: &FinancialReportingSnapshot,
4214 stats: &mut EnhancedGenerationStatistics,
4215 ) -> SynthResult<SalesKpiBudgetsSnapshot> {
4216 if !self.phase_config.generate_sales_kpi_budgets {
4217 debug!("Phase 19: Skipped (sales/KPI/budget generation disabled)");
4218 return Ok(SalesKpiBudgetsSnapshot::default());
4219 }
4220 info!("Phase 19: Generating Sales Quotes, KPIs, and Budgets");
4221
4222 let seed = self.seed;
4223 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4224 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
4225 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
4226 let company_code = self
4227 .config
4228 .companies
4229 .first()
4230 .map(|c| c.code.as_str())
4231 .unwrap_or("1000");
4232
4233 let mut snapshot = SalesKpiBudgetsSnapshot::default();
4234
4235 if self.config.sales_quotes.enabled {
4237 let customer_data: Vec<(String, String)> = self
4238 .master_data
4239 .customers
4240 .iter()
4241 .map(|c| (c.customer_id.clone(), c.name.clone()))
4242 .collect();
4243 let material_data: Vec<(String, String)> = self
4244 .master_data
4245 .materials
4246 .iter()
4247 .map(|m| (m.material_id.clone(), m.description.clone()))
4248 .collect();
4249
4250 if !customer_data.is_empty() && !material_data.is_empty() {
4251 let employee_ids: Vec<String> = self
4252 .master_data
4253 .employees
4254 .iter()
4255 .map(|e| e.employee_id.clone())
4256 .collect();
4257 let customer_ids: Vec<String> = self
4258 .master_data
4259 .customers
4260 .iter()
4261 .map(|c| c.customer_id.clone())
4262 .collect();
4263 let company_currency = self
4264 .config
4265 .companies
4266 .first()
4267 .map(|c| c.currency.as_str())
4268 .unwrap_or("USD");
4269
4270 let mut quote_gen = datasynth_generators::SalesQuoteGenerator::new(seed + 60)
4271 .with_pools(employee_ids, customer_ids);
4272 let quotes = quote_gen.generate_with_currency(
4273 company_code,
4274 &customer_data,
4275 &material_data,
4276 start_date,
4277 end_date,
4278 &self.config.sales_quotes,
4279 company_currency,
4280 );
4281 snapshot.sales_quote_count = quotes.len();
4282 snapshot.sales_quotes = quotes;
4283 }
4284 }
4285
4286 if self.config.financial_reporting.management_kpis.enabled {
4288 let mut kpi_gen = datasynth_generators::KpiGenerator::new(seed + 61);
4289 let mut kpis = kpi_gen.generate(
4290 company_code,
4291 start_date,
4292 end_date,
4293 &self.config.financial_reporting.management_kpis,
4294 );
4295
4296 {
4298 use rust_decimal::Decimal;
4299
4300 if let Some(income_stmt) =
4301 financial_reporting.financial_statements.iter().find(|fs| {
4302 fs.statement_type == StatementType::IncomeStatement
4303 && fs.company_code == company_code
4304 })
4305 {
4306 let total_revenue: Decimal = income_stmt
4308 .line_items
4309 .iter()
4310 .filter(|li| li.section.contains("Revenue") && !li.is_total)
4311 .map(|li| li.amount)
4312 .sum();
4313 let total_cogs: Decimal = income_stmt
4314 .line_items
4315 .iter()
4316 .filter(|li| {
4317 (li.section.contains("Cost") || li.line_code.starts_with("IS-COGS"))
4318 && !li.is_total
4319 })
4320 .map(|li| li.amount.abs())
4321 .sum();
4322 let total_opex: Decimal = income_stmt
4323 .line_items
4324 .iter()
4325 .filter(|li| {
4326 li.section.contains("Expense")
4327 && !li.is_total
4328 && !li.section.contains("Cost")
4329 })
4330 .map(|li| li.amount.abs())
4331 .sum();
4332
4333 if total_revenue > Decimal::ZERO {
4334 let hundred = Decimal::from(100);
4335 let gross_margin_pct =
4336 ((total_revenue - total_cogs) * hundred / total_revenue).round_dp(2);
4337 let operating_income = total_revenue - total_cogs - total_opex;
4338 let op_margin_pct =
4339 (operating_income * hundred / total_revenue).round_dp(2);
4340
4341 for kpi in &mut kpis {
4343 if kpi.name == "Gross Margin" {
4344 kpi.value = gross_margin_pct;
4345 } else if kpi.name == "Operating Margin" {
4346 kpi.value = op_margin_pct;
4347 }
4348 }
4349 }
4350 }
4351
4352 if let Some(bs) = financial_reporting.financial_statements.iter().find(|fs| {
4354 fs.statement_type == StatementType::BalanceSheet
4355 && fs.company_code == company_code
4356 }) {
4357 let current_assets: Decimal = bs
4358 .line_items
4359 .iter()
4360 .filter(|li| li.section.contains("Current Assets") && !li.is_total)
4361 .map(|li| li.amount)
4362 .sum();
4363 let current_liabilities: Decimal = bs
4364 .line_items
4365 .iter()
4366 .filter(|li| li.section.contains("Current Liabilities") && !li.is_total)
4367 .map(|li| li.amount.abs())
4368 .sum();
4369
4370 if current_liabilities > Decimal::ZERO {
4371 let current_ratio = (current_assets / current_liabilities).round_dp(2);
4372 for kpi in &mut kpis {
4373 if kpi.name == "Current Ratio" {
4374 kpi.value = current_ratio;
4375 }
4376 }
4377 }
4378 }
4379 }
4380
4381 snapshot.kpi_count = kpis.len();
4382 snapshot.kpis = kpis;
4383 }
4384
4385 if self.config.financial_reporting.budgets.enabled {
4387 let account_data: Vec<(String, String)> = coa
4388 .accounts
4389 .iter()
4390 .map(|a| (a.account_number.clone(), a.short_description.clone()))
4391 .collect();
4392
4393 if !account_data.is_empty() {
4394 let fiscal_year = start_date.year() as u32;
4395 let mut budget_gen = datasynth_generators::BudgetGenerator::new(seed + 62);
4396 let budget = budget_gen.generate(
4397 company_code,
4398 fiscal_year,
4399 &account_data,
4400 &self.config.financial_reporting.budgets,
4401 );
4402 snapshot.budget_line_count = budget.line_items.len();
4403 snapshot.budgets.push(budget);
4404 }
4405 }
4406
4407 stats.sales_quote_count = snapshot.sales_quote_count;
4408 stats.kpi_count = snapshot.kpi_count;
4409 stats.budget_line_count = snapshot.budget_line_count;
4410
4411 info!(
4412 "Sales/KPI/Budget data generated: {} quotes, {} KPIs, {} budget lines",
4413 snapshot.sales_quote_count, snapshot.kpi_count, snapshot.budget_line_count
4414 );
4415 self.check_resources_with_log("post-sales-kpi-budgets")?;
4416
4417 Ok(snapshot)
4418 }
4419
4420 fn phase_tax_generation(
4422 &mut self,
4423 document_flows: &DocumentFlowSnapshot,
4424 stats: &mut EnhancedGenerationStatistics,
4425 ) -> SynthResult<TaxSnapshot> {
4426 if !self.phase_config.generate_tax || !self.config.tax.enabled {
4427 debug!("Phase 20: Skipped (tax generation disabled)");
4428 return Ok(TaxSnapshot::default());
4429 }
4430 info!("Phase 20: Generating Tax Data");
4431
4432 let seed = self.seed;
4433 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4434 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
4435 let fiscal_year = start_date.year();
4436 let company_code = self
4437 .config
4438 .companies
4439 .first()
4440 .map(|c| c.code.as_str())
4441 .unwrap_or("1000");
4442
4443 let mut gen =
4444 datasynth_generators::TaxCodeGenerator::with_config(seed + 70, self.config.tax.clone());
4445
4446 let pack = self.primary_pack().clone();
4447 let (jurisdictions, codes) =
4448 gen.generate_from_country_pack(&pack, company_code, fiscal_year);
4449
4450 let mut provisions = Vec::new();
4452 if self.config.tax.provisions.enabled {
4453 let mut provision_gen = datasynth_generators::TaxProvisionGenerator::new(seed + 71);
4454 for company in &self.config.companies {
4455 let pre_tax_income = rust_decimal::Decimal::from(1_000_000);
4456 let statutory_rate = rust_decimal::Decimal::new(
4457 (self.config.tax.provisions.statutory_rate.clamp(0.0, 1.0) * 100.0) as i64,
4458 2,
4459 );
4460 let provision = provision_gen.generate(
4461 &company.code,
4462 start_date,
4463 pre_tax_income,
4464 statutory_rate,
4465 );
4466 provisions.push(provision);
4467 }
4468 }
4469
4470 let mut tax_lines = Vec::new();
4472 if !codes.is_empty() {
4473 let mut tax_line_gen = datasynth_generators::TaxLineGenerator::new(
4474 datasynth_generators::TaxLineGeneratorConfig::default(),
4475 codes.clone(),
4476 seed + 72,
4477 );
4478
4479 let buyer_country = self
4482 .config
4483 .companies
4484 .first()
4485 .map(|c| c.country.as_str())
4486 .unwrap_or("US");
4487 for vi in &document_flows.vendor_invoices {
4488 let lines = tax_line_gen.generate_for_document(
4489 datasynth_core::models::TaxableDocumentType::VendorInvoice,
4490 &vi.header.document_id,
4491 buyer_country, buyer_country,
4493 vi.payable_amount,
4494 vi.header.document_date,
4495 None,
4496 );
4497 tax_lines.extend(lines);
4498 }
4499
4500 for ci in &document_flows.customer_invoices {
4502 let lines = tax_line_gen.generate_for_document(
4503 datasynth_core::models::TaxableDocumentType::CustomerInvoice,
4504 &ci.header.document_id,
4505 buyer_country, buyer_country,
4507 ci.total_gross_amount,
4508 ci.header.document_date,
4509 None,
4510 );
4511 tax_lines.extend(lines);
4512 }
4513 }
4514
4515 let snapshot = TaxSnapshot {
4516 jurisdiction_count: jurisdictions.len(),
4517 code_count: codes.len(),
4518 jurisdictions,
4519 codes,
4520 tax_provisions: provisions,
4521 tax_lines,
4522 tax_returns: Vec::new(),
4523 withholding_records: Vec::new(),
4524 tax_anomaly_labels: Vec::new(),
4525 };
4526
4527 stats.tax_jurisdiction_count = snapshot.jurisdiction_count;
4528 stats.tax_code_count = snapshot.code_count;
4529 stats.tax_provision_count = snapshot.tax_provisions.len();
4530 stats.tax_line_count = snapshot.tax_lines.len();
4531
4532 info!(
4533 "Tax data generated: {} jurisdictions, {} codes, {} provisions",
4534 snapshot.jurisdiction_count,
4535 snapshot.code_count,
4536 snapshot.tax_provisions.len()
4537 );
4538 self.check_resources_with_log("post-tax")?;
4539
4540 Ok(snapshot)
4541 }
4542
4543 fn phase_esg_generation(
4545 &mut self,
4546 document_flows: &DocumentFlowSnapshot,
4547 stats: &mut EnhancedGenerationStatistics,
4548 ) -> SynthResult<EsgSnapshot> {
4549 if !self.phase_config.generate_esg || !self.config.esg.enabled {
4550 debug!("Phase 21: Skipped (ESG generation disabled)");
4551 return Ok(EsgSnapshot::default());
4552 }
4553 info!("Phase 21: Generating ESG Data");
4554
4555 let seed = self.seed;
4556 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4557 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
4558 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
4559 let entity_id = self
4560 .config
4561 .companies
4562 .first()
4563 .map(|c| c.code.as_str())
4564 .unwrap_or("1000");
4565
4566 let esg_cfg = &self.config.esg;
4567 let mut snapshot = EsgSnapshot::default();
4568
4569 let mut energy_gen = datasynth_generators::EnergyGenerator::new(
4571 esg_cfg.environmental.energy.clone(),
4572 seed + 80,
4573 );
4574 let energy_records = energy_gen.generate(entity_id, start_date, end_date);
4575
4576 let facility_count = esg_cfg.environmental.energy.facility_count;
4578 let mut water_gen = datasynth_generators::WaterGenerator::new(seed + 81, facility_count);
4579 snapshot.water = water_gen.generate(entity_id, start_date, end_date);
4580
4581 let mut waste_gen = datasynth_generators::WasteGenerator::new(
4583 seed + 82,
4584 esg_cfg.environmental.waste.diversion_target,
4585 facility_count,
4586 );
4587 snapshot.waste = waste_gen.generate(entity_id, start_date, end_date);
4588
4589 let mut emission_gen =
4591 datasynth_generators::EmissionGenerator::new(esg_cfg.environmental.clone(), seed + 83);
4592
4593 let energy_inputs: Vec<datasynth_generators::EnergyInput> = energy_records
4595 .iter()
4596 .map(|e| datasynth_generators::EnergyInput {
4597 facility_id: e.facility_id.clone(),
4598 energy_type: match e.energy_source {
4599 EnergySourceType::NaturalGas => {
4600 datasynth_generators::EnergyInputType::NaturalGas
4601 }
4602 EnergySourceType::Diesel => datasynth_generators::EnergyInputType::Diesel,
4603 EnergySourceType::Coal => datasynth_generators::EnergyInputType::Coal,
4604 _ => datasynth_generators::EnergyInputType::Electricity,
4605 },
4606 consumption_kwh: e.consumption_kwh,
4607 period: e.period,
4608 })
4609 .collect();
4610
4611 let mut emissions = Vec::new();
4612 emissions.extend(emission_gen.generate_scope1(entity_id, &energy_inputs));
4613 emissions.extend(emission_gen.generate_scope2(entity_id, &energy_inputs));
4614
4615 let vendor_payment_totals: HashMap<String, rust_decimal::Decimal> = {
4617 let mut totals: HashMap<String, rust_decimal::Decimal> = HashMap::new();
4618 for payment in &document_flows.payments {
4619 if payment.is_vendor {
4620 *totals
4621 .entry(payment.business_partner_id.clone())
4622 .or_default() += payment.amount;
4623 }
4624 }
4625 totals
4626 };
4627 let vendor_spend: Vec<datasynth_generators::VendorSpendInput> = self
4628 .master_data
4629 .vendors
4630 .iter()
4631 .map(|v| {
4632 let spend = vendor_payment_totals
4633 .get(&v.vendor_id)
4634 .copied()
4635 .unwrap_or_else(|| rust_decimal::Decimal::new(10000, 0));
4636 datasynth_generators::VendorSpendInput {
4637 vendor_id: v.vendor_id.clone(),
4638 category: format!("{:?}", v.vendor_type).to_lowercase(),
4639 spend,
4640 country: v.country.clone(),
4641 }
4642 })
4643 .collect();
4644 if !vendor_spend.is_empty() {
4645 emissions.extend(emission_gen.generate_scope3_purchased_goods(
4646 entity_id,
4647 &vendor_spend,
4648 start_date,
4649 end_date,
4650 ));
4651 }
4652
4653 let headcount = self.master_data.employees.len() as u32;
4655 if headcount > 0 {
4656 let travel_spend = rust_decimal::Decimal::new(headcount as i64 * 2000, 0);
4657 emissions.extend(emission_gen.generate_scope3_business_travel(
4658 entity_id,
4659 travel_spend,
4660 start_date,
4661 ));
4662 emissions
4663 .extend(emission_gen.generate_scope3_commuting(entity_id, headcount, start_date));
4664 }
4665
4666 snapshot.emission_count = emissions.len();
4667 snapshot.emissions = emissions;
4668 snapshot.energy = energy_records;
4669
4670 let mut workforce_gen =
4672 datasynth_generators::WorkforceGenerator::new(esg_cfg.social.clone(), seed + 84);
4673 let total_headcount = headcount.max(100);
4674 snapshot.diversity =
4675 workforce_gen.generate_diversity(entity_id, total_headcount, start_date);
4676 snapshot.pay_equity = workforce_gen.generate_pay_equity(entity_id, start_date);
4677 snapshot.safety_incidents = workforce_gen.generate_safety_incidents(
4678 entity_id,
4679 facility_count,
4680 start_date,
4681 end_date,
4682 );
4683
4684 let total_hours = total_headcount as u64 * 2000; let safety_metric = workforce_gen.compute_safety_metrics(
4687 entity_id,
4688 &snapshot.safety_incidents,
4689 total_hours,
4690 start_date,
4691 );
4692 snapshot.safety_metrics = vec![safety_metric];
4693
4694 let mut gov_gen = datasynth_generators::GovernanceGenerator::new(
4696 seed + 85,
4697 esg_cfg.governance.board_size,
4698 esg_cfg.governance.independence_target,
4699 );
4700 snapshot.governance = vec![gov_gen.generate(entity_id, start_date)];
4701
4702 let mut supplier_gen = datasynth_generators::SupplierEsgGenerator::new(
4704 esg_cfg.supply_chain_esg.clone(),
4705 seed + 86,
4706 );
4707 let vendor_inputs: Vec<datasynth_generators::VendorInput> = self
4708 .master_data
4709 .vendors
4710 .iter()
4711 .map(|v| datasynth_generators::VendorInput {
4712 vendor_id: v.vendor_id.clone(),
4713 country: v.country.clone(),
4714 industry: format!("{:?}", v.vendor_type).to_lowercase(),
4715 quality_score: None,
4716 })
4717 .collect();
4718 snapshot.supplier_assessments =
4719 supplier_gen.generate(entity_id, &vendor_inputs, start_date);
4720
4721 let mut disclosure_gen = datasynth_generators::DisclosureGenerator::new(
4723 seed + 87,
4724 esg_cfg.reporting.clone(),
4725 esg_cfg.climate_scenarios.clone(),
4726 );
4727 snapshot.materiality = disclosure_gen.generate_materiality(entity_id, start_date);
4728 snapshot.disclosures = disclosure_gen.generate_disclosures(
4729 entity_id,
4730 &snapshot.materiality,
4731 start_date,
4732 end_date,
4733 );
4734 snapshot.climate_scenarios = disclosure_gen.generate_climate_scenarios(entity_id);
4735 snapshot.disclosure_count = snapshot.disclosures.len();
4736
4737 if esg_cfg.anomaly_rate > 0.0 {
4739 let mut anomaly_injector =
4740 datasynth_generators::EsgAnomalyInjector::new(seed + 88, esg_cfg.anomaly_rate);
4741 let mut labels = Vec::new();
4742 labels.extend(anomaly_injector.inject_greenwashing(&mut snapshot.emissions));
4743 labels.extend(anomaly_injector.inject_diversity_stagnation(&mut snapshot.diversity));
4744 labels.extend(
4745 anomaly_injector.inject_supply_chain_risk(&mut snapshot.supplier_assessments),
4746 );
4747 labels.extend(anomaly_injector.inject_data_quality_gaps(&mut snapshot.safety_metrics));
4748 labels.extend(anomaly_injector.inject_missing_disclosures(&mut snapshot.materiality));
4749 snapshot.anomaly_labels = labels;
4750 }
4751
4752 stats.esg_emission_count = snapshot.emission_count;
4753 stats.esg_disclosure_count = snapshot.disclosure_count;
4754
4755 info!(
4756 "ESG data generated: {} emissions, {} disclosures, {} supplier assessments",
4757 snapshot.emission_count,
4758 snapshot.disclosure_count,
4759 snapshot.supplier_assessments.len()
4760 );
4761 self.check_resources_with_log("post-esg")?;
4762
4763 Ok(snapshot)
4764 }
4765
4766 fn phase_treasury_data(
4768 &mut self,
4769 document_flows: &DocumentFlowSnapshot,
4770 subledger: &SubledgerSnapshot,
4771 intercompany: &IntercompanySnapshot,
4772 stats: &mut EnhancedGenerationStatistics,
4773 ) -> SynthResult<TreasurySnapshot> {
4774 if !self.config.treasury.enabled {
4775 debug!("Phase 22: Skipped (treasury generation disabled)");
4776 return Ok(TreasurySnapshot::default());
4777 }
4778 info!("Phase 22: Generating Treasury Data");
4779
4780 let seed = self.seed;
4781 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4782 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
4783 let currency = self
4784 .config
4785 .companies
4786 .first()
4787 .map(|c| c.currency.as_str())
4788 .unwrap_or("USD");
4789 let entity_id = self
4790 .config
4791 .companies
4792 .first()
4793 .map(|c| c.code.as_str())
4794 .unwrap_or("1000");
4795
4796 let mut snapshot = TreasurySnapshot::default();
4797
4798 let mut debt_gen = datasynth_generators::treasury::DebtGenerator::new(
4800 self.config.treasury.debt.clone(),
4801 seed + 90,
4802 );
4803 snapshot.debt_instruments = debt_gen.generate(entity_id, currency, start_date);
4804
4805 let mut hedge_gen = datasynth_generators::treasury::HedgingGenerator::new(
4807 self.config.treasury.hedging.clone(),
4808 seed + 91,
4809 );
4810 for debt in &snapshot.debt_instruments {
4811 if debt.rate_type == InterestRateType::Variable {
4812 let swap = hedge_gen.generate_ir_swap(
4813 currency,
4814 debt.principal,
4815 debt.origination_date,
4816 debt.maturity_date,
4817 );
4818 snapshot.hedging_instruments.push(swap);
4819 }
4820 }
4821
4822 {
4825 let mut fx_map: HashMap<String, (rust_decimal::Decimal, NaiveDate)> = HashMap::new();
4826 for payment in &document_flows.payments {
4827 if payment.currency != currency {
4828 let entry = fx_map
4829 .entry(payment.currency.clone())
4830 .or_insert((rust_decimal::Decimal::ZERO, payment.header.document_date));
4831 entry.0 += payment.amount;
4832 if payment.header.document_date > entry.1 {
4834 entry.1 = payment.header.document_date;
4835 }
4836 }
4837 }
4838 if !fx_map.is_empty() {
4839 let fx_exposures: Vec<datasynth_generators::treasury::FxExposure> = fx_map
4840 .into_iter()
4841 .map(|(foreign_ccy, (net_amount, settlement_date))| {
4842 datasynth_generators::treasury::FxExposure {
4843 currency_pair: format!("{foreign_ccy}/{currency}"),
4844 foreign_currency: foreign_ccy,
4845 net_amount,
4846 settlement_date,
4847 description: "AP payment FX exposure".to_string(),
4848 }
4849 })
4850 .collect();
4851 let (fx_instruments, fx_relationships) =
4852 hedge_gen.generate(start_date, &fx_exposures);
4853 snapshot.hedging_instruments.extend(fx_instruments);
4854 snapshot.hedge_relationships.extend(fx_relationships);
4855 }
4856 }
4857
4858 if self.config.treasury.anomaly_rate > 0.0 {
4860 let mut anomaly_injector = datasynth_generators::treasury::TreasuryAnomalyInjector::new(
4861 seed + 92,
4862 self.config.treasury.anomaly_rate,
4863 );
4864 let mut labels = Vec::new();
4865 labels.extend(
4866 anomaly_injector.inject_into_hedge_relationships(&mut snapshot.hedge_relationships),
4867 );
4868 snapshot.treasury_anomaly_labels = labels;
4869 }
4870
4871 if self.config.treasury.cash_positioning.enabled {
4873 let mut cash_flows: Vec<datasynth_generators::treasury::CashFlow> = Vec::new();
4874
4875 for payment in &document_flows.payments {
4877 cash_flows.push(datasynth_generators::treasury::CashFlow {
4878 date: payment.header.document_date,
4879 account_id: format!("{entity_id}-MAIN"),
4880 amount: payment.amount,
4881 direction: datasynth_generators::treasury::CashFlowDirection::Outflow,
4882 });
4883 }
4884
4885 for chain in &document_flows.o2c_chains {
4887 if let Some(ref receipt) = chain.customer_receipt {
4888 cash_flows.push(datasynth_generators::treasury::CashFlow {
4889 date: receipt.header.document_date,
4890 account_id: format!("{entity_id}-MAIN"),
4891 amount: receipt.amount,
4892 direction: datasynth_generators::treasury::CashFlowDirection::Inflow,
4893 });
4894 }
4895 for receipt in &chain.remainder_receipts {
4897 cash_flows.push(datasynth_generators::treasury::CashFlow {
4898 date: receipt.header.document_date,
4899 account_id: format!("{entity_id}-MAIN"),
4900 amount: receipt.amount,
4901 direction: datasynth_generators::treasury::CashFlowDirection::Inflow,
4902 });
4903 }
4904 }
4905
4906 if !cash_flows.is_empty() {
4907 let mut cash_gen = datasynth_generators::treasury::CashPositionGenerator::new(
4908 self.config.treasury.cash_positioning.clone(),
4909 seed + 93,
4910 );
4911 let account_id = format!("{entity_id}-MAIN");
4912 snapshot.cash_positions = cash_gen.generate(
4913 entity_id,
4914 &account_id,
4915 currency,
4916 &cash_flows,
4917 start_date,
4918 start_date + chrono::Months::new(self.config.global.period_months),
4919 rust_decimal::Decimal::new(1_000_000, 0), );
4921 }
4922 }
4923
4924 if self.config.treasury.cash_forecasting.enabled {
4926 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
4927
4928 let ar_items: Vec<datasynth_generators::treasury::ArAgingItem> = subledger
4930 .ar_invoices
4931 .iter()
4932 .filter(|inv| inv.amount_remaining > rust_decimal::Decimal::ZERO)
4933 .map(|inv| {
4934 let days_past_due = if inv.due_date < end_date {
4935 (end_date - inv.due_date).num_days().max(0) as u32
4936 } else {
4937 0
4938 };
4939 datasynth_generators::treasury::ArAgingItem {
4940 expected_date: inv.due_date,
4941 amount: inv.amount_remaining,
4942 days_past_due,
4943 document_id: inv.invoice_number.clone(),
4944 }
4945 })
4946 .collect();
4947
4948 let ap_items: Vec<datasynth_generators::treasury::ApAgingItem> = subledger
4950 .ap_invoices
4951 .iter()
4952 .filter(|inv| inv.amount_remaining > rust_decimal::Decimal::ZERO)
4953 .map(|inv| datasynth_generators::treasury::ApAgingItem {
4954 payment_date: inv.due_date,
4955 amount: inv.amount_remaining,
4956 document_id: inv.invoice_number.clone(),
4957 })
4958 .collect();
4959
4960 let mut forecast_gen = datasynth_generators::treasury::CashForecastGenerator::new(
4961 self.config.treasury.cash_forecasting.clone(),
4962 seed + 94,
4963 );
4964 let forecast = forecast_gen.generate(
4965 entity_id,
4966 currency,
4967 end_date,
4968 &ar_items,
4969 &ap_items,
4970 &[], );
4972 snapshot.cash_forecasts.push(forecast);
4973 }
4974
4975 if self.config.treasury.cash_pooling.enabled && !snapshot.cash_positions.is_empty() {
4977 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
4978 let mut pool_gen = datasynth_generators::treasury::CashPoolGenerator::new(
4979 self.config.treasury.cash_pooling.clone(),
4980 seed + 95,
4981 );
4982
4983 let account_ids: Vec<String> = snapshot
4985 .cash_positions
4986 .iter()
4987 .map(|cp| cp.bank_account_id.clone())
4988 .collect::<std::collections::HashSet<_>>()
4989 .into_iter()
4990 .collect();
4991
4992 if let Some(pool) =
4993 pool_gen.create_pool(&format!("{entity_id}_MAIN_POOL"), currency, &account_ids)
4994 {
4995 let mut latest_balances: HashMap<String, rust_decimal::Decimal> = HashMap::new();
4997 for cp in &snapshot.cash_positions {
4998 latest_balances.insert(cp.bank_account_id.clone(), cp.closing_balance);
4999 }
5000
5001 let participant_balances: Vec<datasynth_generators::treasury::AccountBalance> =
5002 latest_balances
5003 .into_iter()
5004 .filter(|(id, _)| pool.participant_accounts.contains(id))
5005 .map(
5006 |(id, balance)| datasynth_generators::treasury::AccountBalance {
5007 account_id: id,
5008 balance,
5009 },
5010 )
5011 .collect();
5012
5013 let sweeps =
5014 pool_gen.generate_sweeps(&pool, end_date, currency, &participant_balances);
5015 snapshot.cash_pool_sweeps = sweeps;
5016 snapshot.cash_pools.push(pool);
5017 }
5018 }
5019
5020 if self.config.treasury.bank_guarantees.enabled {
5022 let vendor_names: Vec<String> = self
5023 .master_data
5024 .vendors
5025 .iter()
5026 .map(|v| v.name.clone())
5027 .collect();
5028 if !vendor_names.is_empty() {
5029 let mut bg_gen = datasynth_generators::treasury::BankGuaranteeGenerator::new(
5030 self.config.treasury.bank_guarantees.clone(),
5031 seed + 96,
5032 );
5033 snapshot.bank_guarantees =
5034 bg_gen.generate(entity_id, currency, start_date, &vendor_names);
5035 }
5036 }
5037
5038 if self.config.treasury.netting.enabled && !intercompany.matched_pairs.is_empty() {
5040 let entity_ids: Vec<String> = self
5041 .config
5042 .companies
5043 .iter()
5044 .map(|c| c.code.clone())
5045 .collect();
5046 let ic_amounts: Vec<(String, String, rust_decimal::Decimal)> = intercompany
5047 .matched_pairs
5048 .iter()
5049 .map(|mp| {
5050 (
5051 mp.seller_company.clone(),
5052 mp.buyer_company.clone(),
5053 mp.amount,
5054 )
5055 })
5056 .collect();
5057 if entity_ids.len() >= 2 {
5058 let mut netting_gen = datasynth_generators::treasury::NettingRunGenerator::new(
5059 self.config.treasury.netting.clone(),
5060 seed + 97,
5061 );
5062 snapshot.netting_runs = netting_gen.generate(
5063 &entity_ids,
5064 currency,
5065 start_date,
5066 self.config.global.period_months,
5067 &ic_amounts,
5068 );
5069 }
5070 }
5071
5072 stats.treasury_debt_instrument_count = snapshot.debt_instruments.len();
5073 stats.treasury_hedging_instrument_count = snapshot.hedging_instruments.len();
5074 stats.cash_position_count = snapshot.cash_positions.len();
5075 stats.cash_forecast_count = snapshot.cash_forecasts.len();
5076 stats.cash_pool_count = snapshot.cash_pools.len();
5077
5078 info!(
5079 "Treasury data generated: {} debt instruments, {} hedging instruments, {} cash positions, {} forecasts, {} pools, {} guarantees, {} netting runs",
5080 snapshot.debt_instruments.len(),
5081 snapshot.hedging_instruments.len(),
5082 snapshot.cash_positions.len(),
5083 snapshot.cash_forecasts.len(),
5084 snapshot.cash_pools.len(),
5085 snapshot.bank_guarantees.len(),
5086 snapshot.netting_runs.len(),
5087 );
5088 self.check_resources_with_log("post-treasury")?;
5089
5090 Ok(snapshot)
5091 }
5092
5093 fn phase_project_accounting(
5095 &mut self,
5096 document_flows: &DocumentFlowSnapshot,
5097 hr: &HrSnapshot,
5098 stats: &mut EnhancedGenerationStatistics,
5099 ) -> SynthResult<ProjectAccountingSnapshot> {
5100 if !self.config.project_accounting.enabled {
5101 debug!("Phase 23: Skipped (project accounting disabled)");
5102 return Ok(ProjectAccountingSnapshot::default());
5103 }
5104 info!("Phase 23: Generating Project Accounting Data");
5105
5106 let seed = self.seed;
5107 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5108 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
5109 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
5110 let company_code = self
5111 .config
5112 .companies
5113 .first()
5114 .map(|c| c.code.as_str())
5115 .unwrap_or("1000");
5116
5117 let mut snapshot = ProjectAccountingSnapshot::default();
5118
5119 let mut project_gen = datasynth_generators::project_accounting::ProjectGenerator::new(
5121 self.config.project_accounting.clone(),
5122 seed + 95,
5123 );
5124 let pool = project_gen.generate(company_code, start_date, end_date);
5125 snapshot.projects = pool.projects.clone();
5126
5127 {
5129 let mut source_docs: Vec<datasynth_generators::project_accounting::SourceDocument> =
5130 Vec::new();
5131
5132 for te in &hr.time_entries {
5134 let total_hours = te.hours_regular + te.hours_overtime;
5135 if total_hours > 0.0 {
5136 source_docs.push(datasynth_generators::project_accounting::SourceDocument {
5137 id: te.entry_id.clone(),
5138 entity_id: company_code.to_string(),
5139 date: te.date,
5140 amount: rust_decimal::Decimal::from_f64_retain(total_hours * 75.0)
5141 .unwrap_or(rust_decimal::Decimal::ZERO),
5142 source_type: CostSourceType::TimeEntry,
5143 hours: Some(
5144 rust_decimal::Decimal::from_f64_retain(total_hours)
5145 .unwrap_or(rust_decimal::Decimal::ZERO),
5146 ),
5147 });
5148 }
5149 }
5150
5151 for er in &hr.expense_reports {
5153 source_docs.push(datasynth_generators::project_accounting::SourceDocument {
5154 id: er.report_id.clone(),
5155 entity_id: company_code.to_string(),
5156 date: er.submission_date,
5157 amount: er.total_amount,
5158 source_type: CostSourceType::ExpenseReport,
5159 hours: None,
5160 });
5161 }
5162
5163 for po in &document_flows.purchase_orders {
5165 source_docs.push(datasynth_generators::project_accounting::SourceDocument {
5166 id: po.header.document_id.clone(),
5167 entity_id: company_code.to_string(),
5168 date: po.header.document_date,
5169 amount: po.total_net_amount,
5170 source_type: CostSourceType::PurchaseOrder,
5171 hours: None,
5172 });
5173 }
5174
5175 for vi in &document_flows.vendor_invoices {
5177 source_docs.push(datasynth_generators::project_accounting::SourceDocument {
5178 id: vi.header.document_id.clone(),
5179 entity_id: company_code.to_string(),
5180 date: vi.header.document_date,
5181 amount: vi.payable_amount,
5182 source_type: CostSourceType::VendorInvoice,
5183 hours: None,
5184 });
5185 }
5186
5187 if !source_docs.is_empty() && !pool.projects.is_empty() {
5188 let mut cost_gen =
5189 datasynth_generators::project_accounting::ProjectCostGenerator::new(
5190 self.config.project_accounting.cost_allocation.clone(),
5191 seed + 99,
5192 );
5193 snapshot.cost_lines = cost_gen.link_documents(&pool, &source_docs);
5194 }
5195 }
5196
5197 if self.config.project_accounting.change_orders.enabled {
5199 let mut co_gen = datasynth_generators::project_accounting::ChangeOrderGenerator::new(
5200 self.config.project_accounting.change_orders.clone(),
5201 seed + 96,
5202 );
5203 snapshot.change_orders = co_gen.generate(&pool.projects, start_date, end_date);
5204 }
5205
5206 if self.config.project_accounting.milestones.enabled {
5208 let mut ms_gen = datasynth_generators::project_accounting::MilestoneGenerator::new(
5209 self.config.project_accounting.milestones.clone(),
5210 seed + 97,
5211 );
5212 snapshot.milestones = ms_gen.generate(&pool.projects, start_date, end_date, end_date);
5213 }
5214
5215 if self.config.project_accounting.earned_value.enabled && !snapshot.projects.is_empty() {
5217 let mut evm_gen = datasynth_generators::project_accounting::EarnedValueGenerator::new(
5218 self.config.project_accounting.earned_value.clone(),
5219 seed + 98,
5220 );
5221 snapshot.earned_value_metrics =
5222 evm_gen.generate(&pool.projects, &snapshot.cost_lines, start_date, end_date);
5223 }
5224
5225 stats.project_count = snapshot.projects.len();
5226 stats.project_change_order_count = snapshot.change_orders.len();
5227 stats.project_cost_line_count = snapshot.cost_lines.len();
5228
5229 info!(
5230 "Project accounting generated: {} projects, {} change orders, {} milestones, {} EVM records",
5231 snapshot.projects.len(),
5232 snapshot.change_orders.len(),
5233 snapshot.milestones.len(),
5234 snapshot.earned_value_metrics.len()
5235 );
5236 self.check_resources_with_log("post-project-accounting")?;
5237
5238 Ok(snapshot)
5239 }
5240
5241 fn phase_evolution_events(
5243 &mut self,
5244 stats: &mut EnhancedGenerationStatistics,
5245 ) -> SynthResult<(Vec<ProcessEvolutionEvent>, Vec<OrganizationalEvent>)> {
5246 if !self.phase_config.generate_evolution_events {
5247 debug!("Phase 24: Skipped (evolution events disabled)");
5248 return Ok((Vec::new(), Vec::new()));
5249 }
5250 info!("Phase 24: Generating Process Evolution + Organizational Events");
5251
5252 let seed = self.seed;
5253 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5254 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
5255 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
5256
5257 let mut proc_gen =
5259 datasynth_generators::process_evolution_generator::ProcessEvolutionGenerator::new(
5260 seed + 100,
5261 );
5262 let process_events = proc_gen.generate_events(start_date, end_date);
5263
5264 let company_codes: Vec<String> = self
5266 .config
5267 .companies
5268 .iter()
5269 .map(|c| c.code.clone())
5270 .collect();
5271 let mut org_gen =
5272 datasynth_generators::organizational_event_generator::OrganizationalEventGenerator::new(
5273 seed + 101,
5274 );
5275 let org_events = org_gen.generate_events(start_date, end_date, &company_codes);
5276
5277 stats.process_evolution_event_count = process_events.len();
5278 stats.organizational_event_count = org_events.len();
5279
5280 info!(
5281 "Evolution events generated: {} process evolution, {} organizational",
5282 process_events.len(),
5283 org_events.len()
5284 );
5285 self.check_resources_with_log("post-evolution-events")?;
5286
5287 Ok((process_events, org_events))
5288 }
5289
5290 fn phase_disruption_events(
5293 &self,
5294 stats: &mut EnhancedGenerationStatistics,
5295 ) -> SynthResult<Vec<datasynth_generators::disruption::DisruptionEvent>> {
5296 if !self.config.organizational_events.enabled {
5297 debug!("Phase 24b: Skipped (organizational events disabled)");
5298 return Ok(Vec::new());
5299 }
5300 info!("Phase 24b: Generating Disruption Events");
5301
5302 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5303 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
5304 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
5305
5306 let company_codes: Vec<String> = self
5307 .config
5308 .companies
5309 .iter()
5310 .map(|c| c.code.clone())
5311 .collect();
5312
5313 let mut gen = datasynth_generators::disruption::DisruptionGenerator::new(self.seed + 150);
5314 let events = gen.generate(start_date, end_date, &company_codes);
5315
5316 stats.disruption_event_count = events.len();
5317 info!("Disruption events generated: {} events", events.len());
5318 self.check_resources_with_log("post-disruption-events")?;
5319
5320 Ok(events)
5321 }
5322
5323 fn phase_counterfactuals(
5330 &self,
5331 journal_entries: &[JournalEntry],
5332 stats: &mut EnhancedGenerationStatistics,
5333 ) -> SynthResult<Vec<datasynth_generators::counterfactual::CounterfactualPair>> {
5334 if !self.phase_config.generate_counterfactuals || journal_entries.is_empty() {
5335 debug!("Phase 25: Skipped (counterfactual generation disabled or no JEs)");
5336 return Ok(Vec::new());
5337 }
5338 info!("Phase 25: Generating Counterfactual Pairs for ML Training");
5339
5340 use datasynth_generators::counterfactual::{CounterfactualGenerator, CounterfactualSpec};
5341
5342 let mut gen = CounterfactualGenerator::new(self.seed + 110);
5343
5344 let specs = [
5346 CounterfactualSpec::ScaleAmount { factor: 2.5 },
5347 CounterfactualSpec::ShiftDate { days: -14 },
5348 CounterfactualSpec::SelfApprove,
5349 CounterfactualSpec::SplitTransaction { split_count: 3 },
5350 ];
5351
5352 let pairs: Vec<_> = journal_entries
5353 .iter()
5354 .enumerate()
5355 .map(|(i, je)| {
5356 let spec = &specs[i % specs.len()];
5357 gen.generate(je, spec)
5358 })
5359 .collect();
5360
5361 stats.counterfactual_pair_count = pairs.len();
5362 info!(
5363 "Counterfactual pairs generated: {} pairs from {} journal entries",
5364 pairs.len(),
5365 journal_entries.len()
5366 );
5367 self.check_resources_with_log("post-counterfactuals")?;
5368
5369 Ok(pairs)
5370 }
5371
5372 fn phase_red_flags(
5379 &self,
5380 anomaly_labels: &AnomalyLabels,
5381 document_flows: &DocumentFlowSnapshot,
5382 stats: &mut EnhancedGenerationStatistics,
5383 ) -> SynthResult<Vec<datasynth_generators::fraud::RedFlag>> {
5384 if !self.config.fraud.enabled {
5385 debug!("Phase 26: Skipped (fraud generation disabled)");
5386 return Ok(Vec::new());
5387 }
5388 info!("Phase 26: Generating Fraud Red-Flag Indicators");
5389
5390 use datasynth_generators::fraud::RedFlagGenerator;
5391
5392 let generator = RedFlagGenerator::new();
5393 let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 120);
5394
5395 let fraud_doc_ids: std::collections::HashSet<&str> = anomaly_labels
5397 .labels
5398 .iter()
5399 .filter(|label| label.anomaly_type.is_intentional())
5400 .map(|label| label.document_id.as_str())
5401 .collect();
5402
5403 let mut flags = Vec::new();
5404
5405 for chain in &document_flows.p2p_chains {
5407 let doc_id = &chain.purchase_order.header.document_id;
5408 let is_fraud = fraud_doc_ids.contains(doc_id.as_str());
5409 flags.extend(generator.inject_flags(doc_id, is_fraud, &mut rng));
5410 }
5411
5412 for chain in &document_flows.o2c_chains {
5414 let doc_id = &chain.sales_order.header.document_id;
5415 let is_fraud = fraud_doc_ids.contains(doc_id.as_str());
5416 flags.extend(generator.inject_flags(doc_id, is_fraud, &mut rng));
5417 }
5418
5419 stats.red_flag_count = flags.len();
5420 info!(
5421 "Red flags generated: {} flags across {} P2P + {} O2C chains ({} fraud docs)",
5422 flags.len(),
5423 document_flows.p2p_chains.len(),
5424 document_flows.o2c_chains.len(),
5425 fraud_doc_ids.len()
5426 );
5427 self.check_resources_with_log("post-red-flags")?;
5428
5429 Ok(flags)
5430 }
5431
5432 fn phase_collusion_rings(
5438 &mut self,
5439 stats: &mut EnhancedGenerationStatistics,
5440 ) -> SynthResult<Vec<datasynth_generators::fraud::CollusionRing>> {
5441 if !(self.config.fraud.enabled && self.config.fraud.clustering_enabled) {
5442 debug!("Phase 26b: Skipped (fraud collusion generation disabled)");
5443 return Ok(Vec::new());
5444 }
5445 info!("Phase 26b: Generating Collusion Rings");
5446
5447 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5448 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
5449 let months = self.config.global.period_months;
5450
5451 let employee_ids: Vec<String> = self
5452 .master_data
5453 .employees
5454 .iter()
5455 .map(|e| e.employee_id.clone())
5456 .collect();
5457 let vendor_ids: Vec<String> = self
5458 .master_data
5459 .vendors
5460 .iter()
5461 .map(|v| v.vendor_id.clone())
5462 .collect();
5463
5464 let mut generator =
5465 datasynth_generators::fraud::CollusionRingGenerator::new(self.seed + 160);
5466 let rings = generator.generate(&employee_ids, &vendor_ids, start_date, months);
5467
5468 stats.collusion_ring_count = rings.len();
5469 info!(
5470 "Collusion rings generated: {} rings, total members: {}",
5471 rings.len(),
5472 rings
5473 .iter()
5474 .map(datasynth_generators::fraud::CollusionRing::size)
5475 .sum::<usize>()
5476 );
5477 self.check_resources_with_log("post-collusion-rings")?;
5478
5479 Ok(rings)
5480 }
5481
5482 fn phase_temporal_attributes(
5487 &mut self,
5488 stats: &mut EnhancedGenerationStatistics,
5489 ) -> SynthResult<
5490 Vec<datasynth_core::models::TemporalVersionChain<datasynth_core::models::Vendor>>,
5491 > {
5492 if !self.config.temporal_attributes.enabled {
5493 debug!("Phase 27: Skipped (temporal attributes disabled)");
5494 return Ok(Vec::new());
5495 }
5496 info!("Phase 27: Generating Bi-Temporal Vendor Version Chains");
5497
5498 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5499 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
5500
5501 let generate_version_chains = self.config.temporal_attributes.generate_version_chains
5505 || self.config.temporal_attributes.enabled;
5506 let temporal_config = {
5507 let ta = &self.config.temporal_attributes;
5508 datasynth_generators::temporal::TemporalAttributeConfigBuilder::new()
5509 .enabled(ta.enabled)
5510 .closed_probability(ta.valid_time.closed_probability)
5511 .avg_validity_days(ta.valid_time.avg_validity_days)
5512 .avg_recording_delay(ta.transaction_time.avg_recording_delay_seconds)
5513 .with_version_chains(if generate_version_chains {
5514 ta.avg_versions_per_entity
5515 } else {
5516 1.0
5517 })
5518 .build()
5519 };
5520 let temporal_config = if self
5522 .config
5523 .temporal_attributes
5524 .transaction_time
5525 .allow_backdating
5526 {
5527 let mut c = temporal_config;
5528 c.transaction_time.allow_backdating = true;
5529 c.transaction_time.backdating_probability = self
5530 .config
5531 .temporal_attributes
5532 .transaction_time
5533 .backdating_probability;
5534 c.transaction_time.max_backdate_days = self
5535 .config
5536 .temporal_attributes
5537 .transaction_time
5538 .max_backdate_days;
5539 c
5540 } else {
5541 temporal_config
5542 };
5543 let mut gen = datasynth_generators::temporal::TemporalAttributeGenerator::new(
5544 temporal_config,
5545 self.seed + 130,
5546 start_date,
5547 );
5548
5549 let uuid_factory = datasynth_core::DeterministicUuidFactory::new(
5550 self.seed + 130,
5551 datasynth_core::GeneratorType::Vendor,
5552 );
5553
5554 let chains: Vec<_> = self
5555 .master_data
5556 .vendors
5557 .iter()
5558 .map(|vendor| {
5559 let id = uuid_factory.next();
5560 gen.generate_version_chain(vendor.clone(), id)
5561 })
5562 .collect();
5563
5564 stats.temporal_version_chain_count = chains.len();
5565 info!("Temporal version chains generated: {} chains", chains.len());
5566 self.check_resources_with_log("post-temporal-attributes")?;
5567
5568 Ok(chains)
5569 }
5570
5571 fn phase_entity_relationships(
5581 &self,
5582 journal_entries: &[JournalEntry],
5583 document_flows: &DocumentFlowSnapshot,
5584 stats: &mut EnhancedGenerationStatistics,
5585 ) -> SynthResult<(
5586 Option<datasynth_core::models::EntityGraph>,
5587 Vec<datasynth_core::models::CrossProcessLink>,
5588 )> {
5589 use datasynth_generators::relationships::{
5590 DeliveryRef, EntityGraphConfig, EntityGraphGenerator, EntitySummary, GoodsReceiptRef,
5591 TransactionSummary,
5592 };
5593
5594 let rs_enabled = self.config.relationship_strength.enabled;
5595 let cpl_enabled = self.config.cross_process_links.enabled
5596 || (!document_flows.p2p_chains.is_empty() && !document_flows.o2c_chains.is_empty());
5597
5598 if !rs_enabled && !cpl_enabled {
5599 debug!(
5600 "Phase 28: Skipped (relationship_strength and cross_process_links both disabled)"
5601 );
5602 return Ok((None, Vec::new()));
5603 }
5604
5605 info!("Phase 28: Generating Entity Relationship Graph + Cross-Process Links");
5606
5607 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5608 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
5609
5610 let company_code = self
5611 .config
5612 .companies
5613 .first()
5614 .map(|c| c.code.as_str())
5615 .unwrap_or("1000");
5616
5617 let gen_config = EntityGraphConfig {
5619 enabled: rs_enabled,
5620 cross_process: datasynth_generators::relationships::CrossProcessConfig {
5621 enable_inventory_links: self.config.cross_process_links.inventory_p2p_o2c,
5622 enable_return_flows: false,
5623 enable_payment_links: self.config.cross_process_links.payment_bank_reconciliation,
5624 enable_ic_bilateral: self.config.cross_process_links.intercompany_bilateral,
5625 inventory_link_rate: if document_flows.p2p_chains.len() <= 10 {
5627 1.0
5628 } else {
5629 0.30
5630 },
5631 ..Default::default()
5632 },
5633 strength_config: datasynth_generators::relationships::StrengthConfig {
5634 transaction_volume_weight: self
5635 .config
5636 .relationship_strength
5637 .calculation
5638 .transaction_volume_weight,
5639 transaction_count_weight: self
5640 .config
5641 .relationship_strength
5642 .calculation
5643 .transaction_count_weight,
5644 duration_weight: self
5645 .config
5646 .relationship_strength
5647 .calculation
5648 .relationship_duration_weight,
5649 recency_weight: self.config.relationship_strength.calculation.recency_weight,
5650 mutual_connections_weight: self
5651 .config
5652 .relationship_strength
5653 .calculation
5654 .mutual_connections_weight,
5655 recency_half_life_days: self
5656 .config
5657 .relationship_strength
5658 .calculation
5659 .recency_half_life_days,
5660 },
5661 ..Default::default()
5662 };
5663
5664 let mut gen = EntityGraphGenerator::with_config(self.seed + 140, gen_config);
5665
5666 let entity_graph = if rs_enabled {
5668 let vendor_summaries: Vec<EntitySummary> = self
5670 .master_data
5671 .vendors
5672 .iter()
5673 .map(|v| {
5674 EntitySummary::new(
5675 &v.vendor_id,
5676 &v.name,
5677 datasynth_core::models::GraphEntityType::Vendor,
5678 start_date,
5679 )
5680 })
5681 .collect();
5682
5683 let customer_summaries: Vec<EntitySummary> = self
5684 .master_data
5685 .customers
5686 .iter()
5687 .map(|c| {
5688 EntitySummary::new(
5689 &c.customer_id,
5690 &c.name,
5691 datasynth_core::models::GraphEntityType::Customer,
5692 start_date,
5693 )
5694 })
5695 .collect();
5696
5697 let mut txn_summaries: std::collections::HashMap<(String, String), TransactionSummary> =
5702 std::collections::HashMap::new();
5703
5704 for je in journal_entries {
5705 let cc = je.header.company_code.clone();
5706 let posting_date = je.header.posting_date;
5707 for line in &je.lines {
5708 if let Some(ref tp) = line.trading_partner {
5709 let amount = if line.debit_amount > line.credit_amount {
5710 line.debit_amount
5711 } else {
5712 line.credit_amount
5713 };
5714 let entry = txn_summaries
5715 .entry((cc.clone(), tp.clone()))
5716 .or_insert_with(|| TransactionSummary {
5717 total_volume: rust_decimal::Decimal::ZERO,
5718 transaction_count: 0,
5719 first_transaction_date: posting_date,
5720 last_transaction_date: posting_date,
5721 related_entities: std::collections::HashSet::new(),
5722 });
5723 entry.total_volume += amount;
5724 entry.transaction_count += 1;
5725 if posting_date < entry.first_transaction_date {
5726 entry.first_transaction_date = posting_date;
5727 }
5728 if posting_date > entry.last_transaction_date {
5729 entry.last_transaction_date = posting_date;
5730 }
5731 entry.related_entities.insert(cc.clone());
5732 }
5733 }
5734 }
5735
5736 for chain in &document_flows.p2p_chains {
5739 let cc = chain.purchase_order.header.company_code.clone();
5740 let vendor_id = chain.purchase_order.vendor_id.clone();
5741 let po_date = chain.purchase_order.header.document_date;
5742 let amount = chain.purchase_order.total_net_amount;
5743
5744 let entry = txn_summaries
5745 .entry((cc.clone(), vendor_id))
5746 .or_insert_with(|| TransactionSummary {
5747 total_volume: rust_decimal::Decimal::ZERO,
5748 transaction_count: 0,
5749 first_transaction_date: po_date,
5750 last_transaction_date: po_date,
5751 related_entities: std::collections::HashSet::new(),
5752 });
5753 entry.total_volume += amount;
5754 entry.transaction_count += 1;
5755 if po_date < entry.first_transaction_date {
5756 entry.first_transaction_date = po_date;
5757 }
5758 if po_date > entry.last_transaction_date {
5759 entry.last_transaction_date = po_date;
5760 }
5761 entry.related_entities.insert(cc);
5762 }
5763
5764 for chain in &document_flows.o2c_chains {
5766 let cc = chain.sales_order.header.company_code.clone();
5767 let customer_id = chain.sales_order.customer_id.clone();
5768 let so_date = chain.sales_order.header.document_date;
5769 let amount = chain.sales_order.total_net_amount;
5770
5771 let entry = txn_summaries
5772 .entry((cc.clone(), customer_id))
5773 .or_insert_with(|| TransactionSummary {
5774 total_volume: rust_decimal::Decimal::ZERO,
5775 transaction_count: 0,
5776 first_transaction_date: so_date,
5777 last_transaction_date: so_date,
5778 related_entities: std::collections::HashSet::new(),
5779 });
5780 entry.total_volume += amount;
5781 entry.transaction_count += 1;
5782 if so_date < entry.first_transaction_date {
5783 entry.first_transaction_date = so_date;
5784 }
5785 if so_date > entry.last_transaction_date {
5786 entry.last_transaction_date = so_date;
5787 }
5788 entry.related_entities.insert(cc);
5789 }
5790
5791 let as_of_date = journal_entries
5792 .last()
5793 .map(|je| je.header.posting_date)
5794 .unwrap_or(start_date);
5795
5796 let graph = gen.generate_entity_graph(
5797 company_code,
5798 as_of_date,
5799 &vendor_summaries,
5800 &customer_summaries,
5801 &txn_summaries,
5802 );
5803
5804 info!(
5805 "Entity relationship graph: {} nodes, {} edges",
5806 graph.nodes.len(),
5807 graph.edges.len()
5808 );
5809 stats.entity_relationship_node_count = graph.nodes.len();
5810 stats.entity_relationship_edge_count = graph.edges.len();
5811 Some(graph)
5812 } else {
5813 None
5814 };
5815
5816 let cross_process_links = if cpl_enabled {
5818 let gr_refs: Vec<GoodsReceiptRef> = document_flows
5820 .p2p_chains
5821 .iter()
5822 .flat_map(|chain| {
5823 let vendor_id = chain.purchase_order.vendor_id.clone();
5824 let cc = chain.purchase_order.header.company_code.clone();
5825 chain.goods_receipts.iter().flat_map(move |gr| {
5826 gr.items.iter().filter_map({
5827 let doc_id = gr.header.document_id.clone();
5828 let v_id = vendor_id.clone();
5829 let company = cc.clone();
5830 let receipt_date = gr.header.document_date;
5831 move |item| {
5832 item.base
5833 .material_id
5834 .as_ref()
5835 .map(|mat_id| GoodsReceiptRef {
5836 document_id: doc_id.clone(),
5837 material_id: mat_id.clone(),
5838 quantity: item.base.quantity,
5839 receipt_date,
5840 vendor_id: v_id.clone(),
5841 company_code: company.clone(),
5842 })
5843 }
5844 })
5845 })
5846 })
5847 .collect();
5848
5849 let del_refs: Vec<DeliveryRef> = document_flows
5851 .o2c_chains
5852 .iter()
5853 .flat_map(|chain| {
5854 let customer_id = chain.sales_order.customer_id.clone();
5855 let cc = chain.sales_order.header.company_code.clone();
5856 chain.deliveries.iter().flat_map(move |del| {
5857 let delivery_date = del.actual_gi_date.unwrap_or(del.planned_gi_date);
5858 del.items.iter().filter_map({
5859 let doc_id = del.header.document_id.clone();
5860 let c_id = customer_id.clone();
5861 let company = cc.clone();
5862 move |item| {
5863 item.base.material_id.as_ref().map(|mat_id| DeliveryRef {
5864 document_id: doc_id.clone(),
5865 material_id: mat_id.clone(),
5866 quantity: item.base.quantity,
5867 delivery_date,
5868 customer_id: c_id.clone(),
5869 company_code: company.clone(),
5870 })
5871 }
5872 })
5873 })
5874 })
5875 .collect();
5876
5877 let links = gen.generate_cross_process_links(&gr_refs, &del_refs);
5878 info!("Cross-process links generated: {} links", links.len());
5879 stats.cross_process_link_count = links.len();
5880 links
5881 } else {
5882 Vec::new()
5883 };
5884
5885 self.check_resources_with_log("post-entity-relationships")?;
5886 Ok((entity_graph, cross_process_links))
5887 }
5888
5889 fn phase_industry_data(
5891 &self,
5892 stats: &mut EnhancedGenerationStatistics,
5893 ) -> Option<datasynth_generators::industry::factory::IndustryOutput> {
5894 if !self.config.industry_specific.enabled {
5895 return None;
5896 }
5897 info!("Phase 29: Generating industry-specific data");
5898 let output = datasynth_generators::industry::factory::generate_industry_output(
5899 self.config.global.industry,
5900 );
5901 stats.industry_gl_account_count = output.gl_accounts.len();
5902 info!(
5903 "Industry data generated: {} GL accounts for {:?}",
5904 output.gl_accounts.len(),
5905 self.config.global.industry
5906 );
5907 Some(output)
5908 }
5909
5910 fn phase_opening_balances(
5912 &mut self,
5913 coa: &Arc<ChartOfAccounts>,
5914 stats: &mut EnhancedGenerationStatistics,
5915 ) -> SynthResult<Vec<GeneratedOpeningBalance>> {
5916 if !self.config.balance.generate_opening_balances {
5917 debug!("Phase 3b: Skipped (opening balance generation disabled)");
5918 return Ok(Vec::new());
5919 }
5920 info!("Phase 3b: Generating Opening Balances");
5921
5922 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5923 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
5924 let fiscal_year = start_date.year();
5925
5926 let industry = match self.config.global.industry {
5927 IndustrySector::Manufacturing => IndustryType::Manufacturing,
5928 IndustrySector::Retail => IndustryType::Retail,
5929 IndustrySector::FinancialServices => IndustryType::Financial,
5930 IndustrySector::Healthcare => IndustryType::Healthcare,
5931 IndustrySector::Technology => IndustryType::Technology,
5932 _ => IndustryType::Manufacturing,
5933 };
5934
5935 let config = datasynth_generators::OpeningBalanceConfig {
5936 industry,
5937 ..Default::default()
5938 };
5939 let mut gen =
5940 datasynth_generators::OpeningBalanceGenerator::with_seed(config, self.seed + 200);
5941
5942 let mut results = Vec::new();
5943 for company in &self.config.companies {
5944 let spec = OpeningBalanceSpec::new(
5945 company.code.clone(),
5946 start_date,
5947 fiscal_year,
5948 company.currency.clone(),
5949 rust_decimal::Decimal::new(10_000_000, 0),
5950 industry,
5951 );
5952 let ob = gen.generate(&spec, coa, start_date, &company.code);
5953 results.push(ob);
5954 }
5955
5956 stats.opening_balance_count = results.len();
5957 info!("Opening balances generated: {} companies", results.len());
5958 self.check_resources_with_log("post-opening-balances")?;
5959
5960 Ok(results)
5961 }
5962
5963 fn phase_subledger_reconciliation(
5965 &mut self,
5966 subledger: &SubledgerSnapshot,
5967 entries: &[JournalEntry],
5968 stats: &mut EnhancedGenerationStatistics,
5969 ) -> SynthResult<Vec<datasynth_generators::ReconciliationResult>> {
5970 if !self.config.balance.reconcile_subledgers {
5971 debug!("Phase 9b: Skipped (subledger reconciliation disabled)");
5972 return Ok(Vec::new());
5973 }
5974 info!("Phase 9b: Reconciling GL to subledger balances");
5975
5976 let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5977 .map(|d| d + chrono::Months::new(self.config.global.period_months))
5978 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
5979
5980 let tracker_config = BalanceTrackerConfig {
5982 validate_on_each_entry: false,
5983 track_history: false,
5984 fail_on_validation_error: false,
5985 ..Default::default()
5986 };
5987 let recon_currency = self
5988 .config
5989 .companies
5990 .first()
5991 .map(|c| c.currency.clone())
5992 .unwrap_or_else(|| "USD".to_string());
5993 let mut tracker = RunningBalanceTracker::new_with_currency(tracker_config, recon_currency);
5994 let validation_errors = tracker.apply_entries(entries);
5995 if !validation_errors.is_empty() {
5996 warn!(
5997 error_count = validation_errors.len(),
5998 "Balance tracker encountered validation errors during subledger reconciliation"
5999 );
6000 for err in &validation_errors {
6001 debug!("Balance validation error: {:?}", err);
6002 }
6003 }
6004
6005 let mut engine = datasynth_generators::ReconciliationEngine::new(
6006 datasynth_generators::ReconciliationConfig::default(),
6007 );
6008
6009 let mut results = Vec::new();
6010 let company_code = self
6011 .config
6012 .companies
6013 .first()
6014 .map(|c| c.code.as_str())
6015 .unwrap_or("1000");
6016
6017 if !subledger.ar_invoices.is_empty() {
6019 let gl_balance = tracker
6020 .get_account_balance(
6021 company_code,
6022 datasynth_core::accounts::control_accounts::AR_CONTROL,
6023 )
6024 .map(|b| b.closing_balance)
6025 .unwrap_or_default();
6026 let ar_refs: Vec<&ARInvoice> = subledger.ar_invoices.iter().collect();
6027 results.push(engine.reconcile_ar(company_code, end_date, gl_balance, &ar_refs));
6028 }
6029
6030 if !subledger.ap_invoices.is_empty() {
6032 let gl_balance = tracker
6033 .get_account_balance(
6034 company_code,
6035 datasynth_core::accounts::control_accounts::AP_CONTROL,
6036 )
6037 .map(|b| b.closing_balance)
6038 .unwrap_or_default();
6039 let ap_refs: Vec<&APInvoice> = subledger.ap_invoices.iter().collect();
6040 results.push(engine.reconcile_ap(company_code, end_date, gl_balance, &ap_refs));
6041 }
6042
6043 if !subledger.fa_records.is_empty() {
6045 let gl_asset_balance = tracker
6046 .get_account_balance(
6047 company_code,
6048 datasynth_core::accounts::control_accounts::FIXED_ASSETS,
6049 )
6050 .map(|b| b.closing_balance)
6051 .unwrap_or_default();
6052 let gl_accum_depr_balance = tracker
6053 .get_account_balance(
6054 company_code,
6055 datasynth_core::accounts::control_accounts::ACCUMULATED_DEPRECIATION,
6056 )
6057 .map(|b| b.closing_balance)
6058 .unwrap_or_default();
6059 let fa_refs: Vec<&datasynth_core::models::subledger::fa::FixedAssetRecord> =
6060 subledger.fa_records.iter().collect();
6061 let (asset_recon, depr_recon) = engine.reconcile_fa(
6062 company_code,
6063 end_date,
6064 gl_asset_balance,
6065 gl_accum_depr_balance,
6066 &fa_refs,
6067 );
6068 results.push(asset_recon);
6069 results.push(depr_recon);
6070 }
6071
6072 if !subledger.inventory_positions.is_empty() {
6074 let gl_balance = tracker
6075 .get_account_balance(
6076 company_code,
6077 datasynth_core::accounts::control_accounts::INVENTORY,
6078 )
6079 .map(|b| b.closing_balance)
6080 .unwrap_or_default();
6081 let inv_refs: Vec<&datasynth_core::models::subledger::inventory::InventoryPosition> =
6082 subledger.inventory_positions.iter().collect();
6083 results.push(engine.reconcile_inventory(company_code, end_date, gl_balance, &inv_refs));
6084 }
6085
6086 stats.subledger_reconciliation_count = results.len();
6087 info!(
6088 "Subledger reconciliation complete: {} reconciliations",
6089 results.len()
6090 );
6091 self.check_resources_with_log("post-subledger-reconciliation")?;
6092
6093 Ok(results)
6094 }
6095
6096 fn generate_coa(&mut self) -> SynthResult<Arc<ChartOfAccounts>> {
6098 let pb = self.create_progress_bar(1, "Generating Chart of Accounts");
6099
6100 let coa_framework = self.resolve_coa_framework();
6101
6102 let mut gen = ChartOfAccountsGenerator::new(
6103 self.config.chart_of_accounts.complexity,
6104 self.config.global.industry,
6105 self.seed,
6106 )
6107 .with_coa_framework(coa_framework);
6108
6109 let coa = Arc::new(gen.generate());
6110 self.coa = Some(Arc::clone(&coa));
6111
6112 if let Some(pb) = pb {
6113 pb.finish_with_message("Chart of Accounts complete");
6114 }
6115
6116 Ok(coa)
6117 }
6118
6119 fn generate_master_data(&mut self) -> SynthResult<()> {
6121 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6122 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
6123 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
6124
6125 let total = self.config.companies.len() as u64 * 5; let pb = self.create_progress_bar(total, "Generating Master Data");
6127
6128 let pack = self.primary_pack().clone();
6130
6131 let vendors_per_company = self.phase_config.vendors_per_company;
6133 let customers_per_company = self.phase_config.customers_per_company;
6134 let materials_per_company = self.phase_config.materials_per_company;
6135 let assets_per_company = self.phase_config.assets_per_company;
6136 let coa_framework = self.resolve_coa_framework();
6137
6138 let per_company_results: Vec<_> = self
6141 .config
6142 .companies
6143 .par_iter()
6144 .enumerate()
6145 .map(|(i, company)| {
6146 let company_seed = self.seed.wrapping_add(i as u64 * 1000);
6147 let pack = pack.clone();
6148
6149 let mut vendor_gen = VendorGenerator::new(company_seed);
6151 vendor_gen.set_country_pack(pack.clone());
6152 vendor_gen.set_coa_framework(coa_framework);
6153 vendor_gen.set_counter_offset(i * vendors_per_company);
6154 let vendor_pool =
6155 vendor_gen.generate_vendor_pool(vendors_per_company, &company.code, start_date);
6156
6157 let mut customer_gen = CustomerGenerator::new(company_seed + 100);
6159 customer_gen.set_country_pack(pack.clone());
6160 customer_gen.set_coa_framework(coa_framework);
6161 customer_gen.set_counter_offset(i * customers_per_company);
6162 let customer_pool = customer_gen.generate_customer_pool(
6163 customers_per_company,
6164 &company.code,
6165 start_date,
6166 );
6167
6168 let mut material_gen = MaterialGenerator::new(company_seed + 200);
6170 material_gen.set_country_pack(pack.clone());
6171 material_gen.set_counter_offset(i * materials_per_company);
6172 let material_pool = material_gen.generate_material_pool(
6173 materials_per_company,
6174 &company.code,
6175 start_date,
6176 );
6177
6178 let mut asset_gen = AssetGenerator::new(company_seed + 300);
6180 let asset_pool = asset_gen.generate_asset_pool(
6181 assets_per_company,
6182 &company.code,
6183 (start_date, end_date),
6184 );
6185
6186 let mut employee_gen = EmployeeGenerator::new(company_seed + 400);
6188 employee_gen.set_country_pack(pack);
6189 let employee_pool =
6190 employee_gen.generate_company_pool(&company.code, (start_date, end_date));
6191
6192 (
6193 vendor_pool.vendors,
6194 customer_pool.customers,
6195 material_pool.materials,
6196 asset_pool.assets,
6197 employee_pool.employees,
6198 )
6199 })
6200 .collect();
6201
6202 for (vendors, customers, materials, assets, employees) in per_company_results {
6204 self.master_data.vendors.extend(vendors);
6205 self.master_data.customers.extend(customers);
6206 self.master_data.materials.extend(materials);
6207 self.master_data.assets.extend(assets);
6208 self.master_data.employees.extend(employees);
6209 }
6210
6211 if let Some(pb) = &pb {
6212 pb.inc(total);
6213 }
6214 if let Some(pb) = pb {
6215 pb.finish_with_message("Master data generation complete");
6216 }
6217
6218 Ok(())
6219 }
6220
6221 fn generate_document_flows(&mut self, flows: &mut DocumentFlowSnapshot) -> SynthResult<()> {
6223 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6224 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
6225
6226 let months = (self.config.global.period_months as usize).max(1);
6229 let p2p_count = self
6230 .phase_config
6231 .p2p_chains
6232 .min(self.master_data.vendors.len() * 2 * months);
6233 let pb = self.create_progress_bar(p2p_count as u64, "Generating P2P Document Flows");
6234
6235 let p2p_config = convert_p2p_config(&self.config.document_flows.p2p);
6237 let mut p2p_gen = P2PGenerator::with_config(self.seed + 1000, p2p_config);
6238 p2p_gen.set_country_pack(self.primary_pack().clone());
6239
6240 for i in 0..p2p_count {
6241 let vendor = &self.master_data.vendors[i % self.master_data.vendors.len()];
6242 let materials: Vec<&Material> = self
6243 .master_data
6244 .materials
6245 .iter()
6246 .skip(i % self.master_data.materials.len().max(1))
6247 .take(2.min(self.master_data.materials.len()))
6248 .collect();
6249
6250 if materials.is_empty() {
6251 continue;
6252 }
6253
6254 let company = &self.config.companies[i % self.config.companies.len()];
6255 let po_date = start_date + chrono::Duration::days((i * 3) as i64 % 365);
6256 let fiscal_period = po_date.month() as u8;
6257 let created_by = if self.master_data.employees.is_empty() {
6258 "SYSTEM"
6259 } else {
6260 self.master_data.employees[i % self.master_data.employees.len()]
6261 .user_id
6262 .as_str()
6263 };
6264
6265 let chain = p2p_gen.generate_chain(
6266 &company.code,
6267 vendor,
6268 &materials,
6269 po_date,
6270 start_date.year() as u16,
6271 fiscal_period,
6272 created_by,
6273 );
6274
6275 flows.purchase_orders.push(chain.purchase_order.clone());
6277 flows.goods_receipts.extend(chain.goods_receipts.clone());
6278 if let Some(vi) = &chain.vendor_invoice {
6279 flows.vendor_invoices.push(vi.clone());
6280 }
6281 if let Some(payment) = &chain.payment {
6282 flows.payments.push(payment.clone());
6283 }
6284 for remainder in &chain.remainder_payments {
6285 flows.payments.push(remainder.clone());
6286 }
6287 flows.p2p_chains.push(chain);
6288
6289 if let Some(pb) = &pb {
6290 pb.inc(1);
6291 }
6292 }
6293
6294 if let Some(pb) = pb {
6295 pb.finish_with_message("P2P document flows complete");
6296 }
6297
6298 let o2c_count = self
6301 .phase_config
6302 .o2c_chains
6303 .min(self.master_data.customers.len() * 2 * months);
6304 let pb = self.create_progress_bar(o2c_count as u64, "Generating O2C Document Flows");
6305
6306 let o2c_config = convert_o2c_config(&self.config.document_flows.o2c);
6308 let mut o2c_gen = O2CGenerator::with_config(self.seed + 2000, o2c_config);
6309 o2c_gen.set_country_pack(self.primary_pack().clone());
6310
6311 for i in 0..o2c_count {
6312 let customer = &self.master_data.customers[i % self.master_data.customers.len()];
6313 let materials: Vec<&Material> = self
6314 .master_data
6315 .materials
6316 .iter()
6317 .skip(i % self.master_data.materials.len().max(1))
6318 .take(2.min(self.master_data.materials.len()))
6319 .collect();
6320
6321 if materials.is_empty() {
6322 continue;
6323 }
6324
6325 let company = &self.config.companies[i % self.config.companies.len()];
6326 let so_date = start_date + chrono::Duration::days((i * 2) as i64 % 365);
6327 let fiscal_period = so_date.month() as u8;
6328 let created_by = if self.master_data.employees.is_empty() {
6329 "SYSTEM"
6330 } else {
6331 self.master_data.employees[i % self.master_data.employees.len()]
6332 .user_id
6333 .as_str()
6334 };
6335
6336 let chain = o2c_gen.generate_chain(
6337 &company.code,
6338 customer,
6339 &materials,
6340 so_date,
6341 start_date.year() as u16,
6342 fiscal_period,
6343 created_by,
6344 );
6345
6346 flows.sales_orders.push(chain.sales_order.clone());
6348 flows.deliveries.extend(chain.deliveries.clone());
6349 if let Some(ci) = &chain.customer_invoice {
6350 flows.customer_invoices.push(ci.clone());
6351 }
6352 if let Some(receipt) = &chain.customer_receipt {
6353 flows.payments.push(receipt.clone());
6354 }
6355 for receipt in &chain.remainder_receipts {
6357 flows.payments.push(receipt.clone());
6358 }
6359 flows.o2c_chains.push(chain);
6360
6361 if let Some(pb) = &pb {
6362 pb.inc(1);
6363 }
6364 }
6365
6366 if let Some(pb) = pb {
6367 pb.finish_with_message("O2C document flows complete");
6368 }
6369
6370 Ok(())
6371 }
6372
6373 fn generate_journal_entries(
6375 &mut self,
6376 coa: &Arc<ChartOfAccounts>,
6377 ) -> SynthResult<Vec<JournalEntry>> {
6378 use datasynth_core::traits::ParallelGenerator;
6379
6380 let total = self.calculate_total_transactions();
6381 let pb = self.create_progress_bar(total, "Generating Journal Entries");
6382
6383 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6384 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
6385 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
6386
6387 let company_codes: Vec<String> = self
6388 .config
6389 .companies
6390 .iter()
6391 .map(|c| c.code.clone())
6392 .collect();
6393
6394 let generator = JournalEntryGenerator::new_with_params(
6395 self.config.transactions.clone(),
6396 Arc::clone(coa),
6397 company_codes,
6398 start_date,
6399 end_date,
6400 self.seed,
6401 );
6402
6403 let je_pack = self.primary_pack();
6407
6408 let mut generator = generator
6409 .with_master_data(
6410 &self.master_data.vendors,
6411 &self.master_data.customers,
6412 &self.master_data.materials,
6413 )
6414 .with_country_pack_names(je_pack)
6415 .with_country_pack_temporal(
6416 self.config.temporal_patterns.clone(),
6417 self.seed + 200,
6418 je_pack,
6419 )
6420 .with_persona_errors(true)
6421 .with_fraud_config(self.config.fraud.clone());
6422
6423 if self.config.temporal.enabled {
6425 let drift_config = self.config.temporal.to_core_config();
6426 generator = generator.with_drift_config(drift_config, self.seed + 100);
6427 }
6428
6429 self.check_memory_limit()?;
6431
6432 let num_threads = num_cpus::get().max(1).min(total as usize).max(1);
6434
6435 let entries = if total >= 10_000 && num_threads > 1 {
6439 let sub_generators = generator.split(num_threads);
6442 let entries_per_thread = total as usize / num_threads;
6443 let remainder = total as usize % num_threads;
6444
6445 let batches: Vec<Vec<JournalEntry>> = sub_generators
6446 .into_par_iter()
6447 .enumerate()
6448 .map(|(i, mut gen)| {
6449 let count = entries_per_thread + if i < remainder { 1 } else { 0 };
6450 gen.generate_batch(count)
6451 })
6452 .collect();
6453
6454 let entries = JournalEntryGenerator::merge_results(batches);
6456
6457 if let Some(pb) = &pb {
6458 pb.inc(total);
6459 }
6460 entries
6461 } else {
6462 let mut entries = Vec::with_capacity(total as usize);
6464 for _ in 0..total {
6465 let entry = generator.generate();
6466 entries.push(entry);
6467 if let Some(pb) = &pb {
6468 pb.inc(1);
6469 }
6470 }
6471 entries
6472 };
6473
6474 if let Some(pb) = pb {
6475 pb.finish_with_message("Journal entries complete");
6476 }
6477
6478 Ok(entries)
6479 }
6480
6481 fn generate_jes_from_document_flows(
6486 &mut self,
6487 flows: &DocumentFlowSnapshot,
6488 ) -> SynthResult<Vec<JournalEntry>> {
6489 let total_chains = flows.p2p_chains.len() + flows.o2c_chains.len();
6490 let pb = self.create_progress_bar(total_chains as u64, "Generating Document Flow JEs");
6491
6492 let je_config = match self.resolve_coa_framework() {
6493 CoAFramework::FrenchPcg => DocumentFlowJeConfig::french_gaap(),
6494 CoAFramework::GermanSkr04 => {
6495 let fa = datasynth_core::FrameworkAccounts::german_gaap();
6496 DocumentFlowJeConfig::from(&fa)
6497 }
6498 CoAFramework::UsGaap => DocumentFlowJeConfig::default(),
6499 };
6500
6501 let populate_fec = je_config.populate_fec_fields;
6502 let mut generator = DocumentFlowJeGenerator::with_config_and_seed(je_config, self.seed);
6503
6504 if populate_fec {
6508 let mut aux_lookup = std::collections::HashMap::new();
6509 for vendor in &self.master_data.vendors {
6510 if let Some(ref aux) = vendor.auxiliary_gl_account {
6511 aux_lookup.insert(vendor.vendor_id.clone(), aux.clone());
6512 }
6513 }
6514 for customer in &self.master_data.customers {
6515 if let Some(ref aux) = customer.auxiliary_gl_account {
6516 aux_lookup.insert(customer.customer_id.clone(), aux.clone());
6517 }
6518 }
6519 if !aux_lookup.is_empty() {
6520 generator.set_auxiliary_account_lookup(aux_lookup);
6521 }
6522 }
6523
6524 let mut entries = Vec::new();
6525
6526 for chain in &flows.p2p_chains {
6528 let chain_entries = generator.generate_from_p2p_chain(chain);
6529 entries.extend(chain_entries);
6530 if let Some(pb) = &pb {
6531 pb.inc(1);
6532 }
6533 }
6534
6535 for chain in &flows.o2c_chains {
6537 let chain_entries = generator.generate_from_o2c_chain(chain);
6538 entries.extend(chain_entries);
6539 if let Some(pb) = &pb {
6540 pb.inc(1);
6541 }
6542 }
6543
6544 if let Some(pb) = pb {
6545 pb.finish_with_message(format!(
6546 "Generated {} JEs from document flows",
6547 entries.len()
6548 ));
6549 }
6550
6551 Ok(entries)
6552 }
6553
6554 fn generate_payroll_jes(payroll_runs: &[PayrollRun]) -> Vec<JournalEntry> {
6560 use datasynth_core::accounts::{expense_accounts, suspense_accounts};
6561
6562 let mut jes = Vec::with_capacity(payroll_runs.len());
6563
6564 for run in payroll_runs {
6565 let mut je = JournalEntry::new_simple(
6566 format!("JE-PAYROLL-{}", run.payroll_id),
6567 run.company_code.clone(),
6568 run.run_date,
6569 format!("Payroll {}", run.payroll_id),
6570 );
6571
6572 je.add_line(JournalEntryLine {
6574 line_number: 1,
6575 gl_account: expense_accounts::SALARIES_WAGES.to_string(),
6576 debit_amount: run.total_gross,
6577 reference: Some(run.payroll_id.clone()),
6578 text: Some(format!(
6579 "Payroll {} ({} employees)",
6580 run.payroll_id, run.employee_count
6581 )),
6582 ..Default::default()
6583 });
6584
6585 je.add_line(JournalEntryLine {
6587 line_number: 2,
6588 gl_account: suspense_accounts::PAYROLL_CLEARING.to_string(),
6589 credit_amount: run.total_gross,
6590 reference: Some(run.payroll_id.clone()),
6591 ..Default::default()
6592 });
6593
6594 jes.push(je);
6595 }
6596
6597 jes
6598 }
6599
6600 fn generate_manufacturing_jes(production_orders: &[ProductionOrder]) -> Vec<JournalEntry> {
6606 use datasynth_core::accounts::{control_accounts, expense_accounts};
6607 use datasynth_core::models::ProductionOrderStatus;
6608
6609 let mut jes = Vec::new();
6610
6611 for order in production_orders {
6612 if !matches!(
6614 order.status,
6615 ProductionOrderStatus::Completed | ProductionOrderStatus::Closed
6616 ) {
6617 continue;
6618 }
6619
6620 let mut je = JournalEntry::new_simple(
6621 format!("JE-MFG-{}", order.order_id),
6622 order.company_code.clone(),
6623 order.actual_end.unwrap_or(order.planned_end),
6624 format!(
6625 "Production Order {} - {}",
6626 order.order_id, order.material_description
6627 ),
6628 );
6629
6630 je.add_line(JournalEntryLine {
6632 line_number: 1,
6633 gl_account: expense_accounts::RAW_MATERIALS.to_string(),
6634 debit_amount: order.actual_cost,
6635 reference: Some(order.order_id.clone()),
6636 text: Some(format!(
6637 "Material consumption for {}",
6638 order.material_description
6639 )),
6640 quantity: Some(order.actual_quantity),
6641 unit: Some("EA".to_string()),
6642 ..Default::default()
6643 });
6644
6645 je.add_line(JournalEntryLine {
6647 line_number: 2,
6648 gl_account: control_accounts::INVENTORY.to_string(),
6649 credit_amount: order.actual_cost,
6650 reference: Some(order.order_id.clone()),
6651 ..Default::default()
6652 });
6653
6654 jes.push(je);
6655 }
6656
6657 jes
6658 }
6659
6660 fn link_document_flows_to_subledgers(
6665 &mut self,
6666 flows: &DocumentFlowSnapshot,
6667 ) -> SynthResult<SubledgerSnapshot> {
6668 let total = flows.vendor_invoices.len() + flows.customer_invoices.len();
6669 let pb = self.create_progress_bar(total as u64, "Linking Subledgers");
6670
6671 let vendor_names: std::collections::HashMap<String, String> = self
6673 .master_data
6674 .vendors
6675 .iter()
6676 .map(|v| (v.vendor_id.clone(), v.name.clone()))
6677 .collect();
6678 let customer_names: std::collections::HashMap<String, String> = self
6679 .master_data
6680 .customers
6681 .iter()
6682 .map(|c| (c.customer_id.clone(), c.name.clone()))
6683 .collect();
6684
6685 let mut linker = DocumentFlowLinker::new()
6686 .with_vendor_names(vendor_names)
6687 .with_customer_names(customer_names);
6688
6689 let ap_invoices = linker.batch_create_ap_invoices(&flows.vendor_invoices);
6691 if let Some(pb) = &pb {
6692 pb.inc(flows.vendor_invoices.len() as u64);
6693 }
6694
6695 let ar_invoices = linker.batch_create_ar_invoices(&flows.customer_invoices);
6697 if let Some(pb) = &pb {
6698 pb.inc(flows.customer_invoices.len() as u64);
6699 }
6700
6701 if let Some(pb) = pb {
6702 pb.finish_with_message(format!(
6703 "Linked {} AP and {} AR invoices",
6704 ap_invoices.len(),
6705 ar_invoices.len()
6706 ));
6707 }
6708
6709 Ok(SubledgerSnapshot {
6710 ap_invoices,
6711 ar_invoices,
6712 fa_records: Vec::new(),
6713 inventory_positions: Vec::new(),
6714 inventory_movements: Vec::new(),
6715 })
6716 }
6717
6718 #[allow(clippy::too_many_arguments)]
6723 fn generate_ocpm_events(
6724 &mut self,
6725 flows: &DocumentFlowSnapshot,
6726 sourcing: &SourcingSnapshot,
6727 hr: &HrSnapshot,
6728 manufacturing: &ManufacturingSnapshot,
6729 banking: &BankingSnapshot,
6730 audit: &AuditSnapshot,
6731 financial_reporting: &FinancialReportingSnapshot,
6732 ) -> SynthResult<OcpmSnapshot> {
6733 let total_chains = flows.p2p_chains.len()
6734 + flows.o2c_chains.len()
6735 + sourcing.sourcing_projects.len()
6736 + hr.payroll_runs.len()
6737 + manufacturing.production_orders.len()
6738 + banking.customers.len()
6739 + audit.engagements.len()
6740 + financial_reporting.bank_reconciliations.len();
6741 let pb = self.create_progress_bar(total_chains as u64, "Generating OCPM Events");
6742
6743 let metadata = EventLogMetadata::new("SyntheticData OCPM Log");
6745 let mut event_log = OcpmEventLog::with_metadata(metadata).with_standard_types();
6746
6747 let ocpm_config = OcpmGeneratorConfig {
6749 generate_p2p: true,
6750 generate_o2c: true,
6751 generate_s2c: !sourcing.sourcing_projects.is_empty(),
6752 generate_h2r: !hr.payroll_runs.is_empty(),
6753 generate_mfg: !manufacturing.production_orders.is_empty(),
6754 generate_bank_recon: !financial_reporting.bank_reconciliations.is_empty(),
6755 generate_bank: !banking.customers.is_empty(),
6756 generate_audit: !audit.engagements.is_empty(),
6757 happy_path_rate: 0.75,
6758 exception_path_rate: 0.20,
6759 error_path_rate: 0.05,
6760 add_duration_variability: true,
6761 duration_std_dev_factor: 0.3,
6762 };
6763 let mut ocpm_gen = OcpmEventGenerator::with_config(self.seed + 3000, ocpm_config);
6764 let ocpm_uuid_factory = OcpmUuidFactory::new(self.seed + 3001);
6765
6766 let available_users: Vec<String> = self
6768 .master_data
6769 .employees
6770 .iter()
6771 .take(20)
6772 .map(|e| e.user_id.clone())
6773 .collect();
6774
6775 let fallback_date =
6777 NaiveDate::from_ymd_opt(2024, 1, 1).expect("static date 2024-01-01 is always valid");
6778 let base_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6779 .unwrap_or(fallback_date);
6780 let base_midnight = base_date
6781 .and_hms_opt(0, 0, 0)
6782 .expect("midnight is always valid");
6783 let base_datetime =
6784 chrono::DateTime::<chrono::Utc>::from_naive_utc_and_offset(base_midnight, chrono::Utc);
6785
6786 let add_result = |event_log: &mut OcpmEventLog,
6788 result: datasynth_ocpm::CaseGenerationResult| {
6789 for event in result.events {
6790 event_log.add_event(event);
6791 }
6792 for object in result.objects {
6793 event_log.add_object(object);
6794 }
6795 for relationship in result.relationships {
6796 event_log.add_relationship(relationship);
6797 }
6798 for corr in result.correlation_events {
6799 event_log.add_correlation_event(corr);
6800 }
6801 event_log.add_case(result.case_trace);
6802 };
6803
6804 for chain in &flows.p2p_chains {
6806 let po = &chain.purchase_order;
6807 let documents = P2pDocuments::new(
6808 &po.header.document_id,
6809 &po.vendor_id,
6810 &po.header.company_code,
6811 po.total_net_amount,
6812 &po.header.currency,
6813 &ocpm_uuid_factory,
6814 )
6815 .with_goods_receipt(
6816 chain
6817 .goods_receipts
6818 .first()
6819 .map(|gr| gr.header.document_id.as_str())
6820 .unwrap_or(""),
6821 &ocpm_uuid_factory,
6822 )
6823 .with_invoice(
6824 chain
6825 .vendor_invoice
6826 .as_ref()
6827 .map(|vi| vi.header.document_id.as_str())
6828 .unwrap_or(""),
6829 &ocpm_uuid_factory,
6830 )
6831 .with_payment(
6832 chain
6833 .payment
6834 .as_ref()
6835 .map(|p| p.header.document_id.as_str())
6836 .unwrap_or(""),
6837 &ocpm_uuid_factory,
6838 );
6839
6840 let start_time =
6841 chrono::DateTime::from_naive_utc_and_offset(po.header.entry_timestamp, chrono::Utc);
6842 let result = ocpm_gen.generate_p2p_case(&documents, start_time, &available_users);
6843 add_result(&mut event_log, result);
6844
6845 if let Some(pb) = &pb {
6846 pb.inc(1);
6847 }
6848 }
6849
6850 for chain in &flows.o2c_chains {
6852 let so = &chain.sales_order;
6853 let documents = O2cDocuments::new(
6854 &so.header.document_id,
6855 &so.customer_id,
6856 &so.header.company_code,
6857 so.total_net_amount,
6858 &so.header.currency,
6859 &ocpm_uuid_factory,
6860 )
6861 .with_delivery(
6862 chain
6863 .deliveries
6864 .first()
6865 .map(|d| d.header.document_id.as_str())
6866 .unwrap_or(""),
6867 &ocpm_uuid_factory,
6868 )
6869 .with_invoice(
6870 chain
6871 .customer_invoice
6872 .as_ref()
6873 .map(|ci| ci.header.document_id.as_str())
6874 .unwrap_or(""),
6875 &ocpm_uuid_factory,
6876 )
6877 .with_receipt(
6878 chain
6879 .customer_receipt
6880 .as_ref()
6881 .map(|r| r.header.document_id.as_str())
6882 .unwrap_or(""),
6883 &ocpm_uuid_factory,
6884 );
6885
6886 let start_time =
6887 chrono::DateTime::from_naive_utc_and_offset(so.header.entry_timestamp, chrono::Utc);
6888 let result = ocpm_gen.generate_o2c_case(&documents, start_time, &available_users);
6889 add_result(&mut event_log, result);
6890
6891 if let Some(pb) = &pb {
6892 pb.inc(1);
6893 }
6894 }
6895
6896 for project in &sourcing.sourcing_projects {
6898 let vendor_id = sourcing
6900 .contracts
6901 .iter()
6902 .find(|c| c.sourcing_project_id.as_deref() == Some(&project.project_id))
6903 .map(|c| c.vendor_id.clone())
6904 .or_else(|| sourcing.qualifications.first().map(|q| q.vendor_id.clone()))
6905 .or_else(|| {
6906 self.master_data
6907 .vendors
6908 .first()
6909 .map(|v| v.vendor_id.clone())
6910 })
6911 .unwrap_or_else(|| "V000".to_string());
6912 let mut docs = S2cDocuments::new(
6913 &project.project_id,
6914 &vendor_id,
6915 &project.company_code,
6916 project.estimated_annual_spend,
6917 &ocpm_uuid_factory,
6918 );
6919 if let Some(rfx) = sourcing
6921 .rfx_events
6922 .iter()
6923 .find(|r| r.sourcing_project_id == project.project_id)
6924 {
6925 docs = docs.with_rfx(&rfx.rfx_id, &ocpm_uuid_factory);
6926 if let Some(bid) = sourcing.bids.iter().find(|b| {
6928 b.rfx_id == rfx.rfx_id
6929 && b.status == datasynth_core::models::sourcing::BidStatus::Accepted
6930 }) {
6931 docs = docs.with_winning_bid(&bid.bid_id, &ocpm_uuid_factory);
6932 }
6933 }
6934 if let Some(contract) = sourcing
6936 .contracts
6937 .iter()
6938 .find(|c| c.sourcing_project_id.as_deref() == Some(&project.project_id))
6939 {
6940 docs = docs.with_contract(&contract.contract_id, &ocpm_uuid_factory);
6941 }
6942 let start_time = base_datetime - chrono::Duration::days(90);
6943 let result = ocpm_gen.generate_s2c_case(&docs, start_time, &available_users);
6944 add_result(&mut event_log, result);
6945
6946 if let Some(pb) = &pb {
6947 pb.inc(1);
6948 }
6949 }
6950
6951 for run in &hr.payroll_runs {
6953 let employee_id = hr
6955 .payroll_line_items
6956 .iter()
6957 .find(|li| li.payroll_id == run.payroll_id)
6958 .map(|li| li.employee_id.as_str())
6959 .unwrap_or("EMP000");
6960 let docs = H2rDocuments::new(
6961 &run.payroll_id,
6962 employee_id,
6963 &run.company_code,
6964 run.total_gross,
6965 &ocpm_uuid_factory,
6966 )
6967 .with_time_entries(
6968 hr.time_entries
6969 .iter()
6970 .filter(|t| t.date >= run.pay_period_start && t.date <= run.pay_period_end)
6971 .take(5)
6972 .map(|t| t.entry_id.as_str())
6973 .collect(),
6974 );
6975 let start_time = base_datetime - chrono::Duration::days(30);
6976 let result = ocpm_gen.generate_h2r_case(&docs, start_time, &available_users);
6977 add_result(&mut event_log, result);
6978
6979 if let Some(pb) = &pb {
6980 pb.inc(1);
6981 }
6982 }
6983
6984 for order in &manufacturing.production_orders {
6986 let mut docs = MfgDocuments::new(
6987 &order.order_id,
6988 &order.material_id,
6989 &order.company_code,
6990 order.planned_quantity,
6991 &ocpm_uuid_factory,
6992 )
6993 .with_operations(
6994 order
6995 .operations
6996 .iter()
6997 .map(|o| format!("OP-{:04}", o.operation_number))
6998 .collect::<Vec<_>>()
6999 .iter()
7000 .map(std::string::String::as_str)
7001 .collect(),
7002 );
7003 if let Some(insp) = manufacturing
7005 .quality_inspections
7006 .iter()
7007 .find(|i| i.reference_id == order.order_id)
7008 {
7009 docs = docs.with_inspection(&insp.inspection_id, &ocpm_uuid_factory);
7010 }
7011 if let Some(cc) = manufacturing.cycle_counts.iter().find(|cc| {
7013 cc.items
7014 .iter()
7015 .any(|item| item.material_id == order.material_id)
7016 }) {
7017 docs = docs.with_cycle_count(&cc.count_id, &ocpm_uuid_factory);
7018 }
7019 let start_time = base_datetime - chrono::Duration::days(60);
7020 let result = ocpm_gen.generate_mfg_case(&docs, start_time, &available_users);
7021 add_result(&mut event_log, result);
7022
7023 if let Some(pb) = &pb {
7024 pb.inc(1);
7025 }
7026 }
7027
7028 for customer in &banking.customers {
7030 let customer_id_str = customer.customer_id.to_string();
7031 let mut docs = BankDocuments::new(&customer_id_str, "1000", &ocpm_uuid_factory);
7032 if let Some(account) = banking
7034 .accounts
7035 .iter()
7036 .find(|a| a.primary_owner_id == customer.customer_id)
7037 {
7038 let account_id_str = account.account_id.to_string();
7039 docs = docs.with_account(&account_id_str, &ocpm_uuid_factory);
7040 let txn_strs: Vec<String> = banking
7042 .transactions
7043 .iter()
7044 .filter(|t| t.account_id == account.account_id)
7045 .take(10)
7046 .map(|t| t.transaction_id.to_string())
7047 .collect();
7048 let txn_ids: Vec<&str> = txn_strs.iter().map(std::string::String::as_str).collect();
7049 let txn_amounts: Vec<rust_decimal::Decimal> = banking
7050 .transactions
7051 .iter()
7052 .filter(|t| t.account_id == account.account_id)
7053 .take(10)
7054 .map(|t| t.amount)
7055 .collect();
7056 if !txn_ids.is_empty() {
7057 docs = docs.with_transactions(txn_ids, txn_amounts);
7058 }
7059 }
7060 let start_time = base_datetime - chrono::Duration::days(180);
7061 let result = ocpm_gen.generate_bank_case(&docs, start_time, &available_users);
7062 add_result(&mut event_log, result);
7063
7064 if let Some(pb) = &pb {
7065 pb.inc(1);
7066 }
7067 }
7068
7069 for engagement in &audit.engagements {
7071 let engagement_id_str = engagement.engagement_id.to_string();
7072 let docs = AuditDocuments::new(
7073 &engagement_id_str,
7074 &engagement.client_entity_id,
7075 &ocpm_uuid_factory,
7076 )
7077 .with_workpapers(
7078 audit
7079 .workpapers
7080 .iter()
7081 .filter(|w| w.engagement_id == engagement.engagement_id)
7082 .take(10)
7083 .map(|w| w.workpaper_id.to_string())
7084 .collect::<Vec<_>>()
7085 .iter()
7086 .map(std::string::String::as_str)
7087 .collect(),
7088 )
7089 .with_evidence(
7090 audit
7091 .evidence
7092 .iter()
7093 .filter(|e| e.engagement_id == engagement.engagement_id)
7094 .take(10)
7095 .map(|e| e.evidence_id.to_string())
7096 .collect::<Vec<_>>()
7097 .iter()
7098 .map(std::string::String::as_str)
7099 .collect(),
7100 )
7101 .with_risks(
7102 audit
7103 .risk_assessments
7104 .iter()
7105 .filter(|r| r.engagement_id == engagement.engagement_id)
7106 .take(5)
7107 .map(|r| r.risk_id.to_string())
7108 .collect::<Vec<_>>()
7109 .iter()
7110 .map(std::string::String::as_str)
7111 .collect(),
7112 )
7113 .with_findings(
7114 audit
7115 .findings
7116 .iter()
7117 .filter(|f| f.engagement_id == engagement.engagement_id)
7118 .take(5)
7119 .map(|f| f.finding_id.to_string())
7120 .collect::<Vec<_>>()
7121 .iter()
7122 .map(std::string::String::as_str)
7123 .collect(),
7124 )
7125 .with_judgments(
7126 audit
7127 .judgments
7128 .iter()
7129 .filter(|j| j.engagement_id == engagement.engagement_id)
7130 .take(5)
7131 .map(|j| j.judgment_id.to_string())
7132 .collect::<Vec<_>>()
7133 .iter()
7134 .map(std::string::String::as_str)
7135 .collect(),
7136 );
7137 let start_time = base_datetime - chrono::Duration::days(120);
7138 let result = ocpm_gen.generate_audit_case(&docs, start_time, &available_users);
7139 add_result(&mut event_log, result);
7140
7141 if let Some(pb) = &pb {
7142 pb.inc(1);
7143 }
7144 }
7145
7146 for recon in &financial_reporting.bank_reconciliations {
7148 let docs = BankReconDocuments::new(
7149 &recon.reconciliation_id,
7150 &recon.bank_account_id,
7151 &recon.company_code,
7152 recon.bank_ending_balance,
7153 &ocpm_uuid_factory,
7154 )
7155 .with_statement_lines(
7156 recon
7157 .statement_lines
7158 .iter()
7159 .take(20)
7160 .map(|l| l.line_id.as_str())
7161 .collect(),
7162 )
7163 .with_reconciling_items(
7164 recon
7165 .reconciling_items
7166 .iter()
7167 .take(10)
7168 .map(|i| i.item_id.as_str())
7169 .collect(),
7170 );
7171 let start_time = base_datetime - chrono::Duration::days(30);
7172 let result = ocpm_gen.generate_bank_recon_case(&docs, start_time, &available_users);
7173 add_result(&mut event_log, result);
7174
7175 if let Some(pb) = &pb {
7176 pb.inc(1);
7177 }
7178 }
7179
7180 event_log.compute_variants();
7182
7183 let summary = event_log.summary();
7184
7185 if let Some(pb) = pb {
7186 pb.finish_with_message(format!(
7187 "Generated {} OCPM events, {} objects",
7188 summary.event_count, summary.object_count
7189 ));
7190 }
7191
7192 Ok(OcpmSnapshot {
7193 event_count: summary.event_count,
7194 object_count: summary.object_count,
7195 case_count: summary.case_count,
7196 event_log: Some(event_log),
7197 })
7198 }
7199
7200 fn inject_anomalies(&mut self, entries: &mut [JournalEntry]) -> SynthResult<AnomalyLabels> {
7202 let pb = self.create_progress_bar(entries.len() as u64, "Injecting Anomalies");
7203
7204 let total_rate = if self.config.anomaly_injection.enabled {
7207 self.config.anomaly_injection.rates.total_rate
7208 } else if self.config.fraud.enabled {
7209 self.config.fraud.fraud_rate
7210 } else {
7211 0.02
7212 };
7213
7214 let fraud_rate = if self.config.anomaly_injection.enabled {
7215 self.config.anomaly_injection.rates.fraud_rate
7216 } else {
7217 AnomalyRateConfig::default().fraud_rate
7218 };
7219
7220 let error_rate = if self.config.anomaly_injection.enabled {
7221 self.config.anomaly_injection.rates.error_rate
7222 } else {
7223 AnomalyRateConfig::default().error_rate
7224 };
7225
7226 let process_issue_rate = if self.config.anomaly_injection.enabled {
7227 self.config.anomaly_injection.rates.process_rate
7228 } else {
7229 AnomalyRateConfig::default().process_issue_rate
7230 };
7231
7232 let anomaly_config = AnomalyInjectorConfig {
7233 rates: AnomalyRateConfig {
7234 total_rate,
7235 fraud_rate,
7236 error_rate,
7237 process_issue_rate,
7238 ..Default::default()
7239 },
7240 seed: self.seed + 5000,
7241 ..Default::default()
7242 };
7243
7244 let mut injector = AnomalyInjector::new(anomaly_config);
7245 let result = injector.process_entries(entries);
7246
7247 if let Some(pb) = &pb {
7248 pb.inc(entries.len() as u64);
7249 pb.finish_with_message("Anomaly injection complete");
7250 }
7251
7252 let mut by_type = HashMap::new();
7253 for label in &result.labels {
7254 *by_type
7255 .entry(format!("{:?}", label.anomaly_type))
7256 .or_insert(0) += 1;
7257 }
7258
7259 Ok(AnomalyLabels {
7260 labels: result.labels,
7261 summary: Some(result.summary),
7262 by_type,
7263 })
7264 }
7265
7266 fn validate_journal_entries(
7275 &mut self,
7276 entries: &[JournalEntry],
7277 ) -> SynthResult<BalanceValidationResult> {
7278 let clean_entries: Vec<&JournalEntry> = entries
7280 .iter()
7281 .filter(|e| {
7282 e.header
7283 .header_text
7284 .as_ref()
7285 .map(|t| !t.contains("[HUMAN_ERROR:"))
7286 .unwrap_or(true)
7287 })
7288 .collect();
7289
7290 let pb = self.create_progress_bar(clean_entries.len() as u64, "Validating Balances");
7291
7292 let config = BalanceTrackerConfig {
7294 validate_on_each_entry: false, track_history: false, fail_on_validation_error: false, ..Default::default()
7298 };
7299 let validation_currency = self
7300 .config
7301 .companies
7302 .first()
7303 .map(|c| c.currency.clone())
7304 .unwrap_or_else(|| "USD".to_string());
7305
7306 let mut tracker = RunningBalanceTracker::new_with_currency(config, validation_currency);
7307
7308 let clean_refs: Vec<JournalEntry> = clean_entries.into_iter().cloned().collect();
7310 let errors = tracker.apply_entries(&clean_refs);
7311
7312 if let Some(pb) = &pb {
7313 pb.inc(entries.len() as u64);
7314 }
7315
7316 let has_unbalanced = tracker
7319 .get_validation_errors()
7320 .iter()
7321 .any(|e| e.error_type == datasynth_generators::ValidationErrorType::UnbalancedEntry);
7322
7323 let mut all_errors = errors;
7326 all_errors.extend(tracker.get_validation_errors().iter().cloned());
7327 let company_codes: Vec<String> = self
7328 .config
7329 .companies
7330 .iter()
7331 .map(|c| c.code.clone())
7332 .collect();
7333
7334 let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7335 .map(|d| d + chrono::Months::new(self.config.global.period_months))
7336 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7337
7338 for company_code in &company_codes {
7339 if let Err(e) = tracker.validate_balance_sheet(company_code, end_date, None) {
7340 all_errors.push(e);
7341 }
7342 }
7343
7344 let stats = tracker.get_statistics();
7346
7347 let is_balanced = all_errors.is_empty();
7349
7350 if let Some(pb) = pb {
7351 let msg = if is_balanced {
7352 "Balance validation passed"
7353 } else {
7354 "Balance validation completed with errors"
7355 };
7356 pb.finish_with_message(msg);
7357 }
7358
7359 Ok(BalanceValidationResult {
7360 validated: true,
7361 is_balanced,
7362 entries_processed: stats.entries_processed,
7363 total_debits: stats.total_debits,
7364 total_credits: stats.total_credits,
7365 accounts_tracked: stats.accounts_tracked,
7366 companies_tracked: stats.companies_tracked,
7367 validation_errors: all_errors,
7368 has_unbalanced_entries: has_unbalanced,
7369 })
7370 }
7371
7372 fn inject_data_quality(
7377 &mut self,
7378 entries: &mut [JournalEntry],
7379 ) -> SynthResult<DataQualityStats> {
7380 let pb = self.create_progress_bar(entries.len() as u64, "Injecting Data Quality Issues");
7381
7382 let config = if self.config.data_quality.enabled {
7385 let dq = &self.config.data_quality;
7386 DataQualityConfig {
7387 enable_missing_values: dq.missing_values.enabled,
7388 missing_values: datasynth_generators::MissingValueConfig {
7389 global_rate: dq.effective_missing_rate(),
7390 ..Default::default()
7391 },
7392 enable_format_variations: dq.format_variations.enabled,
7393 format_variations: datasynth_generators::FormatVariationConfig {
7394 date_variation_rate: dq.format_variations.dates.rate,
7395 amount_variation_rate: dq.format_variations.amounts.rate,
7396 identifier_variation_rate: dq.format_variations.identifiers.rate,
7397 ..Default::default()
7398 },
7399 enable_duplicates: dq.duplicates.enabled,
7400 duplicates: datasynth_generators::DuplicateConfig {
7401 duplicate_rate: dq.effective_duplicate_rate(),
7402 ..Default::default()
7403 },
7404 enable_typos: dq.typos.enabled,
7405 typos: datasynth_generators::TypoConfig {
7406 char_error_rate: dq.effective_typo_rate(),
7407 ..Default::default()
7408 },
7409 enable_encoding_issues: dq.encoding_issues.enabled,
7410 encoding_issue_rate: dq.encoding_issues.rate,
7411 seed: self.seed.wrapping_add(77), track_statistics: true,
7413 }
7414 } else {
7415 DataQualityConfig::minimal()
7416 };
7417 let mut injector = DataQualityInjector::new(config);
7418
7419 injector.set_country_pack(self.primary_pack().clone());
7421
7422 let context = HashMap::new();
7424
7425 for entry in entries.iter_mut() {
7426 if let Some(text) = &entry.header.header_text {
7428 let processed = injector.process_text_field(
7429 "header_text",
7430 text,
7431 &entry.header.document_id.to_string(),
7432 &context,
7433 );
7434 match processed {
7435 Some(new_text) if new_text != *text => {
7436 entry.header.header_text = Some(new_text);
7437 }
7438 None => {
7439 entry.header.header_text = None; }
7441 _ => {}
7442 }
7443 }
7444
7445 if let Some(ref_text) = &entry.header.reference {
7447 let processed = injector.process_text_field(
7448 "reference",
7449 ref_text,
7450 &entry.header.document_id.to_string(),
7451 &context,
7452 );
7453 match processed {
7454 Some(new_text) if new_text != *ref_text => {
7455 entry.header.reference = Some(new_text);
7456 }
7457 None => {
7458 entry.header.reference = None;
7459 }
7460 _ => {}
7461 }
7462 }
7463
7464 let user_persona = entry.header.user_persona.clone();
7466 if let Some(processed) = injector.process_text_field(
7467 "user_persona",
7468 &user_persona,
7469 &entry.header.document_id.to_string(),
7470 &context,
7471 ) {
7472 if processed != user_persona {
7473 entry.header.user_persona = processed;
7474 }
7475 }
7476
7477 for line in &mut entry.lines {
7479 if let Some(ref text) = line.line_text {
7481 let processed = injector.process_text_field(
7482 "line_text",
7483 text,
7484 &entry.header.document_id.to_string(),
7485 &context,
7486 );
7487 match processed {
7488 Some(new_text) if new_text != *text => {
7489 line.line_text = Some(new_text);
7490 }
7491 None => {
7492 line.line_text = None;
7493 }
7494 _ => {}
7495 }
7496 }
7497
7498 if let Some(cc) = &line.cost_center {
7500 let processed = injector.process_text_field(
7501 "cost_center",
7502 cc,
7503 &entry.header.document_id.to_string(),
7504 &context,
7505 );
7506 match processed {
7507 Some(new_cc) if new_cc != *cc => {
7508 line.cost_center = Some(new_cc);
7509 }
7510 None => {
7511 line.cost_center = None;
7512 }
7513 _ => {}
7514 }
7515 }
7516 }
7517
7518 if let Some(pb) = &pb {
7519 pb.inc(1);
7520 }
7521 }
7522
7523 if let Some(pb) = pb {
7524 pb.finish_with_message("Data quality injection complete");
7525 }
7526
7527 Ok(injector.stats().clone())
7528 }
7529
7530 fn generate_audit_data(&mut self, entries: &[JournalEntry]) -> SynthResult<AuditSnapshot> {
7541 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7542 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7543 let fiscal_year = start_date.year() as u16;
7544 let period_end = start_date + chrono::Months::new(self.config.global.period_months);
7545
7546 let total_revenue: rust_decimal::Decimal = entries
7548 .iter()
7549 .flat_map(|e| e.lines.iter())
7550 .filter(|l| l.credit_amount > rust_decimal::Decimal::ZERO)
7551 .map(|l| l.credit_amount)
7552 .sum();
7553
7554 let total_items = (self.phase_config.audit_engagements * 50) as u64; let pb = self.create_progress_bar(total_items, "Generating Audit Data");
7556
7557 let mut snapshot = AuditSnapshot::default();
7558
7559 let mut engagement_gen = AuditEngagementGenerator::new(self.seed + 7000);
7561 let mut workpaper_gen = WorkpaperGenerator::new(self.seed + 7100);
7562 let mut evidence_gen = EvidenceGenerator::new(self.seed + 7200);
7563 let mut risk_gen = RiskAssessmentGenerator::new(self.seed + 7300);
7564 let mut finding_gen = FindingGenerator::new(self.seed + 7400);
7565 let mut judgment_gen = JudgmentGenerator::new(self.seed + 7500);
7566
7567 let accounts: Vec<String> = self
7569 .coa
7570 .as_ref()
7571 .map(|coa| {
7572 coa.get_postable_accounts()
7573 .iter()
7574 .map(|acc| acc.account_code().to_string())
7575 .collect()
7576 })
7577 .unwrap_or_default();
7578
7579 for (i, company) in self.config.companies.iter().enumerate() {
7581 let company_revenue = total_revenue
7583 * rust_decimal::Decimal::try_from(company.volume_weight).unwrap_or_default();
7584
7585 let engagements_for_company =
7587 self.phase_config.audit_engagements / self.config.companies.len().max(1);
7588 let extra = if i < self.phase_config.audit_engagements % self.config.companies.len() {
7589 1
7590 } else {
7591 0
7592 };
7593
7594 for _eng_idx in 0..(engagements_for_company + extra) {
7595 let mut engagement = engagement_gen.generate_engagement(
7597 &company.code,
7598 &company.name,
7599 fiscal_year,
7600 period_end,
7601 company_revenue,
7602 None, );
7604
7605 if !self.master_data.employees.is_empty() {
7607 let emp_count = self.master_data.employees.len();
7608 let base = (i * 10 + _eng_idx) % emp_count;
7610 engagement.engagement_partner_id = self.master_data.employees[base % emp_count]
7611 .employee_id
7612 .clone();
7613 engagement.engagement_manager_id = self.master_data.employees
7614 [(base + 1) % emp_count]
7615 .employee_id
7616 .clone();
7617 let real_team: Vec<String> = engagement
7618 .team_member_ids
7619 .iter()
7620 .enumerate()
7621 .map(|(j, _)| {
7622 self.master_data.employees[(base + 2 + j) % emp_count]
7623 .employee_id
7624 .clone()
7625 })
7626 .collect();
7627 engagement.team_member_ids = real_team;
7628 }
7629
7630 if let Some(pb) = &pb {
7631 pb.inc(1);
7632 }
7633
7634 let team_members: Vec<String> = engagement.team_member_ids.clone();
7636
7637 let workpapers =
7639 workpaper_gen.generate_complete_workpaper_set(&engagement, &team_members);
7640
7641 for wp in &workpapers {
7642 if let Some(pb) = &pb {
7643 pb.inc(1);
7644 }
7645
7646 let evidence = evidence_gen.generate_evidence_for_workpaper(
7648 wp,
7649 &team_members,
7650 wp.preparer_date,
7651 );
7652
7653 for _ in &evidence {
7654 if let Some(pb) = &pb {
7655 pb.inc(1);
7656 }
7657 }
7658
7659 snapshot.evidence.extend(evidence);
7660 }
7661
7662 let risks =
7664 risk_gen.generate_risks_for_engagement(&engagement, &team_members, &accounts);
7665
7666 for _ in &risks {
7667 if let Some(pb) = &pb {
7668 pb.inc(1);
7669 }
7670 }
7671 snapshot.risk_assessments.extend(risks);
7672
7673 let findings = finding_gen.generate_findings_for_engagement(
7675 &engagement,
7676 &workpapers,
7677 &team_members,
7678 );
7679
7680 for _ in &findings {
7681 if let Some(pb) = &pb {
7682 pb.inc(1);
7683 }
7684 }
7685 snapshot.findings.extend(findings);
7686
7687 let judgments =
7689 judgment_gen.generate_judgments_for_engagement(&engagement, &team_members);
7690
7691 for _ in &judgments {
7692 if let Some(pb) = &pb {
7693 pb.inc(1);
7694 }
7695 }
7696 snapshot.judgments.extend(judgments);
7697
7698 snapshot.workpapers.extend(workpapers);
7700 snapshot.engagements.push(engagement);
7701 }
7702 }
7703
7704 if let Some(pb) = pb {
7705 pb.finish_with_message(format!(
7706 "Audit data: {} engagements, {} workpapers, {} evidence",
7707 snapshot.engagements.len(),
7708 snapshot.workpapers.len(),
7709 snapshot.evidence.len()
7710 ));
7711 }
7712
7713 Ok(snapshot)
7714 }
7715
7716 fn export_graphs(
7723 &mut self,
7724 entries: &[JournalEntry],
7725 _coa: &Arc<ChartOfAccounts>,
7726 stats: &mut EnhancedGenerationStatistics,
7727 ) -> SynthResult<GraphExportSnapshot> {
7728 let pb = self.create_progress_bar(100, "Exporting Graphs");
7729
7730 let mut snapshot = GraphExportSnapshot::default();
7731
7732 let output_dir = self
7734 .output_path
7735 .clone()
7736 .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
7737 let graph_dir = output_dir.join(&self.config.graph_export.output_subdirectory);
7738
7739 for graph_type in &self.config.graph_export.graph_types {
7741 if let Some(pb) = &pb {
7742 pb.inc(10);
7743 }
7744
7745 let graph_config = TransactionGraphConfig {
7747 include_vendors: false,
7748 include_customers: false,
7749 create_debit_credit_edges: true,
7750 include_document_nodes: graph_type.include_document_nodes,
7751 min_edge_weight: graph_type.min_edge_weight,
7752 aggregate_parallel_edges: graph_type.aggregate_edges,
7753 framework: None,
7754 };
7755
7756 let mut builder = TransactionGraphBuilder::new(graph_config);
7757 builder.add_journal_entries(entries);
7758 let graph = builder.build();
7759
7760 stats.graph_node_count += graph.node_count();
7762 stats.graph_edge_count += graph.edge_count();
7763
7764 if let Some(pb) = &pb {
7765 pb.inc(40);
7766 }
7767
7768 for format in &self.config.graph_export.formats {
7770 let format_dir = graph_dir.join(&graph_type.name).join(format_name(*format));
7771
7772 if let Err(e) = std::fs::create_dir_all(&format_dir) {
7774 warn!("Failed to create graph output directory: {}", e);
7775 continue;
7776 }
7777
7778 match format {
7779 datasynth_config::schema::GraphExportFormat::PytorchGeometric => {
7780 let pyg_config = PyGExportConfig {
7781 common: datasynth_graph::CommonExportConfig {
7782 export_node_features: true,
7783 export_edge_features: true,
7784 export_node_labels: true,
7785 export_edge_labels: true,
7786 export_masks: true,
7787 train_ratio: self.config.graph_export.train_ratio,
7788 val_ratio: self.config.graph_export.validation_ratio,
7789 seed: self.config.graph_export.split_seed.unwrap_or(self.seed),
7790 },
7791 one_hot_categoricals: false,
7792 };
7793
7794 let exporter = PyGExporter::new(pyg_config);
7795 match exporter.export(&graph, &format_dir) {
7796 Ok(metadata) => {
7797 snapshot.exports.insert(
7798 format!("{}_{}", graph_type.name, "pytorch_geometric"),
7799 GraphExportInfo {
7800 name: graph_type.name.clone(),
7801 format: "pytorch_geometric".to_string(),
7802 output_path: format_dir.clone(),
7803 node_count: metadata.num_nodes,
7804 edge_count: metadata.num_edges,
7805 },
7806 );
7807 snapshot.graph_count += 1;
7808 }
7809 Err(e) => {
7810 warn!("Failed to export PyTorch Geometric graph: {}", e);
7811 }
7812 }
7813 }
7814 datasynth_config::schema::GraphExportFormat::Neo4j => {
7815 use datasynth_graph::{Neo4jExportConfig, Neo4jExporter};
7816
7817 let neo4j_config = Neo4jExportConfig {
7818 export_node_properties: true,
7819 export_edge_properties: true,
7820 export_features: true,
7821 generate_cypher: true,
7822 generate_admin_import: true,
7823 database_name: "synth".to_string(),
7824 cypher_batch_size: 1000,
7825 };
7826
7827 let exporter = Neo4jExporter::new(neo4j_config);
7828 match exporter.export(&graph, &format_dir) {
7829 Ok(metadata) => {
7830 snapshot.exports.insert(
7831 format!("{}_{}", graph_type.name, "neo4j"),
7832 GraphExportInfo {
7833 name: graph_type.name.clone(),
7834 format: "neo4j".to_string(),
7835 output_path: format_dir.clone(),
7836 node_count: metadata.num_nodes,
7837 edge_count: metadata.num_edges,
7838 },
7839 );
7840 snapshot.graph_count += 1;
7841 }
7842 Err(e) => {
7843 warn!("Failed to export Neo4j graph: {}", e);
7844 }
7845 }
7846 }
7847 datasynth_config::schema::GraphExportFormat::Dgl => {
7848 use datasynth_graph::{DGLExportConfig, DGLExporter};
7849
7850 let dgl_config = DGLExportConfig {
7851 common: datasynth_graph::CommonExportConfig {
7852 export_node_features: true,
7853 export_edge_features: true,
7854 export_node_labels: true,
7855 export_edge_labels: true,
7856 export_masks: true,
7857 train_ratio: self.config.graph_export.train_ratio,
7858 val_ratio: self.config.graph_export.validation_ratio,
7859 seed: self.config.graph_export.split_seed.unwrap_or(self.seed),
7860 },
7861 heterogeneous: false,
7862 include_pickle_script: true, };
7864
7865 let exporter = DGLExporter::new(dgl_config);
7866 match exporter.export(&graph, &format_dir) {
7867 Ok(metadata) => {
7868 snapshot.exports.insert(
7869 format!("{}_{}", graph_type.name, "dgl"),
7870 GraphExportInfo {
7871 name: graph_type.name.clone(),
7872 format: "dgl".to_string(),
7873 output_path: format_dir.clone(),
7874 node_count: metadata.common.num_nodes,
7875 edge_count: metadata.common.num_edges,
7876 },
7877 );
7878 snapshot.graph_count += 1;
7879 }
7880 Err(e) => {
7881 warn!("Failed to export DGL graph: {}", e);
7882 }
7883 }
7884 }
7885 datasynth_config::schema::GraphExportFormat::RustGraph => {
7886 use datasynth_graph::{
7887 RustGraphExportConfig, RustGraphExporter, RustGraphOutputFormat,
7888 };
7889
7890 let rustgraph_config = RustGraphExportConfig {
7891 include_features: true,
7892 include_temporal: true,
7893 include_labels: true,
7894 source_name: "datasynth".to_string(),
7895 batch_id: None,
7896 output_format: RustGraphOutputFormat::JsonLines,
7897 export_node_properties: true,
7898 export_edge_properties: true,
7899 pretty_print: false,
7900 };
7901
7902 let exporter = RustGraphExporter::new(rustgraph_config);
7903 match exporter.export(&graph, &format_dir) {
7904 Ok(metadata) => {
7905 snapshot.exports.insert(
7906 format!("{}_{}", graph_type.name, "rustgraph"),
7907 GraphExportInfo {
7908 name: graph_type.name.clone(),
7909 format: "rustgraph".to_string(),
7910 output_path: format_dir.clone(),
7911 node_count: metadata.num_nodes,
7912 edge_count: metadata.num_edges,
7913 },
7914 );
7915 snapshot.graph_count += 1;
7916 }
7917 Err(e) => {
7918 warn!("Failed to export RustGraph: {}", e);
7919 }
7920 }
7921 }
7922 datasynth_config::schema::GraphExportFormat::RustGraphHypergraph => {
7923 debug!("RustGraphHypergraph format is handled in Phase 10b (hypergraph export)");
7925 }
7926 }
7927 }
7928
7929 if let Some(pb) = &pb {
7930 pb.inc(40);
7931 }
7932 }
7933
7934 stats.graph_export_count = snapshot.graph_count;
7935 snapshot.exported = snapshot.graph_count > 0;
7936
7937 if let Some(pb) = pb {
7938 pb.finish_with_message(format!(
7939 "Graphs exported: {} graphs ({} nodes, {} edges)",
7940 snapshot.graph_count, stats.graph_node_count, stats.graph_edge_count
7941 ));
7942 }
7943
7944 Ok(snapshot)
7945 }
7946
7947 fn build_additional_graphs(
7952 &self,
7953 banking: &BankingSnapshot,
7954 intercompany: &IntercompanySnapshot,
7955 entries: &[JournalEntry],
7956 stats: &mut EnhancedGenerationStatistics,
7957 ) {
7958 let output_dir = self
7959 .output_path
7960 .clone()
7961 .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
7962 let graph_dir = output_dir.join(&self.config.graph_export.output_subdirectory);
7963
7964 if !banking.customers.is_empty() && !banking.transactions.is_empty() {
7966 info!("Phase 10c: Building banking network graph");
7967 let config = BankingGraphConfig::default();
7968 let mut builder = BankingGraphBuilder::new(config);
7969 builder.add_customers(&banking.customers);
7970 builder.add_accounts(&banking.accounts, &banking.customers);
7971 builder.add_transactions(&banking.transactions);
7972 let graph = builder.build();
7973
7974 let node_count = graph.node_count();
7975 let edge_count = graph.edge_count();
7976 stats.graph_node_count += node_count;
7977 stats.graph_edge_count += edge_count;
7978
7979 for format in &self.config.graph_export.formats {
7981 if matches!(
7982 format,
7983 datasynth_config::schema::GraphExportFormat::PytorchGeometric
7984 ) {
7985 let format_dir = graph_dir.join("banking_network").join("pytorch_geometric");
7986 if let Err(e) = std::fs::create_dir_all(&format_dir) {
7987 warn!("Failed to create banking graph output dir: {}", e);
7988 continue;
7989 }
7990 let pyg_config = PyGExportConfig::default();
7991 let exporter = PyGExporter::new(pyg_config);
7992 if let Err(e) = exporter.export(&graph, &format_dir) {
7993 warn!("Failed to export banking graph as PyG: {}", e);
7994 } else {
7995 info!(
7996 "Banking network graph exported: {} nodes, {} edges",
7997 node_count, edge_count
7998 );
7999 }
8000 }
8001 }
8002 }
8003
8004 let approval_entries: Vec<_> = entries
8006 .iter()
8007 .filter(|je| je.header.approval_workflow.is_some())
8008 .collect();
8009
8010 if !approval_entries.is_empty() {
8011 info!(
8012 "Phase 10c: Building approval network graph ({} entries with approvals)",
8013 approval_entries.len()
8014 );
8015 let config = ApprovalGraphConfig::default();
8016 let mut builder = ApprovalGraphBuilder::new(config);
8017
8018 for je in &approval_entries {
8019 if let Some(ref wf) = je.header.approval_workflow {
8020 for action in &wf.actions {
8021 let record = datasynth_core::models::ApprovalRecord {
8022 approval_id: format!(
8023 "APR-{}-{}",
8024 je.header.document_id, action.approval_level
8025 ),
8026 document_number: je.header.document_id.to_string(),
8027 document_type: "JE".to_string(),
8028 company_code: je.company_code().to_string(),
8029 requester_id: wf.preparer_id.clone(),
8030 requester_name: Some(wf.preparer_name.clone()),
8031 approver_id: action.actor_id.clone(),
8032 approver_name: action.actor_name.clone(),
8033 approval_date: je.posting_date(),
8034 action: format!("{:?}", action.action),
8035 amount: wf.amount,
8036 approval_limit: None,
8037 comments: action.comments.clone(),
8038 delegation_from: None,
8039 is_auto_approved: false,
8040 };
8041 builder.add_approval(&record);
8042 }
8043 }
8044 }
8045
8046 let graph = builder.build();
8047 let node_count = graph.node_count();
8048 let edge_count = graph.edge_count();
8049 stats.graph_node_count += node_count;
8050 stats.graph_edge_count += edge_count;
8051
8052 for format in &self.config.graph_export.formats {
8054 if matches!(
8055 format,
8056 datasynth_config::schema::GraphExportFormat::PytorchGeometric
8057 ) {
8058 let format_dir = graph_dir.join("approval_network").join("pytorch_geometric");
8059 if let Err(e) = std::fs::create_dir_all(&format_dir) {
8060 warn!("Failed to create approval graph output dir: {}", e);
8061 continue;
8062 }
8063 let pyg_config = PyGExportConfig::default();
8064 let exporter = PyGExporter::new(pyg_config);
8065 if let Err(e) = exporter.export(&graph, &format_dir) {
8066 warn!("Failed to export approval graph as PyG: {}", e);
8067 } else {
8068 info!(
8069 "Approval network graph exported: {} nodes, {} edges",
8070 node_count, edge_count
8071 );
8072 }
8073 }
8074 }
8075 }
8076
8077 if self.config.companies.len() >= 2 {
8079 info!(
8080 "Phase 10c: Building entity relationship graph ({} companies)",
8081 self.config.companies.len()
8082 );
8083
8084 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8085 .unwrap_or_else(|_| NaiveDate::from_ymd_opt(2024, 1, 1).expect("valid date"));
8086
8087 let parent_code = &self.config.companies[0].code;
8089 let mut companies: Vec<datasynth_core::models::Company> =
8090 Vec::with_capacity(self.config.companies.len());
8091
8092 let first = &self.config.companies[0];
8094 companies.push(datasynth_core::models::Company::parent(
8095 &first.code,
8096 &first.name,
8097 &first.country,
8098 &first.currency,
8099 ));
8100
8101 for cc in self.config.companies.iter().skip(1) {
8103 companies.push(datasynth_core::models::Company::subsidiary(
8104 &cc.code,
8105 &cc.name,
8106 &cc.country,
8107 &cc.currency,
8108 parent_code,
8109 rust_decimal::Decimal::from(100),
8110 ));
8111 }
8112
8113 let relationships: Vec<datasynth_core::models::intercompany::IntercompanyRelationship> =
8115 self.config
8116 .companies
8117 .iter()
8118 .skip(1)
8119 .enumerate()
8120 .map(|(i, cc)| {
8121 let mut rel =
8122 datasynth_core::models::intercompany::IntercompanyRelationship::new(
8123 format!("REL{:03}", i + 1),
8124 parent_code.clone(),
8125 cc.code.clone(),
8126 rust_decimal::Decimal::from(100),
8127 start_date,
8128 );
8129 rel.functional_currency = cc.currency.clone();
8130 rel
8131 })
8132 .collect();
8133
8134 let mut builder = EntityGraphBuilder::new(EntityGraphConfig::default());
8135 builder.add_companies(&companies);
8136 builder.add_ownership_relationships(&relationships);
8137
8138 for pair in &intercompany.matched_pairs {
8140 builder.add_intercompany_edge(
8141 &pair.seller_company,
8142 &pair.buyer_company,
8143 pair.amount,
8144 &format!("{:?}", pair.transaction_type),
8145 );
8146 }
8147
8148 let graph = builder.build();
8149 let node_count = graph.node_count();
8150 let edge_count = graph.edge_count();
8151 stats.graph_node_count += node_count;
8152 stats.graph_edge_count += edge_count;
8153
8154 for format in &self.config.graph_export.formats {
8156 if matches!(
8157 format,
8158 datasynth_config::schema::GraphExportFormat::PytorchGeometric
8159 ) {
8160 let format_dir = graph_dir.join("entity_network").join("pytorch_geometric");
8161 if let Err(e) = std::fs::create_dir_all(&format_dir) {
8162 warn!("Failed to create entity graph output dir: {}", e);
8163 continue;
8164 }
8165 let pyg_config = PyGExportConfig::default();
8166 let exporter = PyGExporter::new(pyg_config);
8167 if let Err(e) = exporter.export(&graph, &format_dir) {
8168 warn!("Failed to export entity graph as PyG: {}", e);
8169 } else {
8170 info!(
8171 "Entity relationship graph exported: {} nodes, {} edges",
8172 node_count, edge_count
8173 );
8174 }
8175 }
8176 }
8177 } else {
8178 debug!(
8179 "EntityGraphBuilder: skipped (requires 2+ companies, found {})",
8180 self.config.companies.len()
8181 );
8182 }
8183 }
8184
8185 #[allow(clippy::too_many_arguments)]
8192 fn export_hypergraph(
8193 &self,
8194 coa: &Arc<ChartOfAccounts>,
8195 entries: &[JournalEntry],
8196 document_flows: &DocumentFlowSnapshot,
8197 sourcing: &SourcingSnapshot,
8198 hr: &HrSnapshot,
8199 manufacturing: &ManufacturingSnapshot,
8200 banking: &BankingSnapshot,
8201 audit: &AuditSnapshot,
8202 financial_reporting: &FinancialReportingSnapshot,
8203 ocpm: &OcpmSnapshot,
8204 compliance: &ComplianceRegulationsSnapshot,
8205 stats: &mut EnhancedGenerationStatistics,
8206 ) -> SynthResult<HypergraphExportInfo> {
8207 use datasynth_graph::builders::hypergraph::{HypergraphBuilder, HypergraphConfig};
8208 use datasynth_graph::exporters::hypergraph::{HypergraphExportConfig, HypergraphExporter};
8209 use datasynth_graph::exporters::unified::{RustGraphUnifiedExporter, UnifiedExportConfig};
8210 use datasynth_graph::models::hypergraph::AggregationStrategy;
8211
8212 let hg_settings = &self.config.graph_export.hypergraph;
8213
8214 let aggregation_strategy = match hg_settings.aggregation_strategy.as_str() {
8216 "truncate" => AggregationStrategy::Truncate,
8217 "pool_by_counterparty" => AggregationStrategy::PoolByCounterparty,
8218 "pool_by_time_period" => AggregationStrategy::PoolByTimePeriod,
8219 "importance_sample" => AggregationStrategy::ImportanceSample,
8220 _ => AggregationStrategy::PoolByCounterparty,
8221 };
8222
8223 let builder_config = HypergraphConfig {
8224 max_nodes: hg_settings.max_nodes,
8225 aggregation_strategy,
8226 include_coso: hg_settings.governance_layer.include_coso,
8227 include_controls: hg_settings.governance_layer.include_controls,
8228 include_sox: hg_settings.governance_layer.include_sox,
8229 include_vendors: hg_settings.governance_layer.include_vendors,
8230 include_customers: hg_settings.governance_layer.include_customers,
8231 include_employees: hg_settings.governance_layer.include_employees,
8232 include_p2p: hg_settings.process_layer.include_p2p,
8233 include_o2c: hg_settings.process_layer.include_o2c,
8234 include_s2c: hg_settings.process_layer.include_s2c,
8235 include_h2r: hg_settings.process_layer.include_h2r,
8236 include_mfg: hg_settings.process_layer.include_mfg,
8237 include_bank: hg_settings.process_layer.include_bank,
8238 include_audit: hg_settings.process_layer.include_audit,
8239 include_r2r: hg_settings.process_layer.include_r2r,
8240 events_as_hyperedges: hg_settings.process_layer.events_as_hyperedges,
8241 docs_per_counterparty_threshold: hg_settings
8242 .process_layer
8243 .docs_per_counterparty_threshold,
8244 include_accounts: hg_settings.accounting_layer.include_accounts,
8245 je_as_hyperedges: hg_settings.accounting_layer.je_as_hyperedges,
8246 include_cross_layer_edges: hg_settings.cross_layer.enabled,
8247 include_compliance: self.config.compliance_regulations.enabled,
8248 };
8249
8250 let mut builder = HypergraphBuilder::new(builder_config);
8251
8252 builder.add_coso_framework();
8254
8255 if hg_settings.governance_layer.include_controls && self.config.internal_controls.enabled {
8258 let controls = InternalControl::standard_controls();
8259 builder.add_controls(&controls);
8260 }
8261
8262 builder.add_vendors(&self.master_data.vendors);
8264 builder.add_customers(&self.master_data.customers);
8265 builder.add_employees(&self.master_data.employees);
8266
8267 builder.add_p2p_documents(
8269 &document_flows.purchase_orders,
8270 &document_flows.goods_receipts,
8271 &document_flows.vendor_invoices,
8272 &document_flows.payments,
8273 );
8274 builder.add_o2c_documents(
8275 &document_flows.sales_orders,
8276 &document_flows.deliveries,
8277 &document_flows.customer_invoices,
8278 );
8279 builder.add_s2c_documents(
8280 &sourcing.sourcing_projects,
8281 &sourcing.qualifications,
8282 &sourcing.rfx_events,
8283 &sourcing.bids,
8284 &sourcing.bid_evaluations,
8285 &sourcing.contracts,
8286 );
8287 builder.add_h2r_documents(&hr.payroll_runs, &hr.time_entries, &hr.expense_reports);
8288 builder.add_mfg_documents(
8289 &manufacturing.production_orders,
8290 &manufacturing.quality_inspections,
8291 &manufacturing.cycle_counts,
8292 );
8293 builder.add_bank_documents(&banking.customers, &banking.accounts, &banking.transactions);
8294 builder.add_audit_documents(
8295 &audit.engagements,
8296 &audit.workpapers,
8297 &audit.findings,
8298 &audit.evidence,
8299 &audit.risk_assessments,
8300 &audit.judgments,
8301 );
8302 builder.add_bank_recon_documents(&financial_reporting.bank_reconciliations);
8303
8304 if let Some(ref event_log) = ocpm.event_log {
8306 builder.add_ocpm_events(event_log);
8307 }
8308
8309 if self.config.compliance_regulations.enabled
8311 && hg_settings.governance_layer.include_controls
8312 {
8313 let registry = datasynth_standards::registry::StandardRegistry::with_built_in();
8315 let standards: Vec<datasynth_core::models::compliance::ComplianceStandard> = compliance
8316 .standard_records
8317 .iter()
8318 .filter_map(|r| {
8319 let sid = datasynth_core::models::compliance::StandardId::parse(&r.standard_id);
8320 registry.get(&sid).cloned()
8321 })
8322 .collect();
8323
8324 builder.add_compliance_regulations(
8325 &standards,
8326 &compliance.findings,
8327 &compliance.filings,
8328 );
8329 }
8330
8331 builder.add_accounts(coa);
8333 builder.add_journal_entries_as_hyperedges(entries);
8334
8335 let hypergraph = builder.build();
8337
8338 let output_dir = self
8340 .output_path
8341 .clone()
8342 .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
8343 let hg_dir = output_dir
8344 .join(&self.config.graph_export.output_subdirectory)
8345 .join(&hg_settings.output_subdirectory);
8346
8347 let (num_nodes, num_edges, num_hyperedges) = match hg_settings.output_format.as_str() {
8349 "unified" => {
8350 let exporter = RustGraphUnifiedExporter::new(UnifiedExportConfig::default());
8351 let metadata = exporter.export(&hypergraph, &hg_dir).map_err(|e| {
8352 SynthError::generation(format!("Unified hypergraph export failed: {e}"))
8353 })?;
8354 (
8355 metadata.num_nodes,
8356 metadata.num_edges,
8357 metadata.num_hyperedges,
8358 )
8359 }
8360 _ => {
8361 let exporter = HypergraphExporter::new(HypergraphExportConfig::default());
8363 let metadata = exporter.export(&hypergraph, &hg_dir).map_err(|e| {
8364 SynthError::generation(format!("Hypergraph export failed: {e}"))
8365 })?;
8366 (
8367 metadata.num_nodes,
8368 metadata.num_edges,
8369 metadata.num_hyperedges,
8370 )
8371 }
8372 };
8373
8374 #[cfg(feature = "streaming")]
8376 if let Some(ref target_url) = hg_settings.stream_target {
8377 use crate::stream_client::{StreamClient, StreamConfig};
8378 use std::io::Write as _;
8379
8380 let api_key = std::env::var("RUSTGRAPH_API_KEY").ok();
8381 let stream_config = StreamConfig {
8382 target_url: target_url.clone(),
8383 batch_size: hg_settings.stream_batch_size,
8384 api_key,
8385 ..StreamConfig::default()
8386 };
8387
8388 match StreamClient::new(stream_config) {
8389 Ok(mut client) => {
8390 let exporter = RustGraphUnifiedExporter::new(UnifiedExportConfig::default());
8391 match exporter.export_to_writer(&hypergraph, &mut client) {
8392 Ok(_) => {
8393 if let Err(e) = client.flush() {
8394 warn!("Failed to flush stream client: {}", e);
8395 } else {
8396 info!("Streamed {} records to {}", client.total_sent(), target_url);
8397 }
8398 }
8399 Err(e) => {
8400 warn!("Streaming export failed: {}", e);
8401 }
8402 }
8403 }
8404 Err(e) => {
8405 warn!("Failed to create stream client: {}", e);
8406 }
8407 }
8408 }
8409
8410 stats.graph_node_count += num_nodes;
8412 stats.graph_edge_count += num_edges;
8413 stats.graph_export_count += 1;
8414
8415 Ok(HypergraphExportInfo {
8416 node_count: num_nodes,
8417 edge_count: num_edges,
8418 hyperedge_count: num_hyperedges,
8419 output_path: hg_dir,
8420 })
8421 }
8422
8423 fn generate_banking_data(&mut self) -> SynthResult<BankingSnapshot> {
8428 let pb = self.create_progress_bar(100, "Generating Banking Data");
8429
8430 let orchestrator = BankingOrchestratorBuilder::new()
8432 .config(self.config.banking.clone())
8433 .seed(self.seed + 9000)
8434 .country_pack(self.primary_pack().clone())
8435 .build();
8436
8437 if let Some(pb) = &pb {
8438 pb.inc(10);
8439 }
8440
8441 let result = orchestrator.generate();
8443
8444 if let Some(pb) = &pb {
8445 pb.inc(90);
8446 pb.finish_with_message(format!(
8447 "Banking: {} customers, {} transactions",
8448 result.customers.len(),
8449 result.transactions.len()
8450 ));
8451 }
8452
8453 let mut banking_customers = result.customers;
8458 let core_customers = &self.master_data.customers;
8459 if !core_customers.is_empty() {
8460 for (i, bc) in banking_customers.iter_mut().enumerate() {
8461 let core = &core_customers[i % core_customers.len()];
8462 bc.name = CustomerName::business(&core.name);
8463 bc.residence_country = core.country.clone();
8464 bc.enterprise_customer_id = Some(core.customer_id.clone());
8465 }
8466 debug!(
8467 "Cross-referenced {} banking customers with {} core customers",
8468 banking_customers.len(),
8469 core_customers.len()
8470 );
8471 }
8472
8473 Ok(BankingSnapshot {
8474 customers: banking_customers,
8475 accounts: result.accounts,
8476 transactions: result.transactions,
8477 transaction_labels: result.transaction_labels,
8478 customer_labels: result.customer_labels,
8479 account_labels: result.account_labels,
8480 relationship_labels: result.relationship_labels,
8481 narratives: result.narratives,
8482 suspicious_count: result.stats.suspicious_count,
8483 scenario_count: result.scenarios.len(),
8484 })
8485 }
8486
8487 fn calculate_total_transactions(&self) -> u64 {
8489 let months = self.config.global.period_months as f64;
8490 self.config
8491 .companies
8492 .iter()
8493 .map(|c| {
8494 let annual = c.annual_transaction_volume.count() as f64;
8495 let weighted = annual * c.volume_weight;
8496 (weighted * months / 12.0) as u64
8497 })
8498 .sum()
8499 }
8500
8501 fn create_progress_bar(&self, total: u64, message: &str) -> Option<ProgressBar> {
8503 if !self.phase_config.show_progress {
8504 return None;
8505 }
8506
8507 let pb = if let Some(mp) = &self.multi_progress {
8508 mp.add(ProgressBar::new(total))
8509 } else {
8510 ProgressBar::new(total)
8511 };
8512
8513 pb.set_style(
8514 ProgressStyle::default_bar()
8515 .template(&format!(
8516 "{{spinner:.green}} {message} [{{elapsed_precise}}] [{{bar:40.cyan/blue}}] {{pos}}/{{len}} ({{per_sec}})"
8517 ))
8518 .expect("Progress bar template should be valid - uses only standard indicatif placeholders")
8519 .progress_chars("#>-"),
8520 );
8521
8522 Some(pb)
8523 }
8524
8525 pub fn get_coa(&self) -> Option<Arc<ChartOfAccounts>> {
8527 self.coa.clone()
8528 }
8529
/// Returns a shared reference to the master data snapshot stored on `self`.
pub fn get_master_data(&self) -> &MasterDataSnapshot {
    &self.master_data
}
8534
8535 fn phase_compliance_regulations(
8537 &mut self,
8538 _stats: &mut EnhancedGenerationStatistics,
8539 ) -> SynthResult<ComplianceRegulationsSnapshot> {
8540 if !self.phase_config.generate_compliance_regulations {
8541 return Ok(ComplianceRegulationsSnapshot::default());
8542 }
8543
8544 info!("Phase: Generating Compliance Regulations Data");
8545
8546 let cr_config = &self.config.compliance_regulations;
8547
8548 let jurisdictions: Vec<String> = if cr_config.jurisdictions.is_empty() {
8550 self.config
8551 .companies
8552 .iter()
8553 .map(|c| c.country.clone())
8554 .collect::<std::collections::HashSet<_>>()
8555 .into_iter()
8556 .collect()
8557 } else {
8558 cr_config.jurisdictions.clone()
8559 };
8560
8561 let fallback_date =
8563 NaiveDate::from_ymd_opt(2025, 1, 1).expect("static date is always valid");
8564 let reference_date = cr_config
8565 .reference_date
8566 .as_ref()
8567 .and_then(|d| NaiveDate::parse_from_str(d, "%Y-%m-%d").ok())
8568 .unwrap_or_else(|| {
8569 NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8570 .unwrap_or(fallback_date)
8571 });
8572
8573 let reg_gen = datasynth_generators::compliance::RegulationGenerator::new();
8575 let standard_records = reg_gen.generate_standard_records(&jurisdictions, reference_date);
8576 let cross_reference_records = reg_gen.generate_cross_reference_records();
8577 let jurisdiction_records =
8578 reg_gen.generate_jurisdiction_records(&jurisdictions, reference_date);
8579
8580 info!(
8581 " Standards: {} records, {} cross-references, {} jurisdictions",
8582 standard_records.len(),
8583 cross_reference_records.len(),
8584 jurisdiction_records.len()
8585 );
8586
8587 let audit_procedures = if cr_config.audit_procedures.enabled {
8589 let proc_config = datasynth_generators::compliance::ProcedureGeneratorConfig {
8590 procedures_per_standard: cr_config.audit_procedures.procedures_per_standard,
8591 sampling_method: cr_config.audit_procedures.sampling_method.clone(),
8592 confidence_level: cr_config.audit_procedures.confidence_level,
8593 tolerable_misstatement: cr_config.audit_procedures.tolerable_misstatement,
8594 };
8595 let mut proc_gen = datasynth_generators::compliance::ProcedureGenerator::with_config(
8596 self.seed + 9000,
8597 proc_config,
8598 );
8599 let registry = reg_gen.registry();
8600 let mut all_procs = Vec::new();
8601 for jurisdiction in &jurisdictions {
8602 let procs = proc_gen.generate_procedures(registry, jurisdiction, reference_date);
8603 all_procs.extend(procs);
8604 }
8605 info!(" Audit procedures: {}", all_procs.len());
8606 all_procs
8607 } else {
8608 Vec::new()
8609 };
8610
8611 let findings = if cr_config.findings.enabled && !audit_procedures.is_empty() {
8613 let finding_config =
8614 datasynth_generators::compliance::ComplianceFindingGeneratorConfig {
8615 finding_rate: cr_config.findings.finding_rate,
8616 material_weakness_rate: cr_config.findings.material_weakness_rate,
8617 significant_deficiency_rate: cr_config.findings.significant_deficiency_rate,
8618 generate_remediation: cr_config.findings.generate_remediation,
8619 };
8620 let mut finding_gen =
8621 datasynth_generators::compliance::ComplianceFindingGenerator::with_config(
8622 self.seed + 9100,
8623 finding_config,
8624 );
8625 let mut all_findings = Vec::new();
8626 for company in &self.config.companies {
8627 let company_findings =
8628 finding_gen.generate_findings(&audit_procedures, &company.code, reference_date);
8629 all_findings.extend(company_findings);
8630 }
8631 info!(" Compliance findings: {}", all_findings.len());
8632 all_findings
8633 } else {
8634 Vec::new()
8635 };
8636
8637 let filings = if cr_config.filings.enabled {
8639 let filing_config = datasynth_generators::compliance::FilingGeneratorConfig {
8640 filing_types: cr_config.filings.filing_types.clone(),
8641 generate_status_progression: cr_config.filings.generate_status_progression,
8642 };
8643 let mut filing_gen = datasynth_generators::compliance::FilingGenerator::with_config(
8644 self.seed + 9200,
8645 filing_config,
8646 );
8647 let company_codes: Vec<String> = self
8648 .config
8649 .companies
8650 .iter()
8651 .map(|c| c.code.clone())
8652 .collect();
8653 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8654 .unwrap_or(fallback_date);
8655 let filings = filing_gen.generate_filings(
8656 &company_codes,
8657 &jurisdictions,
8658 start_date,
8659 self.config.global.period_months,
8660 );
8661 info!(" Regulatory filings: {}", filings.len());
8662 filings
8663 } else {
8664 Vec::new()
8665 };
8666
8667 let compliance_graph = if cr_config.graph.enabled {
8669 let graph_config = datasynth_graph::ComplianceGraphConfig {
8670 include_standard_nodes: cr_config.graph.include_compliance_nodes,
8671 include_jurisdiction_nodes: cr_config.graph.include_compliance_nodes,
8672 include_cross_references: cr_config.graph.include_cross_references,
8673 include_supersession_edges: cr_config.graph.include_supersession_edges,
8674 include_account_links: cr_config.graph.include_account_links,
8675 include_control_links: cr_config.graph.include_control_links,
8676 include_company_links: cr_config.graph.include_company_links,
8677 };
8678 let mut builder = datasynth_graph::ComplianceGraphBuilder::new(graph_config);
8679
8680 let standard_inputs: Vec<datasynth_graph::StandardNodeInput> = standard_records
8682 .iter()
8683 .map(|r| datasynth_graph::StandardNodeInput {
8684 standard_id: r.standard_id.clone(),
8685 title: r.title.clone(),
8686 category: r.category.clone(),
8687 domain: r.domain.clone(),
8688 is_active: r.is_active,
8689 features: vec![if r.is_active { 1.0 } else { 0.0 }],
8690 applicable_account_types: r.applicable_account_types.clone(),
8691 applicable_processes: r.applicable_processes.clone(),
8692 })
8693 .collect();
8694 builder.add_standards(&standard_inputs);
8695
8696 let jurisdiction_inputs: Vec<datasynth_graph::JurisdictionNodeInput> =
8698 jurisdiction_records
8699 .iter()
8700 .map(|r| datasynth_graph::JurisdictionNodeInput {
8701 country_code: r.country_code.clone(),
8702 country_name: r.country_name.clone(),
8703 framework: r.accounting_framework.clone(),
8704 standard_count: r.standard_count,
8705 tax_rate: r.statutory_tax_rate,
8706 })
8707 .collect();
8708 builder.add_jurisdictions(&jurisdiction_inputs);
8709
8710 let xref_inputs: Vec<datasynth_graph::CrossReferenceEdgeInput> =
8712 cross_reference_records
8713 .iter()
8714 .map(|r| datasynth_graph::CrossReferenceEdgeInput {
8715 from_standard: r.from_standard.clone(),
8716 to_standard: r.to_standard.clone(),
8717 relationship: r.relationship.clone(),
8718 convergence_level: r.convergence_level,
8719 })
8720 .collect();
8721 builder.add_cross_references(&xref_inputs);
8722
8723 let mapping_inputs: Vec<datasynth_graph::JurisdictionMappingInput> = standard_records
8725 .iter()
8726 .map(|r| datasynth_graph::JurisdictionMappingInput {
8727 country_code: r.jurisdiction.clone(),
8728 standard_id: r.standard_id.clone(),
8729 })
8730 .collect();
8731 builder.add_jurisdiction_mappings(&mapping_inputs);
8732
8733 let proc_inputs: Vec<datasynth_graph::ProcedureNodeInput> = audit_procedures
8735 .iter()
8736 .map(|p| datasynth_graph::ProcedureNodeInput {
8737 procedure_id: p.procedure_id.clone(),
8738 standard_id: p.standard_id.clone(),
8739 procedure_type: p.procedure_type.clone(),
8740 sample_size: p.sample_size,
8741 confidence_level: p.confidence_level,
8742 })
8743 .collect();
8744 builder.add_procedures(&proc_inputs);
8745
8746 let finding_inputs: Vec<datasynth_graph::FindingNodeInput> = findings
8748 .iter()
8749 .map(|f| datasynth_graph::FindingNodeInput {
8750 finding_id: f.finding_id.to_string(),
8751 standard_id: f
8752 .related_standards
8753 .first()
8754 .map(|s| s.as_str().to_string())
8755 .unwrap_or_default(),
8756 severity: f.severity.to_string(),
8757 deficiency_level: f.deficiency_level.to_string(),
8758 severity_score: f.deficiency_level.severity_score(),
8759 control_id: f.control_id.clone(),
8760 affected_accounts: f.affected_accounts.clone(),
8761 })
8762 .collect();
8763 builder.add_findings(&finding_inputs);
8764
8765 if cr_config.graph.include_account_links {
8767 let registry = datasynth_standards::registry::StandardRegistry::with_built_in();
8768 let mut account_links: Vec<datasynth_graph::AccountLinkInput> = Vec::new();
8769 for std_record in &standard_records {
8770 if let Some(std_obj) =
8771 registry.get(&datasynth_core::models::compliance::StandardId::parse(
8772 &std_record.standard_id,
8773 ))
8774 {
8775 for acct_type in &std_obj.applicable_account_types {
8776 account_links.push(datasynth_graph::AccountLinkInput {
8777 standard_id: std_record.standard_id.clone(),
8778 account_code: acct_type.clone(),
8779 account_name: acct_type.clone(),
8780 });
8781 }
8782 }
8783 }
8784 builder.add_account_links(&account_links);
8785 }
8786
8787 if cr_config.graph.include_control_links {
8789 let mut control_links = Vec::new();
8790 let sox_like_ids: Vec<String> = standard_records
8792 .iter()
8793 .filter(|r| {
8794 r.standard_id.starts_with("SOX")
8795 || r.standard_id.starts_with("PCAOB-AS-2201")
8796 })
8797 .map(|r| r.standard_id.clone())
8798 .collect();
8799 let control_ids = [
8801 ("C001", "Cash Controls"),
8802 ("C002", "Large Transaction Approval"),
8803 ("C010", "PO Approval"),
8804 ("C011", "Three-Way Match"),
8805 ("C020", "Revenue Recognition"),
8806 ("C021", "Credit Check"),
8807 ("C030", "Manual JE Approval"),
8808 ("C031", "Period Close Review"),
8809 ("C032", "Account Reconciliation"),
8810 ("C040", "Payroll Processing"),
8811 ("C050", "Fixed Asset Capitalization"),
8812 ("C060", "Intercompany Elimination"),
8813 ];
8814 for sox_id in &sox_like_ids {
8815 for (ctrl_id, ctrl_name) in &control_ids {
8816 control_links.push(datasynth_graph::ControlLinkInput {
8817 standard_id: sox_id.clone(),
8818 control_id: ctrl_id.to_string(),
8819 control_name: ctrl_name.to_string(),
8820 });
8821 }
8822 }
8823 builder.add_control_links(&control_links);
8824 }
8825
8826 if cr_config.graph.include_company_links {
8828 let filing_inputs: Vec<datasynth_graph::FilingNodeInput> = filings
8829 .iter()
8830 .enumerate()
8831 .map(|(i, f)| datasynth_graph::FilingNodeInput {
8832 filing_id: format!("F{:04}", i + 1),
8833 filing_type: f.filing_type.to_string(),
8834 company_code: f.company_code.clone(),
8835 jurisdiction: f.jurisdiction.clone(),
8836 status: format!("{:?}", f.status),
8837 })
8838 .collect();
8839 builder.add_filings(&filing_inputs);
8840 }
8841
8842 let graph = builder.build();
8843 info!(
8844 " Compliance graph: {} nodes, {} edges",
8845 graph.nodes.len(),
8846 graph.edges.len()
8847 );
8848 Some(graph)
8849 } else {
8850 None
8851 };
8852
8853 self.check_resources_with_log("post-compliance-regulations")?;
8854
8855 Ok(ComplianceRegulationsSnapshot {
8856 standard_records,
8857 cross_reference_records,
8858 jurisdiction_records,
8859 audit_procedures,
8860 findings,
8861 filings,
8862 compliance_graph,
8863 })
8864 }
8865
8866 fn build_lineage_graph(&self) -> super::lineage::LineageGraph {
8868 use super::lineage::LineageGraphBuilder;
8869
8870 let mut builder = LineageGraphBuilder::new();
8871
8872 builder.add_config_section("config:global", "Global Config");
8874 builder.add_config_section("config:chart_of_accounts", "Chart of Accounts Config");
8875 builder.add_config_section("config:transactions", "Transaction Config");
8876
8877 builder.add_generator_phase("phase:coa", "Chart of Accounts Generation");
8879 builder.add_generator_phase("phase:je", "Journal Entry Generation");
8880
8881 builder.configured_by("phase:coa", "config:chart_of_accounts");
8883 builder.configured_by("phase:je", "config:transactions");
8884
8885 builder.add_output_file("output:je", "Journal Entries", "sample_entries.json");
8887 builder.produced_by("output:je", "phase:je");
8888
8889 if self.phase_config.generate_master_data {
8891 builder.add_config_section("config:master_data", "Master Data Config");
8892 builder.add_generator_phase("phase:master_data", "Master Data Generation");
8893 builder.configured_by("phase:master_data", "config:master_data");
8894 builder.input_to("phase:master_data", "phase:je");
8895 }
8896
8897 if self.phase_config.generate_document_flows {
8898 builder.add_config_section("config:document_flows", "Document Flow Config");
8899 builder.add_generator_phase("phase:p2p", "P2P Document Flow");
8900 builder.add_generator_phase("phase:o2c", "O2C Document Flow");
8901 builder.configured_by("phase:p2p", "config:document_flows");
8902 builder.configured_by("phase:o2c", "config:document_flows");
8903
8904 builder.add_output_file("output:po", "Purchase Orders", "purchase_orders.csv");
8905 builder.add_output_file("output:gr", "Goods Receipts", "goods_receipts.csv");
8906 builder.add_output_file("output:vi", "Vendor Invoices", "vendor_invoices.csv");
8907 builder.add_output_file("output:so", "Sales Orders", "sales_orders.csv");
8908 builder.add_output_file("output:ci", "Customer Invoices", "customer_invoices.csv");
8909
8910 builder.produced_by("output:po", "phase:p2p");
8911 builder.produced_by("output:gr", "phase:p2p");
8912 builder.produced_by("output:vi", "phase:p2p");
8913 builder.produced_by("output:so", "phase:o2c");
8914 builder.produced_by("output:ci", "phase:o2c");
8915 }
8916
8917 if self.phase_config.inject_anomalies {
8918 builder.add_config_section("config:fraud", "Fraud/Anomaly Config");
8919 builder.add_generator_phase("phase:anomaly", "Anomaly Injection");
8920 builder.configured_by("phase:anomaly", "config:fraud");
8921 builder.add_output_file(
8922 "output:labels",
8923 "Anomaly Labels",
8924 "labels/anomaly_labels.csv",
8925 );
8926 builder.produced_by("output:labels", "phase:anomaly");
8927 }
8928
8929 if self.phase_config.generate_audit {
8930 builder.add_config_section("config:audit", "Audit Config");
8931 builder.add_generator_phase("phase:audit", "Audit Data Generation");
8932 builder.configured_by("phase:audit", "config:audit");
8933 }
8934
8935 if self.phase_config.generate_banking {
8936 builder.add_config_section("config:banking", "Banking Config");
8937 builder.add_generator_phase("phase:banking", "Banking KYC/AML Generation");
8938 builder.configured_by("phase:banking", "config:banking");
8939 }
8940
8941 if self.config.llm.enabled {
8942 builder.add_config_section("config:llm", "LLM Enrichment Config");
8943 builder.add_generator_phase("phase:llm_enrichment", "LLM Enrichment");
8944 builder.configured_by("phase:llm_enrichment", "config:llm");
8945 }
8946
8947 if self.config.diffusion.enabled {
8948 builder.add_config_section("config:diffusion", "Diffusion Enhancement Config");
8949 builder.add_generator_phase("phase:diffusion", "Diffusion Enhancement");
8950 builder.configured_by("phase:diffusion", "config:diffusion");
8951 }
8952
8953 if self.config.causal.enabled {
8954 builder.add_config_section("config:causal", "Causal Generation Config");
8955 builder.add_generator_phase("phase:causal", "Causal Overlay");
8956 builder.configured_by("phase:causal", "config:causal");
8957 }
8958
8959 builder.build()
8960 }
8961}
8962
8963fn format_name(format: datasynth_config::schema::GraphExportFormat) -> &'static str {
8965 match format {
8966 datasynth_config::schema::GraphExportFormat::PytorchGeometric => "pytorch_geometric",
8967 datasynth_config::schema::GraphExportFormat::Neo4j => "neo4j",
8968 datasynth_config::schema::GraphExportFormat::Dgl => "dgl",
8969 datasynth_config::schema::GraphExportFormat::RustGraph => "rustgraph",
8970 datasynth_config::schema::GraphExportFormat::RustGraphHypergraph => "rustgraph_hypergraph",
8971 }
8972}
8973
8974#[cfg(test)]
8975#[allow(clippy::unwrap_used)]
8976mod tests {
8977 use super::*;
8978 use datasynth_config::schema::*;
8979
8980 fn create_test_config() -> GeneratorConfig {
8981 GeneratorConfig {
8982 global: GlobalConfig {
8983 industry: IndustrySector::Manufacturing,
8984 start_date: "2024-01-01".to_string(),
8985 period_months: 1,
8986 seed: Some(42),
8987 parallel: false,
8988 group_currency: "USD".to_string(),
8989 worker_threads: 0,
8990 memory_limit_mb: 0,
8991 fiscal_year_months: None,
8992 },
8993 companies: vec![CompanyConfig {
8994 code: "1000".to_string(),
8995 name: "Test Company".to_string(),
8996 currency: "USD".to_string(),
8997 country: "US".to_string(),
8998 annual_transaction_volume: TransactionVolume::TenK,
8999 volume_weight: 1.0,
9000 fiscal_year_variant: "K4".to_string(),
9001 }],
9002 chart_of_accounts: ChartOfAccountsConfig {
9003 complexity: CoAComplexity::Small,
9004 industry_specific: true,
9005 custom_accounts: None,
9006 min_hierarchy_depth: 2,
9007 max_hierarchy_depth: 4,
9008 },
9009 transactions: TransactionConfig::default(),
9010 output: OutputConfig::default(),
9011 fraud: FraudConfig::default(),
9012 internal_controls: InternalControlsConfig::default(),
9013 business_processes: BusinessProcessConfig::default(),
9014 user_personas: UserPersonaConfig::default(),
9015 templates: TemplateConfig::default(),
9016 approval: ApprovalConfig::default(),
9017 departments: DepartmentConfig::default(),
9018 master_data: MasterDataConfig::default(),
9019 document_flows: DocumentFlowConfig::default(),
9020 intercompany: IntercompanyConfig::default(),
9021 balance: BalanceConfig::default(),
9022 ocpm: OcpmConfig::default(),
9023 audit: AuditGenerationConfig::default(),
9024 banking: datasynth_banking::BankingConfig::default(),
9025 data_quality: DataQualitySchemaConfig::default(),
9026 scenario: ScenarioConfig::default(),
9027 temporal: TemporalDriftConfig::default(),
9028 graph_export: GraphExportConfig::default(),
9029 streaming: StreamingSchemaConfig::default(),
9030 rate_limit: RateLimitSchemaConfig::default(),
9031 temporal_attributes: TemporalAttributeSchemaConfig::default(),
9032 relationships: RelationshipSchemaConfig::default(),
9033 accounting_standards: AccountingStandardsConfig::default(),
9034 audit_standards: AuditStandardsConfig::default(),
9035 distributions: Default::default(),
9036 temporal_patterns: Default::default(),
9037 vendor_network: VendorNetworkSchemaConfig::default(),
9038 customer_segmentation: CustomerSegmentationSchemaConfig::default(),
9039 relationship_strength: RelationshipStrengthSchemaConfig::default(),
9040 cross_process_links: CrossProcessLinksSchemaConfig::default(),
9041 organizational_events: OrganizationalEventsSchemaConfig::default(),
9042 behavioral_drift: BehavioralDriftSchemaConfig::default(),
9043 market_drift: MarketDriftSchemaConfig::default(),
9044 drift_labeling: DriftLabelingSchemaConfig::default(),
9045 anomaly_injection: Default::default(),
9046 industry_specific: Default::default(),
9047 fingerprint_privacy: Default::default(),
9048 quality_gates: Default::default(),
9049 compliance: Default::default(),
9050 webhooks: Default::default(),
9051 llm: Default::default(),
9052 diffusion: Default::default(),
9053 causal: Default::default(),
9054 source_to_pay: Default::default(),
9055 financial_reporting: Default::default(),
9056 hr: Default::default(),
9057 manufacturing: Default::default(),
9058 sales_quotes: Default::default(),
9059 tax: Default::default(),
9060 treasury: Default::default(),
9061 project_accounting: Default::default(),
9062 esg: Default::default(),
9063 country_packs: None,
9064 scenarios: Default::default(),
9065 session: Default::default(),
9066 compliance_regulations: Default::default(),
9067 }
9068 }
9069
/// `EnhancedOrchestrator::with_defaults` accepts the baseline test config.
#[test]
fn test_enhanced_orchestrator_creation() {
    let creation = EnhancedOrchestrator::with_defaults(create_test_config());
    assert!(creation.is_ok());
}
9076
/// Journal-entry-only generation succeeds and yields at least one entry.
#[test]
fn test_minimal_generation() {
    let config = create_test_config();
    let phase_config = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
    // Unwrap directly (as the sibling tests do): on failure this prints the
    // underlying error, which a bare `assert!(result.is_ok())` would hide.
    let result = orchestrator.generate().unwrap();

    assert!(!result.journal_entries.is_empty());
}
9096
/// Master-data-only generation populates vendors, customers, and materials.
#[test]
fn test_master_data_generation() {
    let phases = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: false,
        inject_anomalies: false,
        show_progress: false,
        vendors_per_company: 5,
        customers_per_company: 5,
        materials_per_company: 10,
        assets_per_company: 5,
        employees_per_company: 10,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let output = engine.generate().unwrap();
    let master = &output.master_data;

    assert!(!master.vendors.is_empty());
    assert!(!master.customers.is_empty());
    assert!(!master.materials.is_empty());
}
9121
/// With master data and document flows enabled, both P2P and O2C chains are
/// produced, along with purchase orders and sales orders.
#[test]
fn test_document_flow_generation() {
    let phases = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: true,
        generate_journal_entries: false,
        inject_anomalies: false,
        inject_data_quality: false,
        validate_balances: false,
        generate_ocpm_events: false,
        show_progress: false,
        vendors_per_company: 5,
        customers_per_company: 5,
        materials_per_company: 10,
        assets_per_company: 5,
        employees_per_company: 10,
        p2p_chains: 5,
        o2c_chains: 5,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let output = engine.generate().unwrap();
    let flows = &output.document_flows;

    // Chains first, then the documents collected alongside them.
    assert!(!flows.p2p_chains.is_empty());
    assert!(!flows.o2c_chains.is_empty());

    assert!(!flows.purchase_orders.is_empty());
    assert!(!flows.sales_orders.is_empty());
}
9155
/// With anomaly injection switched on, journal entries are produced and the
/// anomaly label summary is present.
#[test]
fn test_anomaly_injection() {
    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: true,
        show_progress: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let output = engine.generate().unwrap();

    assert!(!output.journal_entries.is_empty());

    assert!(output.anomaly_labels.summary.is_some());
}
9178
/// Runs master data, document flows, journal entries, and balance validation
/// together and checks that each stage left output behind.
#[test]
fn test_full_generation_pipeline() {
    let phases = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: true,
        generate_journal_entries: true,
        inject_anomalies: false,
        inject_data_quality: false,
        validate_balances: true,
        generate_ocpm_events: false,
        show_progress: false,
        vendors_per_company: 3,
        customers_per_company: 3,
        materials_per_company: 5,
        assets_per_company: 3,
        employees_per_company: 5,
        p2p_chains: 3,
        o2c_chains: 3,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let output = engine.generate().unwrap();

    // Master data, document flows, journal entries, chart of accounts.
    assert!(!output.master_data.vendors.is_empty());
    assert!(!output.master_data.customers.is_empty());
    assert!(!output.document_flows.p2p_chains.is_empty());
    assert!(!output.document_flows.o2c_chains.is_empty());
    assert!(!output.journal_entries.is_empty());
    assert!(output.statistics.accounts_count > 0);

    // Subledger invoices.
    let subledger = &output.subledger;
    assert!(!subledger.ap_invoices.is_empty());
    assert!(!subledger.ar_invoices.is_empty());

    // Balance validation ran and processed entries.
    let validation = &output.balance_validation;
    assert!(validation.validated);
    assert!(validation.entries_processed > 0);
}
9220
/// Subledger AP/AR invoices match the vendor/customer invoices of the document
/// flows one-for-one by count, and the statistics report the same counts.
#[test]
fn test_subledger_linking() {
    let phases = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: true,
        generate_journal_entries: false,
        inject_anomalies: false,
        inject_data_quality: false,
        validate_balances: false,
        generate_ocpm_events: false,
        show_progress: false,
        vendors_per_company: 5,
        customers_per_company: 5,
        materials_per_company: 10,
        assets_per_company: 3,
        employees_per_company: 5,
        p2p_chains: 5,
        o2c_chains: 5,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let output = engine.generate().unwrap();

    let flows = &output.document_flows;
    let subledger = &output.subledger;
    let stats = &output.statistics;

    // Source documents exist.
    assert!(!flows.vendor_invoices.is_empty());
    assert!(!flows.customer_invoices.is_empty());

    // Subledger records exist.
    assert!(!subledger.ap_invoices.is_empty());
    assert!(!subledger.ar_invoices.is_empty());

    // Same number of AP invoices as vendor invoices.
    assert_eq!(subledger.ap_invoices.len(), flows.vendor_invoices.len());

    // Same number of AR invoices as customer invoices.
    assert_eq!(subledger.ar_invoices.len(), flows.customer_invoices.len());

    // Statistics agree with the subledger contents.
    assert_eq!(stats.ap_invoice_count, subledger.ap_invoices.len());
    assert_eq!(stats.ar_invoice_count, subledger.ar_invoices.len());
}
9276
/// Balance validation over generated journal entries reports a validated run,
/// no unbalanced entries, and equal debit and credit totals.
#[test]
fn test_balance_validation() {
    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        validate_balances: true,
        show_progress: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let output = engine.generate().unwrap();
    let validation = &output.balance_validation;

    assert!(validation.validated);
    assert!(validation.entries_processed > 0);

    assert!(!validation.has_unbalanced_entries);

    assert_eq!(validation.total_debits, validation.total_credits);
}
9306
/// The counts reported in the statistics equal the sizes of the collections
/// actually returned (vendors, customers, materials, journal entries).
#[test]
fn test_statistics_accuracy() {
    let phases = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        vendors_per_company: 10,
        customers_per_company: 20,
        materials_per_company: 15,
        assets_per_company: 5,
        employees_per_company: 8,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let output = engine.generate().unwrap();

    let stats = &output.statistics;
    let master = &output.master_data;

    assert_eq!(stats.vendor_count, master.vendors.len());
    assert_eq!(stats.customer_count, master.customers.len());
    assert_eq!(stats.material_count, master.materials.len());
    // `total_entries` is cast to `usize` to compare against a collection length.
    assert_eq!(stats.total_entries as usize, output.journal_entries.len());
}
9345
/// Pins the values expected from `PhaseConfig::default()`: master data,
/// document flows, journal entries, balance validation, and progress display
/// are on; anomaly injection is off; vendor and customer counts are positive.
#[test]
fn test_phase_config_defaults() {
    let defaults = PhaseConfig::default();

    // Phase switches.
    assert!(defaults.generate_master_data);
    assert!(defaults.generate_document_flows);
    assert!(defaults.generate_journal_entries);
    assert!(!defaults.inject_anomalies);
    assert!(defaults.validate_balances);
    assert!(defaults.show_progress);

    // Per-company volumes.
    assert!(defaults.vendors_per_company > 0);
    assert!(defaults.customers_per_company > 0);
}
9358
/// A freshly constructed orchestrator has no chart of accounts yet.
#[test]
fn test_get_coa_before_generation() {
    let engine = EnhancedOrchestrator::with_defaults(create_test_config()).unwrap();

    assert!(engine.get_coa().is_none());
}
9367
/// After a successful `generate()` run, `get_coa()` returns a value.
#[test]
fn test_get_coa_after_generation() {
    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    // Only the orchestrator's state after the run matters; the result is discarded.
    let _ = engine.generate().unwrap();

    assert!(engine.get_coa().is_some());
}
9386
/// Master data generation with five of each entity per company yields vendors.
///
/// NOTE(review): despite the name, no getter is called here; only the vendors
/// in the returned result are inspected.
#[test]
fn test_get_master_data() {
    let phases = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: false,
        inject_anomalies: false,
        show_progress: false,
        vendors_per_company: 5,
        customers_per_company: 5,
        materials_per_company: 5,
        assets_per_company: 5,
        employees_per_company: 5,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let output = engine.generate().unwrap();

    assert!(!output.master_data.vendors.is_empty());
}
9410
/// `with_progress(false)` clears `show_progress` in the orchestrator's phase config.
#[test]
fn test_with_progress_builder() {
    let built = EnhancedOrchestrator::with_defaults(create_test_config()).unwrap();
    let quiet = built.with_progress(false);

    assert!(!quiet.phase_config.show_progress);
}
9421
/// Generation with a second company (EUR/DE subsidiary "2000") added to the
/// baseline config reports two companies and at least ten vendors and ten
/// customers in the statistics.
#[test]
fn test_multi_company_generation() {
    let mut config = create_test_config();
    config.companies.push(CompanyConfig {
        code: "2000".to_string(),
        name: "Subsidiary".to_string(),
        currency: "EUR".to_string(),
        country: "DE".to_string(),
        annual_transaction_volume: TransactionVolume::TenK,
        volume_weight: 0.5,
        fiscal_year_variant: "K4".to_string(),
    });

    let phase_config = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        vendors_per_company: 5,
        customers_per_company: 5,
        materials_per_company: 5,
        assets_per_company: 5,
        employees_per_company: 5,
        ..Default::default()
    };

    let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
    let result = orchestrator.generate().unwrap();

    // 5 vendors and 5 customers were requested per company, for 2 companies.
    assert!(result.statistics.vendor_count >= 10);
    assert!(result.statistics.customer_count >= 10);
    // `assert_eq!` prints the actual count on failure, unlike `assert!(a == b)`.
    assert_eq!(result.statistics.companies_count, 2);
}
9457
/// Requesting document flows without generating master data leaves both the
/// P2P and O2C chain lists empty; generation itself still succeeds.
#[test]
fn test_empty_master_data_skips_document_flows() {
    let phases = PhaseConfig {
        // Document flows are requested, but master data is switched off.
        generate_master_data: false,
        generate_document_flows: true,
        generate_journal_entries: false,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let output = engine.generate().unwrap();
    let flows = &output.document_flows;

    assert!(flows.p2p_chains.is_empty());
    assert!(flows.o2c_chains.is_empty());
}
9477
/// `statistics.total_line_items` equals the sum of `line_count()` over all
/// generated journal entries.
#[test]
fn test_journal_entry_line_item_count() {
    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let output = engine.generate().unwrap();

    // Recompute the total independently of the statistics.
    let recounted = output
        .journal_entries
        .iter()
        .fold(0u64, |total, entry| total + entry.line_count() as u64);

    assert_eq!(output.statistics.total_line_items, recounted);
}
9501
/// With audit generation on and two engagements requested, exactly two
/// engagements are produced, every audit collection is non-empty, and each
/// audit statistic equals the size of its collection.
#[test]
fn test_audit_generation() {
    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        generate_audit: true,
        audit_engagements: 2,
        workpapers_per_engagement: 5,
        evidence_per_workpaper: 2,
        risks_per_engagement: 3,
        findings_per_engagement: 2,
        judgments_per_engagement: 2,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let output = engine.generate().unwrap();

    let audit = &output.audit;
    let stats = &output.statistics;

    // Content checks.
    assert_eq!(audit.engagements.len(), 2);
    assert!(!audit.workpapers.is_empty());
    assert!(!audit.evidence.is_empty());
    assert!(!audit.risk_assessments.is_empty());
    assert!(!audit.findings.is_empty());
    assert!(!audit.judgments.is_empty());

    // Statistics mirror the collections.
    assert_eq!(stats.audit_engagement_count, audit.engagements.len());
    assert_eq!(stats.audit_workpaper_count, audit.workpapers.len());
    assert_eq!(stats.audit_evidence_count, audit.evidence.len());
    assert_eq!(stats.audit_risk_count, audit.risk_assessments.len());
    assert_eq!(stats.audit_finding_count, audit.findings.len());
    assert_eq!(stats.audit_judgment_count, audit.judgments.len());
}
9558
/// The baseline config leaves LLM, diffusion, and causal phases disabled, and a
/// run with it reports zeroed statistics for those phases and no
/// counterfactual pairs.
#[test]
fn test_new_phases_disabled_by_default() {
    let baseline = create_test_config();
    assert!(!baseline.llm.enabled);
    assert!(!baseline.diffusion.enabled);
    assert!(!baseline.causal.enabled);

    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(baseline, phases).unwrap();
    let output = engine.generate().unwrap();
    let stats = &output.statistics;

    // LLM enrichment.
    assert_eq!(stats.llm_enrichment_ms, 0);
    assert_eq!(stats.llm_vendors_enriched, 0);
    // Diffusion enhancement.
    assert_eq!(stats.diffusion_enhancement_ms, 0);
    assert_eq!(stats.diffusion_samples_generated, 0);
    // Causal overlay.
    assert_eq!(stats.causal_generation_ms, 0);
    assert_eq!(stats.causal_samples_generated, 0);
    assert!(stats.causal_validation_passed.is_none());
    // Counterfactuals.
    assert_eq!(stats.counterfactual_pair_count, 0);
    assert!(output.counterfactual_pairs.is_empty());
}
9590
/// With counterfactual generation on, there is one counterfactual pair per
/// journal entry, the statistics report that count, and pair ids are unique.
///
/// NOTE(review): the checks are skipped entirely when no journal entries are
/// generated, so the test passes vacuously in that case.
#[test]
fn test_counterfactual_generation_enabled() {
    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        generate_counterfactuals: true,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let output = engine.generate().unwrap();

    if output.journal_entries.is_empty() {
        return;
    }

    let entry_count = output.journal_entries.len();
    let pairs = &output.counterfactual_pairs;

    assert_eq!(pairs.len(), entry_count);
    assert_eq!(output.statistics.counterfactual_pair_count, entry_count);

    // Collecting ids into a set drops duplicates; equal sizes mean all unique.
    let unique_ids: std::collections::HashSet<_> =
        pairs.iter().map(|pair| pair.pair_id.clone()).collect();
    assert_eq!(unique_ids.len(), pairs.len());
}
9626
/// With LLM enrichment enabled and capped at three vendors, between one and
/// three vendors are reported as enriched.
#[test]
fn test_llm_enrichment_enabled() {
    let mut cfg = create_test_config();
    cfg.llm.enabled = true;
    cfg.llm.max_vendor_enrichments = 3;

    let phases = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: false,
        inject_anomalies: false,
        show_progress: false,
        vendors_per_company: 5,
        customers_per_company: 3,
        materials_per_company: 3,
        assets_per_company: 3,
        employees_per_company: 3,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let output = engine.generate().unwrap();
    let stats = &output.statistics;

    assert!(stats.llm_vendors_enriched > 0);
    // Never more than the configured `max_vendor_enrichments`.
    assert!(stats.llm_vendors_enriched <= 3);
}
9654
/// With diffusion enabled and `sample_size` set to 20, the statistics report
/// exactly 20 diffusion samples.
#[test]
fn test_diffusion_enhancement_enabled() {
    let mut cfg = create_test_config();
    cfg.diffusion.enabled = true;
    cfg.diffusion.n_steps = 50;
    cfg.diffusion.sample_size = 20;

    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let output = engine.generate().unwrap();

    assert_eq!(output.statistics.diffusion_samples_generated, 20);
}
9677
/// Causal overlay with the "fraud_detection" template, 100 samples, and
/// validation on: 100 samples are reported and a validation outcome is set.
#[test]
fn test_causal_overlay_enabled() {
    let mut cfg = create_test_config();
    cfg.causal.enabled = true;
    cfg.causal.template = String::from("fraud_detection");
    cfg.causal.sample_size = 100;
    cfg.causal.validate = true;

    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let output = engine.generate().unwrap();
    let stats = &output.statistics;

    assert_eq!(stats.causal_samples_generated, 100);
    // Validation was requested, so an outcome (pass or fail) must be recorded.
    assert!(stats.causal_validation_passed.is_some());
}
9703
/// Causal overlay with the "revenue_cycle" template, 50 samples, and validation
/// off: 50 samples are reported and no validation outcome is recorded.
#[test]
fn test_causal_overlay_revenue_cycle_template() {
    let mut cfg = create_test_config();
    cfg.causal.enabled = true;
    cfg.causal.template = String::from("revenue_cycle");
    cfg.causal.sample_size = 50;
    cfg.causal.validate = false;

    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let output = engine.generate().unwrap();
    let stats = &output.statistics;

    assert_eq!(stats.causal_samples_generated, 50);
    // Validation was not requested, so no outcome is expected.
    assert!(stats.causal_validation_passed.is_none());
}
9729
/// LLM enrichment, diffusion, and causal overlay all enabled in one run: each
/// phase reports its own output in the statistics.
#[test]
fn test_all_new_phases_enabled_together() {
    let mut cfg = create_test_config();
    // LLM enrichment.
    cfg.llm.enabled = true;
    cfg.llm.max_vendor_enrichments = 2;
    // Diffusion enhancement.
    cfg.diffusion.enabled = true;
    cfg.diffusion.n_steps = 20;
    cfg.diffusion.sample_size = 10;
    // Causal overlay (template left at its default).
    cfg.causal.enabled = true;
    cfg.causal.sample_size = 50;
    cfg.causal.validate = true;

    let phases = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        vendors_per_company: 5,
        customers_per_company: 3,
        materials_per_company: 3,
        assets_per_company: 3,
        employees_per_company: 3,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let output = engine.generate().unwrap();
    let stats = &output.statistics;

    assert!(stats.llm_vendors_enriched > 0);
    assert_eq!(stats.diffusion_samples_generated, 10);
    assert_eq!(stats.causal_samples_generated, 50);
    assert!(stats.causal_validation_passed.is_some());
}
9765
/// The LLM, diffusion, and causal statistics fields survive a JSON
/// serialize/deserialize round trip unchanged.
#[test]
fn test_statistics_serialization_with_new_fields() {
    let original = EnhancedGenerationStatistics {
        total_entries: 100,
        total_line_items: 500,
        llm_enrichment_ms: 42,
        llm_vendors_enriched: 10,
        diffusion_enhancement_ms: 100,
        diffusion_samples_generated: 50,
        causal_generation_ms: 200,
        causal_samples_generated: 100,
        causal_validation_passed: Some(true),
        ..Default::default()
    };

    let encoded = serde_json::to_string(&original).unwrap();
    let decoded: EnhancedGenerationStatistics = serde_json::from_str(&encoded).unwrap();

    assert_eq!(decoded.llm_enrichment_ms, 42);
    assert_eq!(decoded.llm_vendors_enriched, 10);
    assert_eq!(decoded.diffusion_enhancement_ms, 100);
    assert_eq!(decoded.diffusion_samples_generated, 50);
    assert_eq!(decoded.causal_generation_ms, 200);
    assert_eq!(decoded.causal_samples_generated, 100);
    assert_eq!(decoded.causal_validation_passed, Some(true));
}
9792
/// A statistics JSON document that lacks the LLM, diffusion, and causal fields
/// still deserializes, with those fields coming back as zero / `None`.
#[test]
fn test_statistics_backward_compat_deserialization() {
    // Payload containing none of the llm_*, diffusion_*, or causal_* keys.
    let legacy_json = r#"{
        "total_entries": 100,
        "total_line_items": 500,
        "accounts_count": 50,
        "companies_count": 1,
        "period_months": 12,
        "vendor_count": 10,
        "customer_count": 20,
        "material_count": 15,
        "asset_count": 5,
        "employee_count": 8,
        "p2p_chain_count": 5,
        "o2c_chain_count": 5,
        "ap_invoice_count": 5,
        "ar_invoice_count": 5,
        "ocpm_event_count": 0,
        "ocpm_object_count": 0,
        "ocpm_case_count": 0,
        "audit_engagement_count": 0,
        "audit_workpaper_count": 0,
        "audit_evidence_count": 0,
        "audit_risk_count": 0,
        "audit_finding_count": 0,
        "audit_judgment_count": 0,
        "anomalies_injected": 0,
        "data_quality_issues": 0,
        "banking_customer_count": 0,
        "banking_account_count": 0,
        "banking_transaction_count": 0,
        "banking_suspicious_count": 0,
        "graph_export_count": 0,
        "graph_node_count": 0,
        "graph_edge_count": 0
    }"#;

    let parsed: EnhancedGenerationStatistics = serde_json::from_str(legacy_json).unwrap();

    // Absent keys fall back to zero / `None`.
    assert_eq!(parsed.llm_enrichment_ms, 0);
    assert_eq!(parsed.llm_vendors_enriched, 0);
    assert_eq!(parsed.diffusion_enhancement_ms, 0);
    assert_eq!(parsed.diffusion_samples_generated, 0);
    assert_eq!(parsed.causal_generation_ms, 0);
    assert_eq!(parsed.causal_samples_generated, 0);
    assert!(parsed.causal_validation_passed.is_none());
}
9842}